mindspore 2.2.14__cp39-cp39-win_amd64.whl → 2.4.0__cp39-cp39-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (1217)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +8 -5
  5. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +124 -25
  9. mindspore/_extends/builtin_operations.py +2 -1
  10. mindspore/_extends/graph_kernel/model/graph_parallel.py +16 -6
  11. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +3 -16
  12. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +16 -4
  13. mindspore/_extends/parallel_compile/akg_compiler/compiler.py +1 -0
  14. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +96 -0
  15. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +2 -1
  16. mindspore/_extends/parallel_compile/akg_compiler/util.py +5 -2
  17. mindspore/_extends/parse/__init__.py +18 -14
  18. mindspore/_extends/parse/compile_config.py +299 -0
  19. mindspore/_extends/parse/namespace.py +2 -2
  20. mindspore/_extends/parse/parser.py +182 -68
  21. mindspore/_extends/parse/resources.py +45 -14
  22. mindspore/_extends/parse/standard_method.py +192 -252
  23. mindspore/{ops/_op_impl/tbe/atomic_addr_clean.py → _extends/pijit/__init__.py} +6 -16
  24. mindspore/_extends/pijit/pijit_func_white_list.py +669 -0
  25. mindspore/_extends/remote/kernel_build_server.py +2 -0
  26. mindspore/_profiler.py +30 -0
  27. mindspore/amp.py +67 -26
  28. mindspore/atlprov.dll +0 -0
  29. mindspore/avcodec-59.dll +0 -0
  30. mindspore/avdevice-59.dll +0 -0
  31. mindspore/avfilter-8.dll +0 -0
  32. mindspore/avformat-59.dll +0 -0
  33. mindspore/avutil-57.dll +0 -0
  34. mindspore/boost/adasum.py +1 -1
  35. mindspore/boost/base.py +1 -1
  36. mindspore/boost/boost_cell_wrapper.py +2 -2
  37. mindspore/boost/grad_freeze.py +2 -2
  38. mindspore/boost/group_loss_scale_manager.py +1 -1
  39. mindspore/boost/less_batch_normalization.py +9 -6
  40. mindspore/c1.dll +0 -0
  41. mindspore/c1xx.dll +0 -0
  42. mindspore/c2.dll +0 -0
  43. mindspore/common/__init__.py +20 -7
  44. mindspore/common/_jit_fallback_utils.py +2 -3
  45. mindspore/common/_pijit_context.py +190 -0
  46. mindspore/common/_register_for_adapter.py +7 -0
  47. mindspore/common/_register_for_recompute.py +48 -0
  48. mindspore/common/_register_for_tensor.py +10 -10
  49. mindspore/common/_stub_tensor.py +7 -1
  50. mindspore/common/_tensor_overload.py +139 -0
  51. mindspore/common/_utils.py +5 -17
  52. mindspore/common/api.py +449 -129
  53. mindspore/common/auto_dynamic_shape.py +27 -14
  54. mindspore/common/dtype.py +17 -10
  55. mindspore/common/dump.py +8 -11
  56. mindspore/common/file_system.py +48 -0
  57. mindspore/common/generator.py +254 -0
  58. mindspore/common/hook_handle.py +65 -30
  59. mindspore/common/initializer.py +1 -1
  60. mindspore/common/jit_config.py +34 -14
  61. mindspore/common/lazy_inline.py +72 -19
  62. mindspore/common/mindir_util.py +12 -2
  63. mindspore/common/mutable.py +79 -14
  64. mindspore/common/no_inline.py +54 -0
  65. mindspore/common/np_dtype.py +25 -0
  66. mindspore/common/parameter.py +73 -21
  67. mindspore/common/recompute.py +292 -0
  68. mindspore/common/seed.py +9 -9
  69. mindspore/common/sparse_tensor.py +276 -24
  70. mindspore/common/symbol.py +122 -0
  71. mindspore/common/tensor.py +668 -514
  72. mindspore/communication/__init__.py +6 -11
  73. mindspore/communication/_comm_helper.py +43 -3
  74. mindspore/communication/comm_func.py +1395 -0
  75. mindspore/communication/management.py +117 -104
  76. mindspore/config/op_info.config +22 -54
  77. mindspore/context.py +455 -71
  78. mindspore/dataset/__init__.py +5 -5
  79. mindspore/dataset/audio/__init__.py +6 -6
  80. mindspore/dataset/audio/transforms.py +711 -158
  81. mindspore/dataset/callback/ds_callback.py +2 -2
  82. mindspore/dataset/core/config.py +7 -0
  83. mindspore/dataset/core/validator_helpers.py +7 -0
  84. mindspore/dataset/engine/cache_client.py +2 -2
  85. mindspore/dataset/engine/datasets.py +201 -116
  86. mindspore/dataset/engine/datasets_audio.py +14 -14
  87. mindspore/dataset/engine/datasets_standard_format.py +83 -3
  88. mindspore/dataset/engine/datasets_text.py +39 -39
  89. mindspore/dataset/engine/datasets_user_defined.py +230 -141
  90. mindspore/dataset/engine/datasets_vision.py +78 -74
  91. mindspore/dataset/engine/iterators.py +29 -0
  92. mindspore/dataset/engine/obs/util.py +7 -0
  93. mindspore/dataset/engine/offload.py +5 -7
  94. mindspore/dataset/engine/queue.py +138 -66
  95. mindspore/dataset/engine/serializer_deserializer.py +2 -2
  96. mindspore/dataset/engine/validators.py +41 -15
  97. mindspore/dataset/text/__init__.py +2 -5
  98. mindspore/dataset/text/transforms.py +408 -121
  99. mindspore/dataset/text/utils.py +9 -9
  100. mindspore/dataset/transforms/__init__.py +0 -3
  101. mindspore/dataset/transforms/transforms.py +261 -76
  102. mindspore/dataset/utils/browse_dataset.py +9 -9
  103. mindspore/dataset/utils/line_reader.py +2 -0
  104. mindspore/dataset/vision/__init__.py +7 -10
  105. mindspore/dataset/vision/c_transforms.py +10 -10
  106. mindspore/dataset/vision/py_transforms_util.py +1 -1
  107. mindspore/dataset/vision/transforms.py +2844 -549
  108. mindspore/dataset/vision/utils.py +161 -10
  109. mindspore/dataset/vision/validators.py +16 -3
  110. mindspore/dnnl.dll +0 -0
  111. mindspore/dpcmi.dll +0 -0
  112. mindspore/{rewrite/ast_creator_register.py → experimental/es/__init__.py} +5 -20
  113. mindspore/experimental/es/embedding_service.py +883 -0
  114. mindspore/experimental/es/embedding_service_layer.py +581 -0
  115. mindspore/experimental/llm_boost/__init__.py +21 -0
  116. mindspore/experimental/llm_boost/atb/__init__.py +23 -0
  117. mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
  118. mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
  119. mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
  120. mindspore/experimental/llm_boost/register.py +129 -0
  121. mindspore/experimental/llm_boost/utils.py +31 -0
  122. mindspore/experimental/optim/__init__.py +12 -2
  123. mindspore/experimental/optim/adadelta.py +161 -0
  124. mindspore/experimental/optim/adagrad.py +168 -0
  125. mindspore/experimental/optim/adam.py +35 -34
  126. mindspore/experimental/optim/adamax.py +170 -0
  127. mindspore/experimental/optim/adamw.py +124 -15
  128. mindspore/experimental/optim/asgd.py +153 -0
  129. mindspore/experimental/optim/lr_scheduler.py +66 -121
  130. mindspore/experimental/optim/nadam.py +157 -0
  131. mindspore/experimental/optim/optimizer.py +18 -8
  132. mindspore/experimental/optim/radam.py +194 -0
  133. mindspore/experimental/optim/rmsprop.py +154 -0
  134. mindspore/experimental/optim/rprop.py +164 -0
  135. mindspore/experimental/optim/sgd.py +28 -19
  136. mindspore/hal/__init__.py +40 -0
  137. mindspore/hal/_ascend.py +57 -0
  138. mindspore/hal/_base.py +57 -0
  139. mindspore/hal/_cpu.py +56 -0
  140. mindspore/hal/_gpu.py +57 -0
  141. mindspore/hal/contiguous_tensors_handle.py +175 -0
  142. mindspore/hal/device.py +356 -0
  143. mindspore/hal/event.py +179 -0
  144. mindspore/hal/memory.py +326 -0
  145. mindspore/hal/stream.py +357 -0
  146. mindspore/include/api/data_type.h +2 -2
  147. mindspore/include/api/dual_abi_helper.h +16 -3
  148. mindspore/include/api/model.h +4 -3
  149. mindspore/include/api/model_group.h +13 -1
  150. mindspore/include/api/status.h +14 -0
  151. mindspore/include/api/types.h +10 -10
  152. mindspore/include/c_api/model_c.h +173 -0
  153. mindspore/include/c_api/types_c.h +19 -0
  154. mindspore/include/dataset/config.h +2 -2
  155. mindspore/include/dataset/constants.h +2 -2
  156. mindspore/include/dataset/execute.h +3 -5
  157. mindspore/include/dataset/vision.h +58 -2
  158. mindspore/jpeg62.dll +0 -0
  159. mindspore/log.py +3 -3
  160. mindspore/mindrecord/__init__.py +5 -1
  161. mindspore/mindrecord/config.py +809 -0
  162. mindspore/mindrecord/filereader.py +25 -0
  163. mindspore/mindrecord/filewriter.py +138 -103
  164. mindspore/mindrecord/mindpage.py +40 -6
  165. mindspore/mindrecord/shardutils.py +3 -2
  166. mindspore/mindrecord/shardwriter.py +7 -0
  167. mindspore/mindrecord/tools/cifar100_to_mr.py +8 -13
  168. mindspore/mindrecord/tools/cifar10_to_mr.py +9 -15
  169. mindspore/mindrecord/tools/csv_to_mr.py +4 -9
  170. mindspore/mindrecord/tools/imagenet_to_mr.py +3 -8
  171. mindspore/mindrecord/tools/mnist_to_mr.py +7 -12
  172. mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -6
  173. mindspore/mindspore_backend.dll +0 -0
  174. mindspore/mindspore_common.dll +0 -0
  175. mindspore/mindspore_core.dll +0 -0
  176. mindspore/mindspore_glog.dll +0 -0
  177. mindspore/mindspore_np_dtype.dll +0 -0
  178. mindspore/mindspore_ops.dll +0 -0
  179. mindspore/mint/__init__.py +1586 -0
  180. mindspore/mint/distributed/__init__.py +31 -0
  181. mindspore/mint/distributed/distributed.py +254 -0
  182. mindspore/{rewrite/ast_transformers → mint/linalg}/__init__.py +9 -4
  183. mindspore/mint/nn/__init__.py +757 -0
  184. mindspore/mint/nn/functional.py +679 -0
  185. mindspore/mint/nn/layer/__init__.py +39 -0
  186. mindspore/mint/nn/layer/activation.py +133 -0
  187. mindspore/mint/nn/layer/normalization.py +477 -0
  188. mindspore/mint/nn/layer/pooling.py +110 -0
  189. mindspore/mint/optim/__init__.py +24 -0
  190. mindspore/mint/optim/adamw.py +206 -0
  191. mindspore/mint/special/__init__.py +63 -0
  192. mindspore/msobj140.dll +0 -0
  193. mindspore/mspdb140.dll +0 -0
  194. mindspore/mspdbcore.dll +0 -0
  195. mindspore/mspdbst.dll +0 -0
  196. mindspore/mspft140.dll +0 -0
  197. mindspore/msvcdis140.dll +0 -0
  198. mindspore/msvcp140_1.dll +0 -0
  199. mindspore/msvcp140_2.dll +0 -0
  200. mindspore/msvcp140_atomic_wait.dll +0 -0
  201. mindspore/msvcp140_codecvt_ids.dll +0 -0
  202. mindspore/multiprocessing/__init__.py +73 -0
  203. mindspore/nn/cell.py +461 -323
  204. mindspore/nn/dynamic_lr.py +2 -2
  205. mindspore/nn/layer/activation.py +292 -135
  206. mindspore/nn/layer/basic.py +288 -83
  207. mindspore/nn/layer/channel_shuffle.py +3 -16
  208. mindspore/nn/layer/container.py +3 -3
  209. mindspore/nn/layer/conv.py +75 -66
  210. mindspore/nn/layer/embedding.py +221 -45
  211. mindspore/nn/layer/image.py +4 -7
  212. mindspore/nn/layer/math.py +1 -1
  213. mindspore/nn/layer/normalization.py +150 -68
  214. mindspore/nn/layer/padding.py +64 -87
  215. mindspore/nn/layer/pooling.py +175 -12
  216. mindspore/nn/layer/rnn_cells.py +6 -16
  217. mindspore/nn/layer/rnns.py +6 -5
  218. mindspore/nn/layer/thor_layer.py +1 -2
  219. mindspore/nn/layer/timedistributed.py +1 -1
  220. mindspore/nn/layer/transformer.py +55 -53
  221. mindspore/nn/learning_rate_schedule.py +6 -5
  222. mindspore/nn/loss/__init__.py +2 -2
  223. mindspore/nn/loss/loss.py +145 -88
  224. mindspore/nn/optim/__init__.py +2 -1
  225. mindspore/nn/optim/ada_grad.py +4 -2
  226. mindspore/nn/optim/adadelta.py +4 -2
  227. mindspore/nn/optim/adafactor.py +1 -1
  228. mindspore/nn/optim/adam.py +102 -181
  229. mindspore/nn/optim/adamax.py +4 -2
  230. mindspore/nn/optim/adasum.py +3 -3
  231. mindspore/nn/optim/asgd.py +4 -2
  232. mindspore/nn/optim/ftrl.py +31 -61
  233. mindspore/nn/optim/lamb.py +5 -3
  234. mindspore/nn/optim/lars.py +2 -2
  235. mindspore/nn/optim/lazyadam.py +6 -4
  236. mindspore/nn/optim/momentum.py +13 -25
  237. mindspore/nn/optim/optimizer.py +6 -3
  238. mindspore/nn/optim/proximal_ada_grad.py +4 -2
  239. mindspore/nn/optim/rmsprop.py +9 -3
  240. mindspore/nn/optim/rprop.py +4 -2
  241. mindspore/nn/optim/sgd.py +5 -3
  242. mindspore/nn/optim/tft_wrapper.py +127 -0
  243. mindspore/nn/optim/thor.py +2 -2
  244. mindspore/nn/probability/distribution/_utils/custom_ops.py +2 -2
  245. mindspore/nn/probability/distribution/beta.py +2 -2
  246. mindspore/nn/probability/distribution/categorical.py +4 -6
  247. mindspore/nn/probability/distribution/cauchy.py +2 -2
  248. mindspore/nn/probability/distribution/exponential.py +2 -2
  249. mindspore/nn/probability/distribution/geometric.py +1 -1
  250. mindspore/nn/probability/distribution/gumbel.py +2 -2
  251. mindspore/nn/probability/distribution/logistic.py +1 -1
  252. mindspore/nn/probability/distribution/poisson.py +2 -2
  253. mindspore/nn/probability/distribution/uniform.py +2 -2
  254. mindspore/nn/reinforcement/_tensors_queue.py +13 -1
  255. mindspore/nn/wrap/__init__.py +2 -1
  256. mindspore/nn/wrap/cell_wrapper.py +46 -12
  257. mindspore/nn/wrap/grad_reducer.py +148 -8
  258. mindspore/nn/wrap/loss_scale.py +44 -7
  259. mindspore/numpy/__init__.py +2 -0
  260. mindspore/numpy/array_creations.py +67 -68
  261. mindspore/numpy/array_ops.py +70 -66
  262. mindspore/numpy/dtypes.py +3 -3
  263. mindspore/numpy/fft.py +966 -0
  264. mindspore/numpy/logic_ops.py +11 -10
  265. mindspore/numpy/math_ops.py +147 -152
  266. mindspore/numpy/utils.py +3 -0
  267. mindspore/numpy/utils_const.py +4 -4
  268. mindspore/opencv_core452.dll +0 -0
  269. mindspore/opencv_imgcodecs452.dll +0 -0
  270. mindspore/opencv_imgproc452.dll +0 -0
  271. mindspore/ops/__init__.py +9 -6
  272. mindspore/ops/_grad_experimental/grad_array_ops.py +4 -129
  273. mindspore/ops/_grad_experimental/grad_comm_ops.py +135 -36
  274. mindspore/ops/_grad_experimental/grad_math_ops.py +61 -298
  275. mindspore/ops/_grad_experimental/grad_nn_ops.py +0 -53
  276. mindspore/ops/_grad_experimental/grad_quant_ops.py +3 -3
  277. mindspore/ops/_grad_experimental/grad_sparse.py +1 -1
  278. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
  279. mindspore/ops/_op_impl/__init__.py +0 -1
  280. mindspore/ops/_op_impl/aicpu/gamma.py +2 -0
  281. mindspore/ops/_op_impl/aicpu/generate_eod_mask.py +1 -1
  282. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +1 -3
  283. mindspore/ops/_op_impl/aicpu/poisson.py +2 -0
  284. mindspore/ops/_op_impl/cpu/__init__.py +1 -3
  285. mindspore/ops/_op_impl/cpu/adam.py +2 -2
  286. mindspore/ops/_op_impl/cpu/adam_weight_decay.py +3 -2
  287. mindspore/ops/_op_impl/cpu/maximum_grad.py +16 -14
  288. mindspore/ops/_op_impl/cpu/minimum_grad.py +8 -0
  289. mindspore/ops/_vmap/vmap_array_ops.py +162 -101
  290. mindspore/ops/_vmap/vmap_base.py +8 -1
  291. mindspore/ops/_vmap/vmap_grad_math_ops.py +95 -9
  292. mindspore/ops/_vmap/vmap_grad_nn_ops.py +143 -58
  293. mindspore/ops/_vmap/vmap_image_ops.py +70 -13
  294. mindspore/ops/_vmap/vmap_math_ops.py +147 -59
  295. mindspore/ops/_vmap/vmap_nn_ops.py +292 -117
  296. mindspore/ops/_vmap/vmap_other_ops.py +1 -1
  297. mindspore/ops/auto_generate/__init__.py +31 -0
  298. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +309 -0
  299. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +252 -0
  300. mindspore/ops/auto_generate/gen_arg_handler.py +197 -0
  301. mindspore/ops/auto_generate/gen_extend_func.py +1701 -0
  302. mindspore/ops/auto_generate/gen_ops_def.py +8482 -0
  303. mindspore/ops/auto_generate/gen_ops_prim.py +16704 -0
  304. mindspore/ops/auto_generate/pyboost_inner_prim.py +549 -0
  305. mindspore/ops/composite/__init__.py +5 -2
  306. mindspore/ops/composite/base.py +201 -66
  307. mindspore/ops/composite/math_ops.py +10 -49
  308. mindspore/ops/composite/multitype_ops/_compile_utils.py +192 -618
  309. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +25 -134
  310. mindspore/ops/composite/multitype_ops/add_impl.py +6 -0
  311. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +6 -0
  312. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +6 -0
  313. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +6 -0
  314. mindspore/ops/composite/multitype_ops/div_impl.py +8 -0
  315. mindspore/ops/composite/multitype_ops/equal_impl.py +6 -0
  316. mindspore/ops/composite/multitype_ops/floordiv_impl.py +8 -0
  317. mindspore/ops/composite/multitype_ops/getitem_impl.py +6 -0
  318. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +6 -0
  319. mindspore/ops/composite/multitype_ops/greater_impl.py +6 -0
  320. mindspore/ops/composite/multitype_ops/in_impl.py +8 -2
  321. mindspore/ops/composite/multitype_ops/left_shift_impl.py +6 -0
  322. mindspore/ops/composite/multitype_ops/less_equal_impl.py +6 -0
  323. mindspore/ops/composite/multitype_ops/less_impl.py +6 -0
  324. mindspore/ops/composite/multitype_ops/logic_not_impl.py +6 -0
  325. mindspore/ops/composite/multitype_ops/logical_and_impl.py +6 -0
  326. mindspore/ops/composite/multitype_ops/logical_or_impl.py +6 -0
  327. mindspore/ops/composite/multitype_ops/mod_impl.py +6 -0
  328. mindspore/ops/composite/multitype_ops/mul_impl.py +6 -0
  329. mindspore/ops/composite/multitype_ops/negative_impl.py +9 -3
  330. mindspore/ops/composite/multitype_ops/not_equal_impl.py +6 -0
  331. mindspore/ops/composite/multitype_ops/not_in_impl.py +8 -3
  332. mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -2
  333. mindspore/ops/composite/multitype_ops/pow_impl.py +6 -0
  334. mindspore/ops/composite/multitype_ops/right_shift_impl.py +6 -0
  335. mindspore/ops/composite/multitype_ops/setitem_impl.py +32 -21
  336. mindspore/ops/composite/multitype_ops/sub_impl.py +6 -0
  337. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +6 -3
  338. mindspore/ops/deprecated.py +14 -3
  339. mindspore/ops/function/__init__.py +53 -11
  340. mindspore/ops/function/array_func.py +1269 -1821
  341. mindspore/ops/function/clip_func.py +19 -31
  342. mindspore/ops/function/debug_func.py +114 -5
  343. mindspore/ops/function/fft_func.py +44 -0
  344. mindspore/ops/function/grad/grad_func.py +30 -22
  345. mindspore/ops/function/image_func.py +27 -21
  346. mindspore/ops/function/linalg_func.py +35 -68
  347. mindspore/ops/function/math_func.py +1170 -2697
  348. mindspore/ops/function/nn_func.py +2116 -1128
  349. mindspore/ops/function/other_func.py +8 -8
  350. mindspore/ops/function/parameter_func.py +5 -93
  351. mindspore/ops/function/random_func.py +435 -113
  352. mindspore/ops/function/reshard_func.py +104 -0
  353. mindspore/ops/function/sparse_func.py +4 -4
  354. mindspore/ops/function/sparse_unary_func.py +9 -16
  355. mindspore/ops/function/spectral_func.py +1 -1
  356. mindspore/ops/function/vmap_func.py +16 -15
  357. mindspore/ops/functional.py +355 -346
  358. mindspore/ops/op_info_register.py +18 -45
  359. mindspore/ops/operations/__init__.py +38 -24
  360. mindspore/ops/operations/_grad_ops.py +21 -927
  361. mindspore/ops/operations/_infer_ops.py +19 -0
  362. mindspore/ops/operations/_inner_ops.py +173 -607
  363. mindspore/ops/operations/_rl_inner_ops.py +2 -2
  364. mindspore/ops/operations/_scalar_ops.py +5 -480
  365. mindspore/ops/operations/_sequence_ops.py +6 -36
  366. mindspore/ops/operations/_tensor_array.py +8 -8
  367. mindspore/ops/operations/array_ops.py +106 -2837
  368. mindspore/ops/operations/comm_ops.py +799 -127
  369. mindspore/ops/operations/custom_ops.py +124 -119
  370. mindspore/ops/operations/debug_ops.py +142 -41
  371. mindspore/ops/operations/image_ops.py +1 -217
  372. mindspore/ops/operations/inner_ops.py +5 -40
  373. mindspore/ops/operations/linalg_ops.py +1 -49
  374. mindspore/ops/operations/manually_defined/__init__.py +24 -0
  375. mindspore/ops/operations/manually_defined/_inner.py +73 -0
  376. mindspore/ops/operations/manually_defined/ops_def.py +2271 -0
  377. mindspore/ops/operations/math_ops.py +666 -4972
  378. mindspore/ops/operations/nn_ops.py +205 -2213
  379. mindspore/ops/operations/other_ops.py +60 -49
  380. mindspore/ops/operations/random_ops.py +50 -54
  381. mindspore/ops/operations/reshard_ops.py +53 -0
  382. mindspore/ops/operations/sparse_ops.py +4 -4
  383. mindspore/ops/primitive.py +216 -103
  384. mindspore/ops_generate/__init__.py +27 -0
  385. mindspore/ops_generate/arg_dtype_cast.py +252 -0
  386. mindspore/ops_generate/arg_handler.py +197 -0
  387. mindspore/ops_generate/gen_aclnn_implement.py +263 -0
  388. mindspore/ops_generate/gen_constants.py +36 -0
  389. mindspore/ops_generate/gen_ops.py +1099 -0
  390. mindspore/ops_generate/gen_ops_inner_prim.py +131 -0
  391. mindspore/ops_generate/gen_pyboost_func.py +1052 -0
  392. mindspore/ops_generate/gen_utils.py +209 -0
  393. mindspore/ops_generate/op_proto.py +145 -0
  394. mindspore/ops_generate/pyboost_utils.py +367 -0
  395. mindspore/ops_generate/template.py +261 -0
  396. mindspore/parallel/__init__.py +8 -4
  397. mindspore/parallel/_auto_parallel_context.py +100 -10
  398. mindspore/parallel/_cell_wrapper.py +99 -9
  399. mindspore/parallel/_cost_model_context.py +1 -1
  400. mindspore/parallel/_dp_allreduce_fusion.py +159 -159
  401. mindspore/parallel/_parallel_serialization.py +67 -23
  402. mindspore/parallel/_ps_context.py +1 -1
  403. mindspore/parallel/_recovery_context.py +1 -1
  404. mindspore/parallel/_tensor.py +99 -22
  405. mindspore/parallel/_transformer/__init__.py +1 -1
  406. mindspore/parallel/_transformer/layers.py +1 -1
  407. mindspore/parallel/_transformer/loss.py +1 -1
  408. mindspore/parallel/_transformer/moe.py +1 -1
  409. mindspore/parallel/_transformer/op_parallel_config.py +1 -1
  410. mindspore/parallel/_transformer/transformer.py +2 -2
  411. mindspore/parallel/_utils.py +173 -6
  412. mindspore/parallel/algo_parameter_config.py +8 -10
  413. mindspore/parallel/checkpoint_transform.py +204 -38
  414. mindspore/parallel/cluster/__init__.py +15 -0
  415. mindspore/parallel/cluster/process_entity/__init__.py +18 -0
  416. mindspore/parallel/cluster/process_entity/_api.py +352 -0
  417. mindspore/parallel/cluster/process_entity/_utils.py +101 -0
  418. mindspore/parallel/cluster/run.py +136 -0
  419. mindspore/parallel/mpi/__init__.py +1 -1
  420. mindspore/parallel/mpi/_mpi_config.py +1 -1
  421. mindspore/parallel/parameter_broadcast.py +151 -0
  422. mindspore/parallel/shard.py +279 -37
  423. mindspore/parallel/transform_safetensors.py +993 -0
  424. mindspore/pgodb140.dll +0 -0
  425. mindspore/pgort140.dll +0 -0
  426. mindspore/profiler/__init__.py +4 -2
  427. mindspore/profiler/common/constant.py +29 -0
  428. mindspore/profiler/common/process_pool.py +41 -0
  429. mindspore/profiler/common/registry.py +47 -0
  430. mindspore/profiler/common/singleton.py +28 -0
  431. mindspore/profiler/common/util.py +153 -0
  432. mindspore/profiler/dynamic_profiler.py +694 -0
  433. mindspore/profiler/envprofiling.py +18 -20
  434. mindspore/{_extends/parallel_compile/tbe_compiler → profiler/parser/ascend_analysis}/__init__.py +1 -1
  435. mindspore/profiler/parser/ascend_analysis/constant.py +71 -0
  436. mindspore/profiler/parser/ascend_analysis/file_manager.py +180 -0
  437. mindspore/profiler/parser/ascend_analysis/function_event.py +185 -0
  438. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +136 -0
  439. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +131 -0
  440. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +104 -0
  441. mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
  442. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +123 -0
  443. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +86 -0
  444. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +75 -0
  445. mindspore/profiler/parser/ascend_cluster_generator.py +14 -9
  446. mindspore/profiler/parser/ascend_communicate_generator.py +0 -1
  447. mindspore/profiler/parser/ascend_flops_generator.py +20 -4
  448. mindspore/profiler/parser/ascend_hccl_generator.py +29 -278
  449. mindspore/profiler/parser/ascend_integrate_generator.py +42 -0
  450. mindspore/profiler/parser/ascend_memory_generator.py +185 -0
  451. mindspore/profiler/parser/ascend_msprof_exporter.py +148 -146
  452. mindspore/profiler/parser/ascend_msprof_generator.py +73 -283
  453. mindspore/profiler/parser/ascend_op_generator.py +92 -42
  454. mindspore/profiler/parser/ascend_timeline_generator.py +298 -133
  455. mindspore/profiler/parser/base_timeline_generator.py +25 -25
  456. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
  457. mindspore/profiler/parser/framework_parser.py +4 -393
  458. mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
  459. mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
  460. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
  461. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
  462. mindspore/profiler/parser/integrator.py +3 -1
  463. mindspore/profiler/parser/memory_usage_parser.py +0 -154
  464. mindspore/profiler/parser/minddata_parser.py +72 -3
  465. mindspore/profiler/parser/profiler_info.py +94 -7
  466. mindspore/profiler/profiler.py +153 -0
  467. mindspore/profiler/profiling.py +631 -508
  468. mindspore/rewrite/__init__.py +2 -14
  469. mindspore/rewrite/api/node.py +122 -36
  470. mindspore/rewrite/api/pattern_engine.py +2 -3
  471. mindspore/rewrite/api/scoped_value.py +16 -15
  472. mindspore/rewrite/api/symbol_tree.py +45 -29
  473. mindspore/rewrite/ast_helpers/__init__.py +3 -6
  474. mindspore/rewrite/ast_helpers/ast_converter.py +143 -0
  475. mindspore/rewrite/ast_helpers/ast_finder.py +48 -0
  476. mindspore/rewrite/ast_helpers/ast_flattener.py +268 -0
  477. mindspore/rewrite/ast_helpers/ast_modifier.py +160 -92
  478. mindspore/rewrite/common/__init__.py +1 -2
  479. mindspore/rewrite/common/config.py +24 -0
  480. mindspore/rewrite/common/{rewrite_elog.py → error_log.py} +39 -39
  481. mindspore/rewrite/{namer.py → common/namer.py} +63 -18
  482. mindspore/rewrite/common/namespace.py +118 -0
  483. mindspore/rewrite/node/__init__.py +5 -5
  484. mindspore/rewrite/node/call_function.py +23 -7
  485. mindspore/rewrite/node/cell_container.py +7 -3
  486. mindspore/rewrite/node/control_flow.py +53 -28
  487. mindspore/rewrite/node/node.py +212 -196
  488. mindspore/rewrite/node/node_manager.py +51 -22
  489. mindspore/rewrite/node/node_topological_manager.py +3 -23
  490. mindspore/rewrite/parsers/__init__.py +12 -0
  491. mindspore/rewrite/parsers/arguments_parser.py +8 -9
  492. mindspore/rewrite/parsers/assign_parser.py +637 -413
  493. mindspore/rewrite/parsers/attribute_parser.py +3 -4
  494. mindspore/rewrite/parsers/class_def_parser.py +115 -148
  495. mindspore/rewrite/parsers/constant_parser.py +5 -5
  496. mindspore/rewrite/parsers/container_parser.py +4 -6
  497. mindspore/rewrite/parsers/expr_parser.py +55 -0
  498. mindspore/rewrite/parsers/for_parser.py +31 -98
  499. mindspore/rewrite/parsers/function_def_parser.py +13 -5
  500. mindspore/rewrite/parsers/if_parser.py +28 -10
  501. mindspore/rewrite/parsers/module_parser.py +8 -182
  502. mindspore/rewrite/parsers/parser.py +1 -5
  503. mindspore/rewrite/parsers/parser_register.py +1 -1
  504. mindspore/rewrite/parsers/return_parser.py +5 -10
  505. mindspore/rewrite/parsers/while_parser.py +59 -0
  506. mindspore/rewrite/sparsify/utils.py +1 -1
  507. mindspore/rewrite/symbol_tree/__init__.py +20 -0
  508. mindspore/rewrite/{symbol_tree.py → symbol_tree/symbol_tree.py} +705 -186
  509. mindspore/rewrite/{symbol_tree_builder.py → symbol_tree/symbol_tree_builder.py} +8 -8
  510. mindspore/rewrite/{symbol_tree_dumper.py → symbol_tree/symbol_tree_dumper.py} +4 -4
  511. mindspore/run_check/_check_version.py +40 -115
  512. mindspore/run_check/run_check.py +1 -1
  513. mindspore/safeguard/rewrite_obfuscation.py +597 -263
  514. mindspore/swresample-4.dll +0 -0
  515. mindspore/swscale-6.dll +0 -0
  516. mindspore/tbbmalloc.dll +0 -0
  517. mindspore/tinyxml2.dll +0 -0
  518. mindspore/train/__init__.py +7 -5
  519. mindspore/train/_utils.py +204 -4
  520. mindspore/train/amp.py +335 -295
  521. mindspore/train/anf_ir_pb2.py +14 -2
  522. mindspore/train/callback/__init__.py +5 -2
  523. mindspore/train/callback/_backup_and_restore.py +5 -5
  524. mindspore/train/callback/_callback.py +4 -4
  525. mindspore/train/callback/_checkpoint.py +220 -43
  526. mindspore/train/callback/_cluster_monitor.py +201 -0
  527. mindspore/train/callback/_early_stop.py +2 -2
  528. mindspore/train/callback/_flops_collector.py +239 -0
  529. mindspore/train/callback/_landscape.py +15 -9
  530. mindspore/train/callback/_loss_monitor.py +5 -5
  531. mindspore/train/callback/_on_request_exit.py +136 -33
  532. mindspore/train/callback/_reduce_lr_on_plateau.py +2 -2
  533. mindspore/train/callback/_summary_collector.py +12 -12
  534. mindspore/train/callback/_tft_register.py +352 -0
  535. mindspore/train/callback/_time_monitor.py +3 -3
  536. mindspore/train/data_sink.py +6 -5
  537. mindspore/train/dataset_helper.py +66 -23
  538. mindspore/train/loss_scale_manager.py +2 -2
  539. mindspore/train/metrics/accuracy.py +7 -7
  540. mindspore/train/metrics/confusion_matrix.py +8 -6
  541. mindspore/train/metrics/cosine_similarity.py +6 -4
  542. mindspore/train/metrics/error.py +2 -2
  543. mindspore/train/metrics/metric.py +3 -3
  544. mindspore/train/metrics/perplexity.py +2 -1
  545. mindspore/train/metrics/roc.py +4 -4
  546. mindspore/train/metrics/topk.py +2 -2
  547. mindspore/train/mind_ir_pb2.py +116 -37
  548. mindspore/train/model.py +382 -76
  549. mindspore/train/serialization.py +787 -288
  550. mindspore/train/summary/_summary_adapter.py +1 -1
  551. mindspore/train/summary/summary_record.py +51 -28
  552. mindspore/train/train_thor/convert_utils.py +3 -3
  553. mindspore/turbojpeg.dll +0 -0
  554. mindspore/utils/__init__.py +21 -0
  555. mindspore/utils/utils.py +60 -0
  556. mindspore/vcmeta.dll +0 -0
  557. mindspore/vcruntime140.dll +0 -0
  558. mindspore/vcruntime140_1.dll +0 -0
  559. mindspore/version.py +1 -1
  560. {mindspore-2.2.14.dist-info → mindspore-2.4.0.dist-info}/METADATA +8 -4
  561. mindspore-2.4.0.dist-info/RECORD +1406 -0
  562. {mindspore-2.2.14.dist-info → mindspore-2.4.0.dist-info}/entry_points.txt +1 -0
  563. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +0 -662
  564. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +0 -377
  565. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +0 -201
  566. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +0 -515
  567. mindspore/gen_ops.py +0 -273
  568. mindspore/include/c_api/ms/abstract.h +0 -67
  569. mindspore/include/c_api/ms/attribute.h +0 -197
  570. mindspore/include/c_api/ms/base/handle_types.h +0 -43
  571. mindspore/include/c_api/ms/base/macros.h +0 -32
  572. mindspore/include/c_api/ms/base/status.h +0 -33
  573. mindspore/include/c_api/ms/base/types.h +0 -282
  574. mindspore/include/c_api/ms/context.h +0 -102
  575. mindspore/include/c_api/ms/graph.h +0 -160
  576. mindspore/include/c_api/ms/node.h +0 -606
  577. mindspore/include/c_api/ms/tensor.h +0 -161
  578. mindspore/include/c_api/ms/value.h +0 -84
  579. mindspore/mindspore_shared_lib.dll +0 -0
  580. mindspore/nn/layer/flash_attention.py +0 -189
  581. mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
  582. mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
  583. mindspore/ops/_op_impl/cpu/concat.py +0 -39
  584. mindspore/ops/_op_impl/cpu/tensor_shape.py +0 -42
  585. mindspore/ops/_op_impl/tbe/__init__.py +0 -47
  586. mindspore/ops/_op_impl/tbe/abs.py +0 -38
  587. mindspore/ops/_op_impl/tbe/abs_ds.py +0 -39
  588. mindspore/ops/_op_impl/tbe/abs_grad.py +0 -43
  589. mindspore/ops/_op_impl/tbe/abs_grad_ds.py +0 -44
  590. mindspore/ops/_op_impl/tbe/accumulate_n_v2.py +0 -41
  591. mindspore/ops/_op_impl/tbe/accumulate_n_v2_ds.py +0 -42
  592. mindspore/ops/_op_impl/tbe/acos.py +0 -37
  593. mindspore/ops/_op_impl/tbe/acos_ds.py +0 -38
  594. mindspore/ops/_op_impl/tbe/acos_grad.py +0 -43
  595. mindspore/ops/_op_impl/tbe/acos_grad_ds.py +0 -44
  596. mindspore/ops/_op_impl/tbe/acosh.py +0 -37
  597. mindspore/ops/_op_impl/tbe/acosh_ds.py +0 -38
  598. mindspore/ops/_op_impl/tbe/acosh_grad.py +0 -43
  599. mindspore/ops/_op_impl/tbe/acosh_grad_ds.py +0 -44
  600. mindspore/ops/_op_impl/tbe/act_ulq_clamp_max_grad.py +0 -38
  601. mindspore/ops/_op_impl/tbe/act_ulq_clamp_min_grad.py +0 -38
  602. mindspore/ops/_op_impl/tbe/acts_ulq.py +0 -45
  603. mindspore/ops/_op_impl/tbe/acts_ulq_input_grad.py +0 -38
  604. mindspore/ops/_op_impl/tbe/adam_apply_one.py +0 -50
  605. mindspore/ops/_op_impl/tbe/adam_apply_one_assign.py +0 -53
  606. mindspore/ops/_op_impl/tbe/adam_apply_one_ds.py +0 -51
  607. mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay.py +0 -54
  608. mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay_assign.py +0 -54
  609. mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay_ds.py +0 -55
  610. mindspore/ops/_op_impl/tbe/adaptive_max_pool2d.py +0 -37
  611. mindspore/ops/_op_impl/tbe/add.py +0 -42
  612. mindspore/ops/_op_impl/tbe/add_ds.py +0 -43
  613. mindspore/ops/_op_impl/tbe/add_n.py +0 -39
  614. mindspore/ops/_op_impl/tbe/add_n_ds.py +0 -40
  615. mindspore/ops/_op_impl/tbe/addcdiv.py +0 -41
  616. mindspore/ops/_op_impl/tbe/addcdiv_ds.py +0 -42
  617. mindspore/ops/_op_impl/tbe/addcmul.py +0 -43
  618. mindspore/ops/_op_impl/tbe/addcmul_ds.py +0 -44
  619. mindspore/ops/_op_impl/tbe/apply_ada_max.py +0 -68
  620. mindspore/ops/_op_impl/tbe/apply_ada_max_ds.py +0 -69
  621. mindspore/ops/_op_impl/tbe/apply_adadelta.py +0 -66
  622. mindspore/ops/_op_impl/tbe/apply_adadelta_ds.py +0 -67
  623. mindspore/ops/_op_impl/tbe/apply_adagrad.py +0 -55
  624. mindspore/ops/_op_impl/tbe/apply_adagrad_d_a.py +0 -67
  625. mindspore/ops/_op_impl/tbe/apply_adagrad_ds.py +0 -56
  626. mindspore/ops/_op_impl/tbe/apply_adagrad_v2.py +0 -48
  627. mindspore/ops/_op_impl/tbe/apply_adagrad_v2_ds.py +0 -49
  628. mindspore/ops/_op_impl/tbe/apply_adam.py +0 -79
  629. mindspore/ops/_op_impl/tbe/apply_adam_ds.py +0 -80
  630. mindspore/ops/_op_impl/tbe/apply_adam_with_amsgrad.py +0 -60
  631. mindspore/ops/_op_impl/tbe/apply_adam_with_amsgrad_ds.py +0 -61
  632. mindspore/ops/_op_impl/tbe/apply_add_sign.py +0 -65
  633. mindspore/ops/_op_impl/tbe/apply_add_sign_ds.py +0 -66
  634. mindspore/ops/_op_impl/tbe/apply_centered_rms_prop.py +0 -77
  635. mindspore/ops/_op_impl/tbe/apply_centered_rms_prop_ds.py +0 -78
  636. mindspore/ops/_op_impl/tbe/apply_ftrl.py +0 -67
  637. mindspore/ops/_op_impl/tbe/apply_ftrl_ds.py +0 -68
  638. mindspore/ops/_op_impl/tbe/apply_gradient_descent.py +0 -44
  639. mindspore/ops/_op_impl/tbe/apply_gradient_descent_ds.py +0 -45
  640. mindspore/ops/_op_impl/tbe/apply_keras_momentum.py +0 -49
  641. mindspore/ops/_op_impl/tbe/apply_momentum.py +0 -64
  642. mindspore/ops/_op_impl/tbe/apply_momentum_ds.py +0 -65
  643. mindspore/ops/_op_impl/tbe/apply_power_sign.py +0 -65
  644. mindspore/ops/_op_impl/tbe/apply_power_sign_ds.py +0 -66
  645. mindspore/ops/_op_impl/tbe/apply_proximal_adagrad.py +0 -57
  646. mindspore/ops/_op_impl/tbe/apply_proximal_adagrad_ds.py +0 -58
  647. mindspore/ops/_op_impl/tbe/apply_proximal_gradient_descent.py +0 -54
  648. mindspore/ops/_op_impl/tbe/apply_proximal_gradient_descent_ds.py +0 -55
  649. mindspore/ops/_op_impl/tbe/apply_rms_prop.py +0 -52
  650. mindspore/ops/_op_impl/tbe/approximate_equal.py +0 -39
  651. mindspore/ops/_op_impl/tbe/approximate_equal_ds.py +0 -40
  652. mindspore/ops/_op_impl/tbe/arg_max.py +0 -38
  653. mindspore/ops/_op_impl/tbe/arg_max_with_value.py +0 -38
  654. mindspore/ops/_op_impl/tbe/arg_max_with_value_ds.py +0 -39
  655. mindspore/ops/_op_impl/tbe/arg_min.py +0 -38
  656. mindspore/ops/_op_impl/tbe/arg_min_v2_ds.py +0 -40
  657. mindspore/ops/_op_impl/tbe/arg_min_with_value.py +0 -38
  658. mindspore/ops/_op_impl/tbe/arg_min_with_value_ds.py +0 -39
  659. mindspore/ops/_op_impl/tbe/asin.py +0 -37
  660. mindspore/ops/_op_impl/tbe/asin_ds.py +0 -38
  661. mindspore/ops/_op_impl/tbe/asin_grad.py +0 -43
  662. mindspore/ops/_op_impl/tbe/asin_grad_ds.py +0 -44
  663. mindspore/ops/_op_impl/tbe/asinh.py +0 -37
  664. mindspore/ops/_op_impl/tbe/asinh_ds.py +0 -38
  665. mindspore/ops/_op_impl/tbe/asinh_grad.py +0 -43
  666. mindspore/ops/_op_impl/tbe/asinh_grad_ds.py +0 -44
  667. mindspore/ops/_op_impl/tbe/assign.py +0 -79
  668. mindspore/ops/_op_impl/tbe/assign_add.py +0 -59
  669. mindspore/ops/_op_impl/tbe/assign_add_ds.py +0 -60
  670. mindspore/ops/_op_impl/tbe/assign_ds.py +0 -80
  671. mindspore/ops/_op_impl/tbe/assign_sub.py +0 -55
  672. mindspore/ops/_op_impl/tbe/assign_sub_ds.py +0 -56
  673. mindspore/ops/_op_impl/tbe/atan.py +0 -37
  674. mindspore/ops/_op_impl/tbe/atan2.py +0 -38
  675. mindspore/ops/_op_impl/tbe/atan2_ds.py +0 -39
  676. mindspore/ops/_op_impl/tbe/atan_ds.py +0 -38
  677. mindspore/ops/_op_impl/tbe/atan_grad.py +0 -43
  678. mindspore/ops/_op_impl/tbe/atan_grad_ds.py +0 -44
  679. mindspore/ops/_op_impl/tbe/atanh.py +0 -37
  680. mindspore/ops/_op_impl/tbe/atanh_ds.py +0 -38
  681. mindspore/ops/_op_impl/tbe/avg_pool.py +0 -43
  682. mindspore/ops/_op_impl/tbe/avg_pool_3d.py +0 -44
  683. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +0 -45
  684. mindspore/ops/_op_impl/tbe/avg_pool_ds.py +0 -44
  685. mindspore/ops/_op_impl/tbe/avg_pool_grad.py +0 -42
  686. mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +0 -42
  687. mindspore/ops/_op_impl/tbe/basic_lstm_cell.py +0 -57
  688. mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad.py +0 -50
  689. mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -51
  690. mindspore/ops/_op_impl/tbe/basic_lstm_cell_input_grad.py +0 -42
  691. mindspore/ops/_op_impl/tbe/basic_lstm_cell_weight_grad.py +0 -41
  692. mindspore/ops/_op_impl/tbe/batch_matmul.py +0 -42
  693. mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +0 -41
  694. mindspore/ops/_op_impl/tbe/batch_matmul_v2.py +0 -47
  695. mindspore/ops/_op_impl/tbe/batch_to_space.py +0 -38
  696. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +0 -38
  697. mindspore/ops/_op_impl/tbe/batch_to_space_nd_ds.py +0 -39
  698. mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +0 -41
  699. mindspore/ops/_op_impl/tbe/batchnorm.py +0 -58
  700. mindspore/ops/_op_impl/tbe/batchnorm_grad.py +0 -58
  701. mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +0 -42
  702. mindspore/ops/_op_impl/tbe/bessel_i0e.py +0 -37
  703. mindspore/ops/_op_impl/tbe/bessel_i0e_ds.py +0 -38
  704. mindspore/ops/_op_impl/tbe/bessel_i1e.py +0 -37
  705. mindspore/ops/_op_impl/tbe/bessel_i1e_ds.py +0 -38
  706. mindspore/ops/_op_impl/tbe/bias_add.py +0 -38
  707. mindspore/ops/_op_impl/tbe/bias_add_ds.py +0 -39
  708. mindspore/ops/_op_impl/tbe/bias_add_grad.py +0 -53
  709. mindspore/ops/_op_impl/tbe/binary_cross_entropy.py +0 -39
  710. mindspore/ops/_op_impl/tbe/binary_cross_entropy_ds.py +0 -40
  711. mindspore/ops/_op_impl/tbe/binary_cross_entropy_grad.py +0 -44
  712. mindspore/ops/_op_impl/tbe/binary_cross_entropy_grad_ds.py +0 -45
  713. mindspore/ops/_op_impl/tbe/bitwise_and.py +0 -39
  714. mindspore/ops/_op_impl/tbe/bitwise_and_ds.py +0 -40
  715. mindspore/ops/_op_impl/tbe/bitwise_or.py +0 -39
  716. mindspore/ops/_op_impl/tbe/bitwise_or_ds.py +0 -40
  717. mindspore/ops/_op_impl/tbe/bitwise_xor.py +0 -39
  718. mindspore/ops/_op_impl/tbe/bitwise_xor_ds.py +0 -40
  719. mindspore/ops/_op_impl/tbe/bn_infer.py +0 -43
  720. mindspore/ops/_op_impl/tbe/bn_infer_ds.py +0 -45
  721. mindspore/ops/_op_impl/tbe/bn_infer_grad.py +0 -41
  722. mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +0 -40
  723. mindspore/ops/_op_impl/tbe/bn_inference.py +0 -50
  724. mindspore/ops/_op_impl/tbe/bn_training_reduce.py +0 -38
  725. mindspore/ops/_op_impl/tbe/bn_training_reduce_ds.py +0 -39
  726. mindspore/ops/_op_impl/tbe/bn_training_reduce_grad.py +0 -46
  727. mindspore/ops/_op_impl/tbe/bn_training_reduce_grad_ds.py +0 -47
  728. mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -52
  729. mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -53
  730. mindspore/ops/_op_impl/tbe/bn_training_update_grad.py +0 -44
  731. mindspore/ops/_op_impl/tbe/bn_training_update_grad_ds.py +0 -45
  732. mindspore/ops/_op_impl/tbe/bn_training_update_v2.py +0 -48
  733. mindspore/ops/_op_impl/tbe/bn_training_update_v3.py +0 -51
  734. mindspore/ops/_op_impl/tbe/bounding_box_decode.py +0 -41
  735. mindspore/ops/_op_impl/tbe/bounding_box_decode_ds.py +0 -42
  736. mindspore/ops/_op_impl/tbe/bounding_box_encode.py +0 -38
  737. mindspore/ops/_op_impl/tbe/broadcast_to.py +0 -40
  738. mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +0 -44
  739. mindspore/ops/_op_impl/tbe/cast.py +0 -55
  740. mindspore/ops/_op_impl/tbe/cast_ds.py +0 -58
  741. mindspore/ops/_op_impl/tbe/cdist.py +0 -38
  742. mindspore/ops/_op_impl/tbe/cdist_grad.py +0 -42
  743. mindspore/ops/_op_impl/tbe/ceil.py +0 -37
  744. mindspore/ops/_op_impl/tbe/ceil_ds.py +0 -38
  745. mindspore/ops/_op_impl/tbe/celu.py +0 -39
  746. mindspore/ops/_op_impl/tbe/centralization.py +0 -39
  747. mindspore/ops/_op_impl/tbe/check_valid.py +0 -38
  748. mindspore/ops/_op_impl/tbe/check_valid_ds.py +0 -39
  749. mindspore/ops/_op_impl/tbe/clip_by_norm_no_div_sum.py +0 -41
  750. mindspore/ops/_op_impl/tbe/clip_by_norm_no_div_sum_ds.py +0 -42
  751. mindspore/ops/_op_impl/tbe/clip_by_value.py +0 -41
  752. mindspore/ops/_op_impl/tbe/clip_by_value_ds.py +0 -42
  753. mindspore/ops/_op_impl/tbe/concat.py +0 -40
  754. mindspore/ops/_op_impl/tbe/concat_ds.py +0 -38
  755. mindspore/ops/_op_impl/tbe/confusion_matrix.py +0 -63
  756. mindspore/ops/_op_impl/tbe/confusion_mul_grad.py +0 -40
  757. mindspore/ops/_op_impl/tbe/confusion_softmax_grad.py +0 -41
  758. mindspore/ops/_op_impl/tbe/confusion_transpose_d.py +0 -39
  759. mindspore/ops/_op_impl/tbe/conv2d.py +0 -47
  760. mindspore/ops/_op_impl/tbe/conv2d_backprop_filter.py +0 -42
  761. mindspore/ops/_op_impl/tbe/conv2d_backprop_filter_ds.py +0 -43
  762. mindspore/ops/_op_impl/tbe/conv2d_backprop_input.py +0 -42
  763. mindspore/ops/_op_impl/tbe/conv2d_backprop_input_ds.py +0 -44
  764. mindspore/ops/_op_impl/tbe/conv2d_ds.py +0 -47
  765. mindspore/ops/_op_impl/tbe/conv2d_transpose.py +0 -48
  766. mindspore/ops/_op_impl/tbe/conv3d.py +0 -45
  767. mindspore/ops/_op_impl/tbe/conv3d_backprop_filter.py +0 -42
  768. mindspore/ops/_op_impl/tbe/conv3d_backprop_input.py +0 -42
  769. mindspore/ops/_op_impl/tbe/conv3d_transpose.py +0 -47
  770. mindspore/ops/_op_impl/tbe/conv3d_transpose_ds.py +0 -48
  771. mindspore/ops/_op_impl/tbe/cos.py +0 -37
  772. mindspore/ops/_op_impl/tbe/cos_ds.py +0 -38
  773. mindspore/ops/_op_impl/tbe/cosh.py +0 -37
  774. mindspore/ops/_op_impl/tbe/cosh_ds.py +0 -38
  775. mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -42
  776. mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -44
  777. mindspore/ops/_op_impl/tbe/cum_sum.py +0 -42
  778. mindspore/ops/_op_impl/tbe/cum_sum_ds.py +0 -44
  779. mindspore/ops/_op_impl/tbe/cummin.py +0 -41
  780. mindspore/ops/_op_impl/tbe/cumprod.py +0 -42
  781. mindspore/ops/_op_impl/tbe/data_format_dim_map.py +0 -38
  782. mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +0 -40
  783. mindspore/ops/_op_impl/tbe/deformable_offsets.py +0 -45
  784. mindspore/ops/_op_impl/tbe/deformable_offsets_grad.py +0 -48
  785. mindspore/ops/_op_impl/tbe/depth_to_space_ds.py +0 -49
  786. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +0 -44
  787. mindspore/ops/_op_impl/tbe/depthwise_conv2d_backprop_filter.py +0 -41
  788. mindspore/ops/_op_impl/tbe/depthwise_conv2d_backprop_input.py +0 -41
  789. mindspore/ops/_op_impl/tbe/diag.py +0 -38
  790. mindspore/ops/_op_impl/tbe/diag_part.py +0 -38
  791. mindspore/ops/_op_impl/tbe/dilation.py +0 -40
  792. mindspore/ops/_op_impl/tbe/div.py +0 -41
  793. mindspore/ops/_op_impl/tbe/div_ds.py +0 -42
  794. mindspore/ops/_op_impl/tbe/div_no_nan.py +0 -41
  795. mindspore/ops/_op_impl/tbe/div_no_nan_ds.py +0 -42
  796. mindspore/ops/_op_impl/tbe/dropout_do_mask.py +0 -38
  797. mindspore/ops/_op_impl/tbe/dropout_do_mask_ds.py +0 -39
  798. mindspore/ops/_op_impl/tbe/dropout_do_mask_v3.py +0 -39
  799. mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +0 -34
  800. mindspore/ops/_op_impl/tbe/dynamic_gru_v2.py +0 -95
  801. mindspore/ops/_op_impl/tbe/dynamic_rnn.py +0 -82
  802. mindspore/ops/_op_impl/tbe/elu.py +0 -38
  803. mindspore/ops/_op_impl/tbe/elu_ds.py +0 -39
  804. mindspore/ops/_op_impl/tbe/elu_grad.py +0 -43
  805. mindspore/ops/_op_impl/tbe/elu_grad_ds.py +0 -44
  806. mindspore/ops/_op_impl/tbe/equal.py +0 -42
  807. mindspore/ops/_op_impl/tbe/equal_ds.py +0 -42
  808. mindspore/ops/_op_impl/tbe/erf.py +0 -37
  809. mindspore/ops/_op_impl/tbe/erf_ds.py +0 -38
  810. mindspore/ops/_op_impl/tbe/erfc.py +0 -37
  811. mindspore/ops/_op_impl/tbe/erfc_ds.py +0 -38
  812. mindspore/ops/_op_impl/tbe/erfinv.py +0 -36
  813. mindspore/ops/_op_impl/tbe/exp.py +0 -40
  814. mindspore/ops/_op_impl/tbe/exp_ds.py +0 -41
  815. mindspore/ops/_op_impl/tbe/expand_dims.py +0 -38
  816. mindspore/ops/_op_impl/tbe/expm1.py +0 -37
  817. mindspore/ops/_op_impl/tbe/expm1_ds.py +0 -38
  818. mindspore/ops/_op_impl/tbe/extract_image_patches.py +0 -41
  819. mindspore/ops/_op_impl/tbe/extract_volume_patches.py +0 -39
  820. mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars.py +0 -39
  821. mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_gradient.py +0 -43
  822. mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_per_channel.py +0 -39
  823. mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_per_channel_gradient.py +0 -43
  824. mindspore/ops/_op_impl/tbe/fast_gelu.py +0 -37
  825. mindspore/ops/_op_impl/tbe/fast_gelu_ds.py +0 -38
  826. mindspore/ops/_op_impl/tbe/fast_gelu_grad.py +0 -41
  827. mindspore/ops/_op_impl/tbe/fast_gelu_grad_ds.py +0 -42
  828. mindspore/ops/_op_impl/tbe/fill.py +0 -56
  829. mindspore/ops/_op_impl/tbe/fill_ds.py +0 -42
  830. mindspore/ops/_op_impl/tbe/flatten.py +0 -48
  831. mindspore/ops/_op_impl/tbe/floor.py +0 -37
  832. mindspore/ops/_op_impl/tbe/floor_div.py +0 -41
  833. mindspore/ops/_op_impl/tbe/floor_div_ds.py +0 -42
  834. mindspore/ops/_op_impl/tbe/floor_ds.py +0 -38
  835. mindspore/ops/_op_impl/tbe/floor_mod.py +0 -39
  836. mindspore/ops/_op_impl/tbe/floor_mod_ds.py +0 -40
  837. mindspore/ops/_op_impl/tbe/fused_dbn_dw.py +0 -52
  838. mindspore/ops/_op_impl/tbe/fused_mul_add.py +0 -38
  839. mindspore/ops/_op_impl/tbe/fused_mul_add_n.py +0 -48
  840. mindspore/ops/_op_impl/tbe/fused_mul_add_n_l2loss.py +0 -53
  841. mindspore/ops/_op_impl/tbe/fused_mul_apply_momentum.py +0 -57
  842. mindspore/ops/_op_impl/tbe/fused_mul_apply_momentum_extern.py +0 -67
  843. mindspore/ops/_op_impl/tbe/gather_nd.py +0 -52
  844. mindspore/ops/_op_impl/tbe/gather_nd_ds.py +0 -48
  845. mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
  846. mindspore/ops/_op_impl/tbe/gather_v2_ds.py +0 -68
  847. mindspore/ops/_op_impl/tbe/gelu.py +0 -37
  848. mindspore/ops/_op_impl/tbe/gelu_ds.py +0 -38
  849. mindspore/ops/_op_impl/tbe/gelu_grad.py +0 -42
  850. mindspore/ops/_op_impl/tbe/gelu_grad_ds.py +0 -43
  851. mindspore/ops/_op_impl/tbe/ger.py +0 -43
  852. mindspore/ops/_op_impl/tbe/ger_ds.py +0 -44
  853. mindspore/ops/_op_impl/tbe/greater.py +0 -43
  854. mindspore/ops/_op_impl/tbe/greater_equal.py +0 -41
  855. mindspore/ops/_op_impl/tbe/greater_equal_ds.py +0 -42
  856. mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad.py +0 -51
  857. mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad_cell.py +0 -52
  858. mindspore/ops/_op_impl/tbe/hard_swish.py +0 -37
  859. mindspore/ops/_op_impl/tbe/hard_swish_ds.py +0 -38
  860. mindspore/ops/_op_impl/tbe/hard_swish_grad.py +0 -41
  861. mindspore/ops/_op_impl/tbe/hard_swish_grad_ds.py +0 -42
  862. mindspore/ops/_op_impl/tbe/histogram_fixed_width.py +0 -40
  863. mindspore/ops/_op_impl/tbe/hshrink.py +0 -33
  864. mindspore/ops/_op_impl/tbe/hshrink_grad.py +0 -37
  865. mindspore/ops/_op_impl/tbe/hsigmoid.py +0 -45
  866. mindspore/ops/_op_impl/tbe/hsigmoid_grad.py +0 -39
  867. mindspore/ops/_op_impl/tbe/ifmr.py +0 -47
  868. mindspore/ops/_op_impl/tbe/ifmr_ds.py +0 -48
  869. mindspore/ops/_op_impl/tbe/im2col.py +0 -42
  870. mindspore/ops/_op_impl/tbe/in_top_k.py +0 -37
  871. mindspore/ops/_op_impl/tbe/inplace_add.py +0 -39
  872. mindspore/ops/_op_impl/tbe/inplace_index_add.py +0 -46
  873. mindspore/ops/_op_impl/tbe/inplace_sub.py +0 -39
  874. mindspore/ops/_op_impl/tbe/inplace_update.py +0 -39
  875. mindspore/ops/_op_impl/tbe/inplace_update_ds.py +0 -40
  876. mindspore/ops/_op_impl/tbe/inv.py +0 -38
  877. mindspore/ops/_op_impl/tbe/inv_ds.py +0 -39
  878. mindspore/ops/_op_impl/tbe/inv_grad.py +0 -40
  879. mindspore/ops/_op_impl/tbe/inv_grad_ds.py +0 -41
  880. mindspore/ops/_op_impl/tbe/invert.py +0 -37
  881. mindspore/ops/_op_impl/tbe/invert_ds.py +0 -38
  882. mindspore/ops/_op_impl/tbe/iou.py +0 -38
  883. mindspore/ops/_op_impl/tbe/iou_ds.py +0 -39
  884. mindspore/ops/_op_impl/tbe/is_close.py +0 -40
  885. mindspore/ops/_op_impl/tbe/kl_div_loss.py +0 -38
  886. mindspore/ops/_op_impl/tbe/kl_div_loss_ds.py +0 -39
  887. mindspore/ops/_op_impl/tbe/kl_div_loss_grad.py +0 -40
  888. mindspore/ops/_op_impl/tbe/l2_loss.py +0 -36
  889. mindspore/ops/_op_impl/tbe/l2_loss_ds.py +0 -37
  890. mindspore/ops/_op_impl/tbe/l2_normalize.py +0 -38
  891. mindspore/ops/_op_impl/tbe/l2_normalize_grad.py +0 -40
  892. mindspore/ops/_op_impl/tbe/lamb_apply_optimizer_assign.py +0 -55
  893. mindspore/ops/_op_impl/tbe/lamb_apply_weight_assign.py +0 -42
  894. mindspore/ops/_op_impl/tbe/lamb_next_mv.py +0 -59
  895. mindspore/ops/_op_impl/tbe/lamb_next_mv_with_decay.py +0 -59
  896. mindspore/ops/_op_impl/tbe/lamb_next_right.py +0 -44
  897. mindspore/ops/_op_impl/tbe/lamb_update_with_lr.py +0 -48
  898. mindspore/ops/_op_impl/tbe/lamb_update_with_lr_v2.py +0 -44
  899. mindspore/ops/_op_impl/tbe/lars_update.py +0 -50
  900. mindspore/ops/_op_impl/tbe/lars_update_ds.py +0 -51
  901. mindspore/ops/_op_impl/tbe/layer_norm.py +0 -46
  902. mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop.py +0 -44
  903. mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_ds.py +0 -45
  904. mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -40
  905. mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2_ds.py +0 -41
  906. mindspore/ops/_op_impl/tbe/layer_norm_ds.py +0 -47
  907. mindspore/ops/_op_impl/tbe/layer_norm_grad.py +0 -48
  908. mindspore/ops/_op_impl/tbe/layer_norm_x_backprop.py +0 -43
  909. mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_ds.py +0 -44
  910. mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_v2.py +0 -45
  911. mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_v2_ds.py +0 -45
  912. mindspore/ops/_op_impl/tbe/lerp.py +0 -38
  913. mindspore/ops/_op_impl/tbe/less.py +0 -41
  914. mindspore/ops/_op_impl/tbe/less_ds.py +0 -42
  915. mindspore/ops/_op_impl/tbe/less_equal.py +0 -41
  916. mindspore/ops/_op_impl/tbe/less_equal_ds.py +0 -42
  917. mindspore/ops/_op_impl/tbe/log.py +0 -40
  918. mindspore/ops/_op_impl/tbe/log1p.py +0 -37
  919. mindspore/ops/_op_impl/tbe/log1p_ds.py +0 -38
  920. mindspore/ops/_op_impl/tbe/log_ds.py +0 -41
  921. mindspore/ops/_op_impl/tbe/logical_and.py +0 -37
  922. mindspore/ops/_op_impl/tbe/logical_and_ds.py +0 -38
  923. mindspore/ops/_op_impl/tbe/logical_not.py +0 -36
  924. mindspore/ops/_op_impl/tbe/logical_not_ds.py +0 -37
  925. mindspore/ops/_op_impl/tbe/logical_or.py +0 -37
  926. mindspore/ops/_op_impl/tbe/logical_or_ds.py +0 -38
  927. mindspore/ops/_op_impl/tbe/logsoftmax.py +0 -37
  928. mindspore/ops/_op_impl/tbe/logsoftmax_ds.py +0 -38
  929. mindspore/ops/_op_impl/tbe/logsoftmax_grad.py +0 -38
  930. mindspore/ops/_op_impl/tbe/logsoftmax_grad_ds.py +0 -39
  931. mindspore/ops/_op_impl/tbe/lp_norm.py +0 -40
  932. mindspore/ops/_op_impl/tbe/lp_norm_ds.py +0 -41
  933. mindspore/ops/_op_impl/tbe/lrn.py +0 -41
  934. mindspore/ops/_op_impl/tbe/lrn_grad.py +0 -42
  935. mindspore/ops/_op_impl/tbe/lstm_input_grad.py +0 -51
  936. mindspore/ops/_op_impl/tbe/masked_fill.py +0 -40
  937. mindspore/ops/_op_impl/tbe/masked_fill_ds.py +0 -41
  938. mindspore/ops/_op_impl/tbe/matmul.py +0 -53
  939. mindspore/ops/_op_impl/tbe/matmul_ds.py +0 -47
  940. mindspore/ops/_op_impl/tbe/matmul_v2.py +0 -50
  941. mindspore/ops/_op_impl/tbe/matrix_diag.py +0 -45
  942. mindspore/ops/_op_impl/tbe/matrix_diag_part.py +0 -45
  943. mindspore/ops/_op_impl/tbe/matrix_set_diag.py +0 -46
  944. mindspore/ops/_op_impl/tbe/max_pool.py +0 -39
  945. mindspore/ops/_op_impl/tbe/max_pool3d.py +0 -44
  946. mindspore/ops/_op_impl/tbe/max_pool3d_grad.py +0 -43
  947. mindspore/ops/_op_impl/tbe/max_pool3d_grad_grad.py +0 -44
  948. mindspore/ops/_op_impl/tbe/max_pool_ds.py +0 -40
  949. mindspore/ops/_op_impl/tbe/max_pool_grad.py +0 -43
  950. mindspore/ops/_op_impl/tbe/max_pool_grad_grad.py +0 -41
  951. mindspore/ops/_op_impl/tbe/max_pool_grad_grad_with_argmax.py +0 -41
  952. mindspore/ops/_op_impl/tbe/max_pool_grad_with_argmax.py +0 -42
  953. mindspore/ops/_op_impl/tbe/max_pool_with_argmax.py +0 -40
  954. mindspore/ops/_op_impl/tbe/maximum.py +0 -39
  955. mindspore/ops/_op_impl/tbe/maximum_ds.py +0 -40
  956. mindspore/ops/_op_impl/tbe/maximum_grad.py +0 -46
  957. mindspore/ops/_op_impl/tbe/maximum_grad_ds.py +0 -47
  958. mindspore/ops/_op_impl/tbe/mem_set.py +0 -38
  959. mindspore/ops/_op_impl/tbe/minimum.py +0 -40
  960. mindspore/ops/_op_impl/tbe/minimum_ds.py +0 -41
  961. mindspore/ops/_op_impl/tbe/minimum_grad.py +0 -46
  962. mindspore/ops/_op_impl/tbe/minimum_grad_ds.py +0 -47
  963. mindspore/ops/_op_impl/tbe/mish.py +0 -37
  964. mindspore/ops/_op_impl/tbe/mod.py +0 -41
  965. mindspore/ops/_op_impl/tbe/mod_ds.py +0 -42
  966. mindspore/ops/_op_impl/tbe/mul.py +0 -37
  967. mindspore/ops/_op_impl/tbe/mul_ds.py +0 -38
  968. mindspore/ops/_op_impl/tbe/mul_no_nan.py +0 -39
  969. mindspore/ops/_op_impl/tbe/mul_no_nan_ds.py +0 -40
  970. mindspore/ops/_op_impl/tbe/multilabel_margin_loss.py +0 -39
  971. mindspore/ops/_op_impl/tbe/neg.py +0 -39
  972. mindspore/ops/_op_impl/tbe/neg_ds.py +0 -40
  973. mindspore/ops/_op_impl/tbe/new_im2col.py +0 -40
  974. mindspore/ops/_op_impl/tbe/nll_loss.py +0 -41
  975. mindspore/ops/_op_impl/tbe/nll_loss_grad.py +0 -44
  976. mindspore/ops/_op_impl/tbe/nms_with_mask.py +0 -39
  977. mindspore/ops/_op_impl/tbe/not_equal.py +0 -41
  978. mindspore/ops/_op_impl/tbe/not_equal_ds.py +0 -42
  979. mindspore/ops/_op_impl/tbe/npu_alloc_float_status.py +0 -34
  980. mindspore/ops/_op_impl/tbe/npu_clear_float_status.py +0 -35
  981. mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +0 -35
  982. mindspore/ops/_op_impl/tbe/npu_get_float_status.py +0 -35
  983. mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +0 -35
  984. mindspore/ops/_op_impl/tbe/one_hot.py +0 -48
  985. mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -45
  986. mindspore/ops/_op_impl/tbe/ones_like.py +0 -40
  987. mindspore/ops/_op_impl/tbe/ones_like_ds.py +0 -41
  988. mindspore/ops/_op_impl/tbe/p_s_r_o_i_pooling.py +0 -40
  989. mindspore/ops/_op_impl/tbe/p_s_r_o_i_pooling_grad.py +0 -40
  990. mindspore/ops/_op_impl/tbe/pack.py +0 -58
  991. mindspore/ops/_op_impl/tbe/pack_ds.py +0 -59
  992. mindspore/ops/_op_impl/tbe/pad_d.py +0 -40
  993. mindspore/ops/_op_impl/tbe/pad_d_ds.py +0 -41
  994. mindspore/ops/_op_impl/tbe/parallel_concat.py +0 -70
  995. mindspore/ops/_op_impl/tbe/parallel_resize_bilinear.py +0 -45
  996. mindspore/ops/_op_impl/tbe/parallel_resize_bilinear_grad.py +0 -44
  997. mindspore/ops/_op_impl/tbe/pdist.py +0 -36
  998. mindspore/ops/_op_impl/tbe/pooling.py +0 -46
  999. mindspore/ops/_op_impl/tbe/population_count.py +0 -38
  1000. mindspore/ops/_op_impl/tbe/pow.py +0 -41
  1001. mindspore/ops/_op_impl/tbe/pow_ds.py +0 -42
  1002. mindspore/ops/_op_impl/tbe/prelu.py +0 -37
  1003. mindspore/ops/_op_impl/tbe/prelu_ds.py +0 -38
  1004. mindspore/ops/_op_impl/tbe/prelu_grad.py +0 -40
  1005. mindspore/ops/_op_impl/tbe/range.py +0 -39
  1006. mindspore/ops/_op_impl/tbe/real_div.py +0 -38
  1007. mindspore/ops/_op_impl/tbe/real_div_ds.py +0 -39
  1008. mindspore/ops/_op_impl/tbe/reciprocal.py +0 -36
  1009. mindspore/ops/_op_impl/tbe/reciprocal_ds.py +0 -37
  1010. mindspore/ops/_op_impl/tbe/reciprocal_grad.py +0 -38
  1011. mindspore/ops/_op_impl/tbe/reciprocal_grad_ds.py +0 -39
  1012. mindspore/ops/_op_impl/tbe/reduce_all.py +0 -38
  1013. mindspore/ops/_op_impl/tbe/reduce_all_ds.py +0 -39
  1014. mindspore/ops/_op_impl/tbe/reduce_any.py +0 -38
  1015. mindspore/ops/_op_impl/tbe/reduce_any_ds.py +0 -39
  1016. mindspore/ops/_op_impl/tbe/reduce_max.py +0 -43
  1017. mindspore/ops/_op_impl/tbe/reduce_max_ds.py +0 -41
  1018. mindspore/ops/_op_impl/tbe/reduce_mean.py +0 -40
  1019. mindspore/ops/_op_impl/tbe/reduce_mean_ds.py +0 -42
  1020. mindspore/ops/_op_impl/tbe/reduce_min.py +0 -41
  1021. mindspore/ops/_op_impl/tbe/reduce_min_ds.py +0 -41
  1022. mindspore/ops/_op_impl/tbe/reduce_prod.py +0 -42
  1023. mindspore/ops/_op_impl/tbe/reduce_prod_ds.py +0 -41
  1024. mindspore/ops/_op_impl/tbe/reduce_std.py +0 -44
  1025. mindspore/ops/_op_impl/tbe/reduce_sum.py +0 -39
  1026. mindspore/ops/_op_impl/tbe/reduce_sum_ds.py +0 -41
  1027. mindspore/ops/_op_impl/tbe/relu.py +0 -39
  1028. mindspore/ops/_op_impl/tbe/relu6.py +0 -38
  1029. mindspore/ops/_op_impl/tbe/relu6_ds.py +0 -39
  1030. mindspore/ops/_op_impl/tbe/relu6_grad.py +0 -43
  1031. mindspore/ops/_op_impl/tbe/relu6_grad_ds.py +0 -44
  1032. mindspore/ops/_op_impl/tbe/relu_ds.py +0 -40
  1033. mindspore/ops/_op_impl/tbe/relu_grad.py +0 -41
  1034. mindspore/ops/_op_impl/tbe/relu_grad_ds.py +0 -42
  1035. mindspore/ops/_op_impl/tbe/relu_grad_v2.py +0 -40
  1036. mindspore/ops/_op_impl/tbe/relu_grad_v2_ds.py +0 -41
  1037. mindspore/ops/_op_impl/tbe/relu_v2.py +0 -40
  1038. mindspore/ops/_op_impl/tbe/relu_v2_ds.py +0 -41
  1039. mindspore/ops/_op_impl/tbe/renorm.py +0 -39
  1040. mindspore/ops/_op_impl/tbe/resize_bilinear.py +0 -40
  1041. mindspore/ops/_op_impl/tbe/resize_bilinear_grad.py +0 -41
  1042. mindspore/ops/_op_impl/tbe/resize_bilinear_v2.py +0 -43
  1043. mindspore/ops/_op_impl/tbe/resize_nearest_neighbor.py +0 -40
  1044. mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_ds.py +0 -40
  1045. mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_grad.py +0 -39
  1046. mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_grad_ds.py +0 -42
  1047. mindspore/ops/_op_impl/tbe/reverse_v2_d.py +0 -37
  1048. mindspore/ops/_op_impl/tbe/rint.py +0 -37
  1049. mindspore/ops/_op_impl/tbe/rint_ds.py +0 -38
  1050. mindspore/ops/_op_impl/tbe/roi_align.py +0 -43
  1051. mindspore/ops/_op_impl/tbe/roi_align_ds.py +0 -44
  1052. mindspore/ops/_op_impl/tbe/roi_align_grad.py +0 -43
  1053. mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +0 -44
  1054. mindspore/ops/_op_impl/tbe/roll.py +0 -42
  1055. mindspore/ops/_op_impl/tbe/round.py +0 -38
  1056. mindspore/ops/_op_impl/tbe/round_ds.py +0 -39
  1057. mindspore/ops/_op_impl/tbe/rsqrt.py +0 -37
  1058. mindspore/ops/_op_impl/tbe/rsqrt_ds.py +0 -38
  1059. mindspore/ops/_op_impl/tbe/rsqrt_grad.py +0 -40
  1060. mindspore/ops/_op_impl/tbe/rsqrt_grad_ds.py +0 -41
  1061. mindspore/ops/_op_impl/tbe/scatter_add.py +0 -44
  1062. mindspore/ops/_op_impl/tbe/scatter_div.py +0 -46
  1063. mindspore/ops/_op_impl/tbe/scatter_max.py +0 -45
  1064. mindspore/ops/_op_impl/tbe/scatter_min.py +0 -45
  1065. mindspore/ops/_op_impl/tbe/scatter_mul.py +0 -44
  1066. mindspore/ops/_op_impl/tbe/scatter_nd.py +0 -41
  1067. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -45
  1068. mindspore/ops/_op_impl/tbe/scatter_nd_d.py +0 -41
  1069. mindspore/ops/_op_impl/tbe/scatter_nd_ds.py +0 -49
  1070. mindspore/ops/_op_impl/tbe/scatter_nd_sub.py +0 -47
  1071. mindspore/ops/_op_impl/tbe/scatter_nd_sub_ds.py +0 -48
  1072. mindspore/ops/_op_impl/tbe/scatter_nd_update.py +0 -47
  1073. mindspore/ops/_op_impl/tbe/scatter_nd_update_ds.py +0 -48
  1074. mindspore/ops/_op_impl/tbe/scatter_non_aliasing_add.py +0 -39
  1075. mindspore/ops/_op_impl/tbe/scatter_non_aliasing_add_ds.py +0 -40
  1076. mindspore/ops/_op_impl/tbe/scatter_sub.py +0 -47
  1077. mindspore/ops/_op_impl/tbe/scatter_sub_ds.py +0 -48
  1078. mindspore/ops/_op_impl/tbe/scatter_update.py +0 -43
  1079. mindspore/ops/_op_impl/tbe/select.py +0 -38
  1080. mindspore/ops/_op_impl/tbe/select_ds.py +0 -39
  1081. mindspore/ops/_op_impl/tbe/selu.py +0 -39
  1082. mindspore/ops/_op_impl/tbe/selu_ds.py +0 -40
  1083. mindspore/ops/_op_impl/tbe/sgd.py +0 -62
  1084. mindspore/ops/_op_impl/tbe/sigmoid.py +0 -37
  1085. mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits.py +0 -41
  1086. mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_ds.py +0 -42
  1087. mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_grad.py +0 -42
  1088. mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_grad_ds.py +0 -43
  1089. mindspore/ops/_op_impl/tbe/sigmoid_ds.py +0 -38
  1090. mindspore/ops/_op_impl/tbe/sigmoid_grad.py +0 -39
  1091. mindspore/ops/_op_impl/tbe/sigmoid_grad_ds.py +0 -40
  1092. mindspore/ops/_op_impl/tbe/sign.py +0 -38
  1093. mindspore/ops/_op_impl/tbe/sign_ds.py +0 -39
  1094. mindspore/ops/_op_impl/tbe/sin.py +0 -37
  1095. mindspore/ops/_op_impl/tbe/sin_ds.py +0 -38
  1096. mindspore/ops/_op_impl/tbe/sinh.py +0 -37
  1097. mindspore/ops/_op_impl/tbe/sinh_ds.py +0 -38
  1098. mindspore/ops/_op_impl/tbe/slice.py +0 -58
  1099. mindspore/ops/_op_impl/tbe/smooth_l1_loss.py +0 -45
  1100. mindspore/ops/_op_impl/tbe/smooth_l1_loss_ds.py +0 -46
  1101. mindspore/ops/_op_impl/tbe/smooth_l1_loss_grad.py +0 -46
  1102. mindspore/ops/_op_impl/tbe/smooth_l1_loss_grad_ds.py +0 -47
  1103. mindspore/ops/_op_impl/tbe/soft_margin_loss.py +0 -38
  1104. mindspore/ops/_op_impl/tbe/soft_margin_loss_grad.py +0 -39
  1105. mindspore/ops/_op_impl/tbe/soft_shrink.py +0 -36
  1106. mindspore/ops/_op_impl/tbe/soft_shrink_grad.py +0 -38
  1107. mindspore/ops/_op_impl/tbe/softmax.py +0 -37
  1108. mindspore/ops/_op_impl/tbe/softmax_cross_entropy_with_logits.py +0 -38
  1109. mindspore/ops/_op_impl/tbe/softmax_cross_entropy_with_logits_ds.py +0 -39
  1110. mindspore/ops/_op_impl/tbe/softmax_ds.py +0 -38
  1111. mindspore/ops/_op_impl/tbe/softmax_grad_ext.py +0 -42
  1112. mindspore/ops/_op_impl/tbe/softmax_v2_with_dropout_do_mask_v3.py +0 -39
  1113. mindspore/ops/_op_impl/tbe/softplus.py +0 -37
  1114. mindspore/ops/_op_impl/tbe/softplus_ds.py +0 -38
  1115. mindspore/ops/_op_impl/tbe/softplus_grad.py +0 -38
  1116. mindspore/ops/_op_impl/tbe/softplus_grad_ds.py +0 -38
  1117. mindspore/ops/_op_impl/tbe/softsign.py +0 -37
  1118. mindspore/ops/_op_impl/tbe/softsign_ds.py +0 -38
  1119. mindspore/ops/_op_impl/tbe/sort.py +0 -38
  1120. mindspore/ops/_op_impl/tbe/sort_ds.py +0 -39
  1121. mindspore/ops/_op_impl/tbe/space_to_batch.py +0 -38
  1122. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +0 -38
  1123. mindspore/ops/_op_impl/tbe/space_to_depth.py +0 -47
  1124. mindspore/ops/_op_impl/tbe/sparse_apply_adadelta.py +0 -56
  1125. mindspore/ops/_op_impl/tbe/sparse_apply_adagrad.py +0 -45
  1126. mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_ds.py +0 -46
  1127. mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_v2.py +0 -46
  1128. mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_v2_ds.py +0 -47
  1129. mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_d.py +0 -53
  1130. mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_d_ds.py +0 -50
  1131. mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_v2.py +0 -50
  1132. mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad.py +0 -66
  1133. mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad_ds.py +0 -67
  1134. mindspore/ops/_op_impl/tbe/sparse_apply_r_m_s_prop.py +0 -57
  1135. mindspore/ops/_op_impl/tbe/sparse_apply_r_m_s_prop_ds.py +0 -58
  1136. mindspore/ops/_op_impl/tbe/sparse_gather_v2.py +0 -56
  1137. mindspore/ops/_op_impl/tbe/sparse_gather_v2_ds.py +0 -58
  1138. mindspore/ops/_op_impl/tbe/split_d.py +0 -38
  1139. mindspore/ops/_op_impl/tbe/split_d_ds.py +0 -39
  1140. mindspore/ops/_op_impl/tbe/split_v.py +0 -39
  1141. mindspore/ops/_op_impl/tbe/splitv.py +0 -39
  1142. mindspore/ops/_op_impl/tbe/sqrt.py +0 -37
  1143. mindspore/ops/_op_impl/tbe/sqrt_ds.py +0 -38
  1144. mindspore/ops/_op_impl/tbe/sqrt_grad.py +0 -43
  1145. mindspore/ops/_op_impl/tbe/sqrt_grad_ds.py +0 -44
  1146. mindspore/ops/_op_impl/tbe/square.py +0 -38
  1147. mindspore/ops/_op_impl/tbe/square_ds.py +0 -39
  1148. mindspore/ops/_op_impl/tbe/square_sum_all.py +0 -40
  1149. mindspore/ops/_op_impl/tbe/square_sum_all_ds.py +0 -41
  1150. mindspore/ops/_op_impl/tbe/square_sum_v1.py +0 -38
  1151. mindspore/ops/_op_impl/tbe/square_sum_v1_ds.py +0 -39
  1152. mindspore/ops/_op_impl/tbe/square_sum_v2.py +0 -39
  1153. mindspore/ops/_op_impl/tbe/squared_difference.py +0 -39
  1154. mindspore/ops/_op_impl/tbe/squared_difference_ds.py +0 -41
  1155. mindspore/ops/_op_impl/tbe/squeeze.py +0 -37
  1156. mindspore/ops/_op_impl/tbe/strided_read.py +0 -38
  1157. mindspore/ops/_op_impl/tbe/strided_slice_d.py +0 -44
  1158. mindspore/ops/_op_impl/tbe/strided_slice_ds.py +0 -71
  1159. mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +0 -51
  1160. mindspore/ops/_op_impl/tbe/strided_slice_grad_ds.py +0 -57
  1161. mindspore/ops/_op_impl/tbe/strided_write.py +0 -38
  1162. mindspore/ops/_op_impl/tbe/sub.py +0 -39
  1163. mindspore/ops/_op_impl/tbe/sub_ds.py +0 -40
  1164. mindspore/ops/_op_impl/tbe/tan.py +0 -38
  1165. mindspore/ops/_op_impl/tbe/tan_ds.py +0 -39
  1166. mindspore/ops/_op_impl/tbe/tanh.py +0 -37
  1167. mindspore/ops/_op_impl/tbe/tanh_ds.py +0 -38
  1168. mindspore/ops/_op_impl/tbe/tanh_grad.py +0 -39
  1169. mindspore/ops/_op_impl/tbe/tanh_grad_ds.py +0 -40
  1170. mindspore/ops/_op_impl/tbe/tensor_move.py +0 -49
  1171. mindspore/ops/_op_impl/tbe/tensor_move_ds.py +0 -50
  1172. mindspore/ops/_op_impl/tbe/tensor_scatter_update.py +0 -41
  1173. mindspore/ops/_op_impl/tbe/tile.py +0 -37
  1174. mindspore/ops/_op_impl/tbe/tile_ds.py +0 -42
  1175. mindspore/ops/_op_impl/tbe/top_k.py +0 -42
  1176. mindspore/ops/_op_impl/tbe/top_k_ds.py +0 -43
  1177. mindspore/ops/_op_impl/tbe/trans_data.py +0 -167
  1178. mindspore/ops/_op_impl/tbe/trans_data_ds.py +0 -180
  1179. mindspore/ops/_op_impl/tbe/trans_data_rnn.py +0 -44
  1180. mindspore/ops/_op_impl/tbe/transpose.py +0 -60
  1181. mindspore/ops/_op_impl/tbe/transpose_d.py +0 -47
  1182. mindspore/ops/_op_impl/tbe/transpose_nod.py +0 -60
  1183. mindspore/ops/_op_impl/tbe/trunc.py +0 -39
  1184. mindspore/ops/_op_impl/tbe/truncate_div.py +0 -41
  1185. mindspore/ops/_op_impl/tbe/truncate_div_ds.py +0 -42
  1186. mindspore/ops/_op_impl/tbe/truncate_mod.py +0 -41
  1187. mindspore/ops/_op_impl/tbe/truncate_mod_ds.py +0 -42
  1188. mindspore/ops/_op_impl/tbe/unpack.py +0 -38
  1189. mindspore/ops/_op_impl/tbe/unpack_ds.py +0 -39
  1190. mindspore/ops/_op_impl/tbe/unsorted_segment_max.py +0 -49
  1191. mindspore/ops/_op_impl/tbe/unsorted_segment_max_ds.py +0 -40
  1192. mindspore/ops/_op_impl/tbe/unsorted_segment_min.py +0 -49
  1193. mindspore/ops/_op_impl/tbe/unsorted_segment_min_ds.py +0 -40
  1194. mindspore/ops/_op_impl/tbe/unsorted_segment_prod.py +0 -49
  1195. mindspore/ops/_op_impl/tbe/unsorted_segment_prod_ds.py +0 -38
  1196. mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +0 -38
  1197. mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +0 -41
  1198. mindspore/ops/_op_impl/tbe/wts_arq.py +0 -40
  1199. mindspore/ops/_op_impl/tbe/xdivy.py +0 -38
  1200. mindspore/ops/_op_impl/tbe/xdivy_ds.py +0 -39
  1201. mindspore/ops/_op_impl/tbe/xlogy.py +0 -38
  1202. mindspore/ops/_op_impl/tbe/xlogy_ds.py +0 -39
  1203. mindspore/ops/_op_impl/tbe/zeros_like.py +0 -41
  1204. mindspore/ops/_op_impl/tbe/zeros_like_ds.py +0 -42
  1205. mindspore/ops/_tracefunc.py +0 -241
  1206. mindspore/ops/arg_dtype_cast.py +0 -54
  1207. mindspore/ops/silent_check.py +0 -162
  1208. mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
  1209. mindspore/profiler/parser/msadvisor_parser.py +0 -240
  1210. mindspore/rewrite/api/tree_node_helper.py +0 -60
  1211. mindspore/rewrite/ast_helpers/ast_creator.py +0 -115
  1212. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +0 -267
  1213. mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +0 -228
  1214. mindspore/rewrite/namespace.py +0 -53
  1215. mindspore-2.2.14.dist-info/RECORD +0 -1924
  1216. {mindspore-2.2.14.dist-info → mindspore-2.4.0.dist-info}/WHEEL +0 -0
  1217. {mindspore-2.2.14.dist-info → mindspore-2.4.0.dist-info}/top_level.txt +0 -0
@@ -17,12 +17,13 @@ import os
17
17
  import stat
18
18
  import time
19
19
  import json
20
+ from json import JSONDecodeError
20
21
  import glob
21
- import subprocess
22
- import csv
23
22
  import socket
24
- import shutil
23
+ import multiprocessing
25
24
  from enum import Enum
25
+ from typing import List
26
+ from sys import getsizeof
26
27
  import numpy as np
27
28
 
28
29
  from mindspore import log as logger, context
@@ -30,34 +31,42 @@ from mindspore.context import get_auto_parallel_context
30
31
  from mindspore.communication.management import GlobalComm, get_rank, get_group_size, get_local_rank
31
32
  import mindspore._c_expression as c_expression
32
33
  import mindspore._c_dataengine as cde
34
+ from mindspore._c_expression import _framework_profiler_enable_mi
33
35
  from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
34
- ProfilerIOException, ProfilerException, ProfilerRawFileException
36
+ ProfilerIOException, ProfilerException, ProfilerRawFileException, ProfilerParamTypeErrorException
35
37
  from mindspore.profiler.common.exceptions.exceptions import ProfilerPathErrorException
36
38
  from mindspore.profiler.common.exceptions.exceptions import ProfilerDirNotFoundException
37
- from mindspore.profiler.common.util import get_file_path
39
+ from mindspore.profiler.common.util import get_file_path, ProfilerPathManager
40
+ from mindspore.profiler.common.process_pool import MultiProcessPool
38
41
  from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
39
42
  from mindspore.profiler.parser.framework_parser import GpuFrameWorkParser, DynamicFrameWorkParser
40
43
  from mindspore.profiler.parser.integrator import Integrator, DeviceTarget
44
+ from mindspore.profiler.parser.ascend_analysis.function_event import CANNEvent
41
45
  from mindspore.profiler.parser.cpu_gpu_timeline_generator import GpuTimelineGenerator, CpuTimelineGenerator
42
46
  from mindspore.profiler.parser.ascend_timeline_generator import AscendTimelineGenerator
43
- from mindspore.profiler.parser.memory_usage_parser import MemoryUsageParser
44
47
  from mindspore.profiler.parser.minddata_parser import MinddataParser
45
48
  from mindspore.profiler.parser.minddata_analyzer import MinddataProfilingAnalyzer
46
49
  from mindspore.profiler.parser.minddata_pipeline_parser import \
47
50
  MinddataPipelineParser
48
- from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser, AscendStepTraceParser
49
- from mindspore.profiler.parser.msadvisor_analyzer import Msadvisor
51
+ from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser
50
52
  from mindspore.profiler.parser.profiler_info import ProfilerInfo
51
53
  from mindspore.common.api import _pynative_executor
52
54
  from mindspore.profiler.parser.ascend_msprof_exporter import AscendMsprofExporter
53
- from mindspore.profiler.parser.ascend_msprof_generator import AscendMsprofDataGenerator, AscendMsprofDataGeneratorOld
55
+ from mindspore.profiler.parser.ascend_msprof_generator import AscendMsprofDataGenerator
54
56
  from mindspore.profiler.parser.ascend_fpbp_generator import AscendFPBPGenerator
55
57
  from mindspore.profiler.parser.ascend_op_generator import AscendOPGenerator
56
58
  from mindspore.profiler.parser.ascend_steptrace_generator import AscendStepTraceGenerator
57
59
  from mindspore.profiler.parser.ascend_flops_generator import AscendFlopsGenerator
58
60
  from mindspore.profiler.parser.ascend_cluster_generator import AscendClusterGenerator
59
- from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator, AscendHCCLGeneratorOld
61
+ from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator
60
62
  from mindspore.profiler.parser.ascend_communicate_generator import AscendCommunicationGenerator
63
+ from mindspore.profiler.parser.ascend_memory_generator import AscendMemoryGenerator
64
+ from mindspore.profiler.parser.ascend_integrate_generator import AscendIntegrateGenerator
65
+ from mindspore.profiler.parser.ascend_analysis.file_manager import FileManager
66
+ from mindspore.profiler.parser.ascend_analysis.path_manager import PathManager
67
+ from mindspore.profiler.parser.ascend_analysis.constant import Constant
68
+ from mindspore.profiler.common.util import timeit
69
+
61
70
 
62
71
  INIT_OP_NAME = 'Default/InitDataSetQueue'
63
72
 
@@ -68,10 +77,24 @@ AICORE_METRICS_DICT = {
68
77
  3: "MemoryL0",
69
78
  4: "ResourceConflictRatio",
70
79
  5: "MemoryUB",
80
+ 6: "L2Cache",
71
81
  -1: "None"
72
82
  }
73
83
 
74
84
 
85
+ class ModelTraingMode(Enum):
86
+ PYNATIVE = 0
87
+ GRAPH = 1
88
+ KERNEL_BY_KERNEL = 2
89
+ UNKNOWN = 3
90
+
91
+
92
+ class ProfilerLevel(Enum):
93
+ Level0 = "Level0"
94
+ Level1 = "Level1"
95
+ Level2 = "Level2"
96
+
97
+
75
98
  class DeviceSupportParam(Enum):
76
99
  """The device target enum."""
77
100
  CPU = ['start', 'start_profile', 'output_path', 'timeline_limit', 'profile_framework', 'op_time']
@@ -81,16 +104,20 @@ class DeviceSupportParam(Enum):
81
104
  ]
82
105
  ASCEND = [
83
106
  'start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'profile_memory',
84
- 'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', 'op_time', 'ascend_job_id',
85
- 'profile_framework'
107
+ 'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', 'hbm_ddr', 'pcie', 'op_time',
108
+ 'ascend_job_id', 'profile_framework', 'with_stack', 'profiler_level', 'data_simplification'
86
109
  ]
87
110
 
88
111
 
89
112
  ALWAYS_VALID_PARAM = [
90
113
  'start', 'start_profile', 'output_path', 'data_process', 'parallel_strategy', 'l2_cache',
91
- 'ascend_job_id', 'op_time', 'profile_framework'
114
+ 'hbm_ddr', 'pcie', 'ascend_job_id', 'op_time', 'profile_framework', 'profiler_level'
92
115
  ]
93
116
 
117
+ ANALYSIS_ASYNC_MODE = 'async'
118
+ ANALYSIS_SYNC_MODE = 'sync'
119
+ DEFAULT_MODEL_ID = 4294967295
120
+
94
121
 
95
122
  def _environment_check():
96
123
  if c_expression.security.enable_security():
@@ -136,182 +163,36 @@ def _calculate_dataset_item(row, execution_time_map, ts_map):
136
163
  logger.warning("Can not map the start time for item: %s.", row)
137
164
 
138
165
 
139
- def _calculate_dataset_execution_time(input_file, output_file):
140
- r"""
141
- Parse the host info into timeline file, so as to show on UI.
142
-
143
- Args:
144
- input_file: the original host_info file, in csv format.
145
- output_file: the output file, in csv format.
146
- """
147
- input_file = validate_and_normalize_path(input_file)
148
- # execution_time_map is used to store the ExecutionCalculator for each stage.
149
- execution_time_map = {}
150
- # ts_map is used to store the start time of each event_stage_tid_pid.
151
- ts_map = {}
152
- with open(input_file, 'r') as f:
153
- for row in csv.DictReader(f):
154
- try:
155
- module_name = row['module_name']
156
- if module_name != 'Dataset':
157
- continue
158
- _calculate_dataset_item(row, execution_time_map, ts_map)
159
- except KeyError as e:
160
- logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
161
- continue
162
- if ts_map:
163
- logger.warning("Only start time is record for these items:")
164
- for k, v in ts_map.items():
165
- logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
166
- output_file = validate_and_normalize_path(output_file)
167
- flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
168
- modes = stat.S_IWUSR | stat.S_IRUSR
169
- with os.fdopen(os.open(output_file, flags, modes), 'w') as f:
170
- csv_writer = csv.writer(f)
171
- csv_writer.writerow(['Operation', 'Stage', 'Occurrences', 'Avg. time (us)', 'Custom Info'])
172
- for _, v in execution_time_map.items():
173
- csv_writer.writerow([v.event, v.stage, v.count, v.average_execution, v.custom_info])
174
- os.chmod(output_file, modes)
175
- logger.info('Successfully calculate the execution time and write it to file: %s.', output_file)
176
-
177
-
178
- def _extract_timeline_item(row, time_line, ts_map):
179
- """Process one row, try to extract a timeline item."""
180
- start_end = row['start_end']
181
- event_stage_tid_pid = row['event'] + '_' + row['stage'] + '_' + row['tid'] + '_' + row['pid']
182
- # map start and end, put the mapped event into timeline.
183
- if start_end == '1' and event_stage_tid_pid in ts_map:
184
- title = row['event'] + '::' + row['stage']
185
- event = {'name': title, 'cat': row['module_name']}
186
- ts_end = int(row['time_stamp(us)'])
187
- ts = ts_map[event_stage_tid_pid]
188
- event['ts'] = ts
189
- event['dur'] = ts_end - ts
190
- event['ph'] = 'X'
191
- event['pid'] = row['pid']
192
- event['tid'] = row['tid']
193
- event['args'] = {'parent_pid': row['parent_pid']}
194
- time_line.append(event)
195
- del ts_map[event_stage_tid_pid]
196
- elif start_end == '0':
197
- ts = int(row['time_stamp(us)'])
198
- ts_map[event_stage_tid_pid] = ts
199
- # Put the instance event into timeline.
200
- elif start_end == '2':
201
- title = row['event'] + '::' + row['stage']
202
- event = {
203
- 'name': title, 'cat': row['module_name'], 'ts': int(row['time_stamp(us)']), 'ph': 'i',
204
- 'pid': row['pid'], 'tid': row['tid'], 'args': {'parent_pid': row['parent_pid']}
205
- }
206
- time_line.append(event)
207
- else:
208
- logger.warning("Can not map the start time for item: %s.", row)
209
-
210
-
211
- def _parse_host_info(input_file, output_timeline_file, output_memory_file, is_develop_user=True):
212
- r"""
213
- Parse the host info into timeline file, so as to show on UI.
214
-
215
- Args:
216
- input_file: the original host_info file, in csv format.
217
- output_timeline_file: the output timeline file, in json format.
218
- output_memory_file: the output memory_usage file, in csv format.
219
- is_develop_user: some data only shown to develop users, other users no need to analyse it.
220
- """
221
- input_file = validate_and_normalize_path(input_file)
222
- time_line = []
223
- # ts_map is used to store the start time of each event_stage_tid_pid
224
- ts_map = {}
225
- memory_header = [
226
- 'tid', 'pid', 'parent_pid', 'module_name', 'event', 'stage', 'level', 'start_end', 'custom_info',
227
- 'memory_usage(kB)', 'time_stamp(us)'
228
- ]
229
- memory_info = []
230
- with open(input_file, 'r') as f:
231
- for row in csv.DictReader(f):
232
- try:
233
- level = row['level']
234
- if level == '0' and not is_develop_user:
235
- continue
236
- if int(row['time_stamp(us)']) > 0:
237
- _extract_timeline_item(row, time_line, ts_map)
238
- if int(row['memory_usage(kB)']) > 0:
239
- memory_info.append(row)
240
- except KeyError as e:
241
- logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
242
- continue
243
- if memory_info:
244
- with os.fdopen(os.open(output_memory_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as csv_file:
245
- csv_writer = csv.DictWriter(csv_file, fieldnames=memory_header)
246
- csv_writer.writeheader()
247
- for item in memory_info:
248
- csv_writer.writerow(item)
249
- os.chmod(output_memory_file, stat.S_IREAD | stat.S_IWRITE)
250
- else:
251
- logger.warning("No memory_usage is record in file: %s", input_file)
252
-
253
- if ts_map:
254
- logger.warning("Only start time is record for these items:")
255
- for k, v in ts_map.items():
256
- logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
257
- last_dash = k.rfind('_')
258
- if last_dash == -1:
259
- logger.error("Can't find pid in the event_stage_tid_pid string: %s", k)
260
- continue
261
- second_last_dash = k.rfind('_', 0, last_dash - 1)
262
- if second_last_dash == -1:
263
- logger.error("Can't find tid in the event_stage_tid_pid string: %s", k)
264
- continue
265
- pid = k[last_dash + 1:]
266
- tid = k[second_last_dash + 1: last_dash]
267
- title = k[:second_last_dash]
268
- unfinished_timeline = {'name': title, 'pid': pid, 'tid': tid, 'ph': 'B', 'ts': int(v)}
269
- time_line.append(unfinished_timeline)
270
-
271
- if time_line:
272
- timeline_file = validate_and_normalize_path(output_timeline_file)
273
- with os.fdopen(os.open(timeline_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
274
- json.dump(time_line, json_file)
275
- os.chmod(timeline_file, stat.S_IREAD | stat.S_IWRITE)
276
- else:
277
- logger.warning("No valid time_stamp is record in file: %s", input_file)
278
-
279
-
280
- def _ascend_graph_msprof_generator(source_path, model_iteration_dict):
166
+ def _ascend_graph_msprof_generator(mindstudio_profiler_output, model_iteration_dict):
281
167
  """Executing the msprof export mode."""
282
168
  try:
283
169
  ProfilerInfo.set_export_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
284
- msprof_exporter = AscendMsprofExporter(source_path)
170
+ msprof_exporter = AscendMsprofExporter(mindstudio_profiler_output)
285
171
  flag = msprof_exporter.export(model_iteration_dict)
286
172
  ProfilerInfo.set_export_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
287
173
  return flag
288
-
289
- except ProfilerException as err:
290
- logger.warning(err.message)
174
+ except (ProfilerException, TimeoutError, FileNotFoundError, RuntimeError) as err:
175
+ logger.warning(str(err))
291
176
  return False
292
177
 
293
178
 
294
- def _ascend_graph_msprof_analyse(source_path, flag):
179
+ def _ascend_graph_msprof_analyse(mindstudio_profiler_output):
295
180
  """
296
181
  Ascend graph model msprof data analyse.
297
182
 
298
183
  Returns:
299
- list[obj]: The list is : df_op_summary, df_op_statistic, df_step_trace
184
+ list[obj]: The list is : df_op_summary, df_op_statistic, df_step_trace, df_step_trace_model
300
185
  """
301
- df_op_summary = []
302
- df_op_statistic = []
303
- df_step_trace = []
186
+ res = ([], [], [], [])
304
187
  try:
305
- if flag:
306
- msprof_analyser = AscendMsprofDataGenerator(os.path.join(source_path, 'summary'))
307
- else:
308
- msprof_analyser = AscendMsprofDataGeneratorOld(os.path.join(source_path, 'summary'))
309
- df_op_summary, df_op_statistic, df_step_trace = msprof_analyser.parse()
188
+ msprof_analyser = AscendMsprofDataGenerator(mindstudio_profiler_output)
189
+ res = msprof_analyser.parse()
190
+ return res
310
191
  except ProfilerException as err:
311
192
  logger.warning(err.message)
312
193
  finally:
313
194
  pass
314
- return df_op_summary, df_op_statistic, df_step_trace
195
+ return res
315
196
 
316
197
 
317
198
  class Profiler:
@@ -320,24 +201,33 @@ class Profiler:
320
201
  MindSpore users can import the mindspore.Profiler, initialize the Profiler object to start profiling,
321
202
  and use Profiler.analyse() to stop profiling and analyse the results.
322
203
  Users can visualize the results using the `MindSpore Insight
323
- <https://www.mindspore.cn/mindinsight/docs/en/r2.2/index.html>`_ tool.
204
+ <https://www.mindspore.cn/mindinsight/docs/en/master/index.html>`_ tool.
324
205
  Now, Profiler supports AICORE operator, AICPU operator, HostCPU operator, memory,
325
206
  correspondence, cluster, etc data analysis.
326
207
 
327
208
  Args:
328
209
  output_path (str, optional): Output data path. Default: ``"./data"`` .
210
+ profiler_level (ProfilerLevel, optional): (Ascend only) The level of profiling. Default: ``None``.
211
+
212
+ - ProfilerLevel.Level0: Leanest level of profiling data collection, collects information about the elapsed
213
+ time of the computational operators on the NPU and communication large operator information.
214
+ - ProfilerLevel.Level1: Collect more CANN layer AscendCL data and AICore performance metrics and
215
+ communication mini operator information based on Level0.
216
+ - ProfilerLevel.Level2: Collect GE and Runtime information in CANN layer on top of Level1
217
+
329
218
  op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: ``True``.
330
219
  profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
331
- a multi devices training,collect when True. Setting this parameter has no effect during single device
220
+ a multi devices training,collect when True. Setting this parameter has no effect during single card
332
221
  training. When using this parameter, `op_time` must be set to ``True`` . Default: ``False`` .
333
222
  profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
334
- When using this parameter, `op_time` must be set to True. Default: ``False`` .
223
+ When using this parameter, `op_time` must be set to True. Collecting operator memory data when the graph
224
+ compilation level is O2 requires collecting from the first step. Default: ``False`` .
335
225
  parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
336
- Default value: ``True`` .
226
+ Default value: ``False`` .
337
227
  start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
338
228
  data collection based on conditions. Default: ``True`` .
339
229
  aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected, when using this
340
- parameter, `op_time` must be set to ``True`` , and the value must be in [-1, 0, 1, 2, 3, 4, 5],
230
+ parameter, `op_time` must be set to ``True`` , and the value must be in [-1, 0, 1, 2, 3, 4, 5, 6],
341
231
  Default: ``0`` , the data items contained in each metric are as follows:
342
232
 
343
233
  - -1: Does not collect AICORE data.
@@ -348,9 +238,15 @@ class Profiler:
348
238
  - 3: MemoryL0 contains l0a_read/write_bw, l0b_read/write_bw, l0c_read/write_bw etc.
349
239
  - 4: ResourceConflictRatio contains vec_bankgroup/bank/resc_cflt_ratio etc.
350
240
  - 5: MemoryUB contains ub_read/write_bw_mte, ub_read/write_bw_vector, ub\_/write_bw_scalar etc.
241
+ - 6: L2Cache contains write_cache_hit, write_cache_miss_allocate, r0_read_cache_hit, r1_read_cache_hit etc.
242
+ This function only support Atlas A2 training series products.
351
243
 
352
244
  l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True.
353
245
  Default: ``False`` .
246
+ hbm_ddr (bool, optional): (Ascend only) Whether to collect On-Chip Memory/DDR read and write rate data,
247
+ collect when True. Default: ``False`` .
248
+ pcie (bool, optional): (Ascend only) Whether to collect PCIe bandwidth data, collect when True.
249
+ Default: ``False`` .
354
250
  sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
355
251
  Default: ``True`` .
356
252
 
@@ -360,19 +256,32 @@ class Profiler:
360
256
  - False: The asynchronous way. The duration of the operator is that of sending from the CPU to the GPU.
361
257
  This method can reduce the impact of adding profiler on overall training time.
362
258
  data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
363
- Default value: ``True`` .
259
+ Default value: ``False`` .
364
260
  timeline_limit (int, optional): (Ascend/GPU) Set the maximum storage size of the timeline file (unit M).
365
261
  When using this parameter, `op_time` must be set to True. Default value: ``500`` .
366
262
  profile_framework (str, optional): (Ascend/GPU) The host information to collect, it must be one of
367
- ["all", "time", "memory", None], When is not set to None, a subdirectory host_info will be generated in the
368
- specified profiler directory, which stores the collected memory and time files on the Host side.
369
- Default: "all".
263
+ ["all", "time", None], When is not set to None, it would collect the host profiler data. When using this
264
+ parameter, the op_time parameter must be enabled.
265
+ Default: None.
370
266
 
371
- - "all": Record both host timestamp and host memory usage.
372
- - "time": Only record host timestamp.
373
- - "memory": Only record host memory usage.
267
+ - "all": Record host timestamp.
268
+ - "time": The same as "all".
374
269
  - None: Not record host information.
375
-
270
+ data_simplification (bool, optional): (Ascend only) Whether to remove FRAMEWORK data and other redundant data.
271
+ If set to True, only the delivery of profiler and the original performance data in the PROF_XXX
272
+ directory are retained to save disk space.
273
+ Default value: ``True`` .
274
+ with_stack (bool, optional): (Ascend) Whether to collect frame host call stack data on the Python side. This
275
+ data is presented in the form of a flame graph in the timeline. When using this parameter, the op_time and
276
+ profile_framework parameters must be enabled. Default value: ``False`` .
277
+ analyse_only (bool, optional): (Ascend/GPU) Whether to parse only performance data and not collect performance
278
+ data. This parameter is experimental parameter and does not need to be set by the user.
279
+ Default value: ``False`` .
280
+ rank_id (int, optional): (Ascend/GPU) Set the rank id during parsing. This parameter is
281
+ experimental parameter and does not need to be set by the user. Default value: ``0`` .
282
+ env_enable (bool, optional): (Ascend/GPU) Whether to enable the collection of environment variables.
283
+ This parameter is experimental parameter and does not need to be set by the user.
284
+ Default value: ``False`` .
376
285
  Raises:
377
286
  RuntimeError: When the version of CANN does not match the version of MindSpore,
378
287
  MindSpore cannot parse the generated ascend_job_id directory structure.
@@ -386,6 +295,7 @@ class Profiler:
386
295
  >>> from mindspore import nn
387
296
  >>> import mindspore.dataset as ds
388
297
  >>> from mindspore import Profiler
298
+ >>> from mindspore.profiler import ProfilerLevel
389
299
  >>>
390
300
  >>> class Net(nn.Cell):
391
301
  ... def __init__(self):
@@ -411,7 +321,7 @@ class Profiler:
411
321
  ...
412
322
  ... # Init Profiler
413
323
  ... # Note that the Profiler should be initialized before model.train
414
- ... profiler = Profiler()
324
+ ... profiler = Profiler(profiler_level=ProfilerLevel.Level0)
415
325
  ...
416
326
  ... # Train Model
417
327
  ... net = Net()
@@ -420,16 +330,16 @@ class Profiler:
420
330
  ... # Profiler end
421
331
  ... profiler.analyse()
422
332
  """
423
-
424
- _hwts_output_filename_target = "output_format_data_hwts_"
425
- _opcompute_output_filename_target = "output_op_compute_time_"
426
- _aicpu_op_output_filename_target = "output_data_preprocess_aicpu_"
427
- _has_analysed = False
428
333
  _has_initialized = False
429
334
  _ascend_profiling_options = ""
430
335
  _ascend_job_id = ""
336
+ ENABLE_STATUS = "on"
337
+ DISABLE_STATUS = "off"
431
338
 
432
339
  def __init__(self, **kwargs):
340
+ if os.getenv("PROFILING_MODE"):
341
+ raise RuntimeError("Profiling is already enabled by PROFILING_MODE env.")
342
+
433
343
  self._dev_id = None
434
344
  self._cpu_profiler = None
435
345
  self._gpu_profiler = None
@@ -445,13 +355,19 @@ class Profiler:
445
355
  self._rank_size = 1
446
356
  self._rank_id = 0
447
357
  self._ascend_profiler = None
358
+ self.metadata = {}
359
+ self.max_str_len = 4096
360
+ self.max_meta_size = 50 * 1024
448
361
  self._timeline_size_limit_byte = 500 * 1024 * 1024 # 500MB
449
362
  self._parallel_strategy = True
450
363
  self._model_iteration_dict = None
364
+ self._analyse_mode = ANALYSIS_SYNC_MODE
451
365
  _environment_check()
452
366
  # default aicore_metrics type is ArithmeticUtilization
453
367
  self._aicore_metrics_id = 0
454
- self._l2_cache = "off"
368
+ self._l2_cache = self.DISABLE_STATUS
369
+ self._hbm_ddr = self.DISABLE_STATUS
370
+ self._pcie = self.DISABLE_STATUS
455
371
  self._data_process = True
456
372
  self._op_time = True
457
373
  self._profile_communication = False
@@ -462,25 +378,33 @@ class Profiler:
462
378
  self._sync_enable = True
463
379
  self._stop_time = 0
464
380
  self._dynamic_status = False
465
- self._profile_framework = "all"
381
+ self._profile_framework = None
466
382
  self._msprof_enable = os.getenv("PROFILER_SAMPLECONFIG")
383
+ self.profiler_level = None
467
384
  self._pretty_json = False
385
+ self._analyse_only = kwargs.get("analyse_only", False)
386
+ self._data_simplification = kwargs.get("data_simplification", True)
387
+ self._with_stack = False
468
388
  if self._msprof_enable:
469
389
  return
470
- self._start_time = int(time.time() * 1000000)
390
+ self._start_time = int(time.time() * 1e6) # us
391
+ self._monotonic_time = int(time.monotonic() * 1e6) # us
471
392
  logger.info("Profiling: start time: %d", self._start_time)
472
393
  if kwargs.get("env_enable"):
473
394
  self._profiler_init(kwargs)
474
395
  return
475
-
476
396
  Profiler._has_initialized = True
477
397
  # get device_id and device_target
478
- self._get_devid_rankid_and_devtarget()
479
- self._parser_kwargs(kwargs)
480
- self._get_output_path(kwargs)
481
- self._decide_device_target(kwargs)
482
- if self.start_profile:
483
- self.start()
398
+ if self._analyse_only:
399
+ self._device_target = DeviceTarget.ASCEND.value
400
+ self._rank_id = kwargs.get("rank_id", 0)
401
+ else:
402
+ self._get_devid_rankid_and_devtarget()
403
+ self._parser_kwargs(kwargs)
404
+ self._get_output_path(kwargs)
405
+ self._decide_device_target(kwargs)
406
+ if self.start_profile:
407
+ self.start()
484
408
 
485
409
  @staticmethod
486
410
  def _check_output_path(output_path):
@@ -496,9 +420,9 @@ class Profiler:
496
420
  return output_path
497
421
 
498
422
  @staticmethod
499
- def _parse_start_log(input_file):
423
+ def _parse_job_start_time(prof_dir):
500
424
  """
501
- Parse host start log file, get the start time of the job.
425
+ Get the start time of the job.
502
426
 
503
427
  Args:
504
428
  input_file (str): The file path of the host start log file.
@@ -506,34 +430,83 @@ class Profiler:
506
430
  Returns:
507
431
  float, job start time, or None if it cannot be obtained.
508
432
  """
509
-
510
- job_start_time = 0
511
- with open(input_file) as f:
512
- job_start_time = json.load(f).get("collectionTimeBegin")
513
-
514
- return job_start_time
515
-
516
- @staticmethod
517
- def _parse_info_json(info_file):
433
+ try:
434
+ AscendMsprofExporter.check_msprof_env()
435
+ script_path = AscendMsprofExporter.get_msprof_info_path()
436
+ if not script_path:
437
+ logger.warning("Can`t find get_msprof_info.py path, use single-export mode instead.")
438
+ return None
439
+ logger.info("get_msprof_info.py path is : %s", script_path)
440
+ host_dir = os.path.join(prof_dir, 'host')
441
+ cmd = ['python', script_path, '-dir', host_dir]
442
+ outs, _ = AscendMsprofExporter.run_cmd(cmd)
443
+ if not outs:
444
+ logger.warning('Can`t find the msprof info result')
445
+ return None
446
+ result = json.loads(outs)
447
+ if result.get('status', 1) == 1:
448
+ return None
449
+ jor_start_time = result.get('data', {}).get('collection_info', {}).get('Collection start time', None)
450
+ if jor_start_time is not None:
451
+ return float(jor_start_time.strip())
452
+ return None
453
+ except (RuntimeError, JSONDecodeError, AttributeError, TimeoutError, FileNotFoundError) as err:
454
+ logger.warning('Get the drvVersion error, use single-export mode instead. detail : %s', err)
455
+ return None
456
+
457
+ @classmethod
458
+ def offline_analyse(cls, path: str, pretty=False, step_list=None, data_simplification=True):
518
459
  """
519
- Parse info log file, get the rank id and device id of the job.
460
+ Analyze training performance data offline, which is invoked after performance data collection is completed.
461
+
520
462
  Args:
521
- input_file (str): The file path of the parse info log file.
463
+ path (str): The profiling data path which need to be analyzed offline.
464
+ There needs to be a profiler directory in this path.
465
+ pretty (bool, optional): Whether to pretty json files. Default: ``False``.
466
+ step_list (list, optional): A list of steps that need to be analyzed, the steps must be
467
+ consecutive integers. Default: ``None``. By default, all steps will be analyzed.
468
+ data_simplification (bool, optional): Whether to enable data simplification. Default: ``True``.
522
469
 
523
- Returns:
524
- rank id, device id
470
+ Examples:
471
+ >>> from mindspore import Profiler
472
+ >>> Profiler.offline_analyse("./profiling_path")
525
473
  """
526
- with open(info_file, "r") as f:
527
- info_dict = json.load(f)
528
-
529
- rank_id = info_dict.get("rank_id", 0)
530
- dev_info = info_dict.get("DeviceInfo", [])
531
- dev_id = dev_info[0].get("id", -1)
532
-
474
+ real_path = os.path.realpath(path)
475
+ PathManager.check_input_directory_path(real_path)
476
+ profiler_parent_path_list = PathManager.get_profiler_parent_path_list(real_path)
477
+ if not isinstance(data_simplification, bool):
478
+ logger.warning(f"For offline_analyse, the parameter data_simplification must be bool, "
479
+ f"but got type {type(data_simplification)}, it will be set to True.")
480
+ data_simplification = True
481
+ if not profiler_parent_path_list:
482
+ raise ProfilerPathErrorException(f'The provided path "{path}" must have a "profiler" directory for '
483
+ f'single-device profiler data, or multiple subdirectories each containing '
484
+ f'a "profiler" directory for multi-device profiler data. ')
485
+ # get rank id
486
+ rank_list = []
487
+ for parent_path in profiler_parent_path_list:
488
+ profiler_path = os.path.join(parent_path, Constant.PROFILER_DIR)
489
+ rank_id = ProfilerInfo.get_rank_id(profiler_path)
533
490
  if int(rank_id) < 0:
534
- rank_id = 0
535
-
536
- return str(rank_id), str(dev_id)
491
+ logger.error(f"Unable to get a valid rank ID in the profiler directory: {profiler_path}")
492
+ rank_list.append(rank_id)
493
+ # start offline analyse
494
+ if len(profiler_parent_path_list) == 1:
495
+ PathManager.check_directory_path_writeable(profiler_parent_path_list[0])
496
+ profiler = cls(analyse_only=True, rank_id=rank_list[0], data_simplification=data_simplification)
497
+ profiler.analyse(profiler_parent_path_list[0], pretty, step_list)
498
+ else:
499
+ # Multiprocess Parsing
500
+ multiprocessing.set_start_method("fork", force=True)
501
+ process_number = min(Constant.DEFAULT_PROCESS_NUMBER, len(profiler_parent_path_list))
502
+ pool = multiprocessing.Pool(processes=process_number)
503
+ for idx, profiler_parent_path in enumerate(profiler_parent_path_list):
504
+ PathManager.check_directory_path_writeable(profiler_parent_path)
505
+ profiling_parser = cls(analyse_only=True, rank_id=rank_list[idx],
506
+ data_simplification=data_simplification)
507
+ pool.apply_async(profiling_parser.analyse, args=(profiler_parent_path, pretty, step_list))
508
+ pool.close()
509
+ pool.join()
537
510
 
538
511
  def op_analyse(self, op_name, device_id=None):
539
512
  """
@@ -560,12 +533,12 @@ class Profiler:
560
533
  >>> # Profiler init.
561
534
  >>> profiler = Profiler()
562
535
  >>> # Train Model or eval Model, taking LeNet5 as an example.
563
- >>> # Refer to https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
536
+ >>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/lenet.py
564
537
  >>> net = LeNet5()
565
538
  >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
566
539
  >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
567
540
  >>> # Create the dataset taking MNIST as an example.
568
- >>> # Refer to https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/mnist.py
541
+ >>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/mnist.py
569
542
  >>> dataloader = create_dataset()
570
543
  >>> model = Model(net, loss, optimizer)
571
544
  >>> model.train(5, dataloader, dataset_sink_mode=False)
@@ -600,20 +573,73 @@ class Profiler:
600
573
  return message
601
574
  return op_info
602
575
 
603
- def analyse(self, offline_path=None, pretty=False):
576
+ def analyse(self, offline_path=None, pretty=False, step_list=None, mode="sync"):
604
577
  """
605
578
  Collect and analyze training performance data, support calls during and after training. The example shows above.
606
579
 
607
580
  Args:
608
- offline_path (Union[str, None], optional): The data path which need to be analysed with offline mode.
581
+ offline_path (Union[str, None], optional): The data path which need to be analyzed with offline mode.
609
582
  Offline mode is used in abnormal exit scenario. This parameter should be set to ``None``
610
583
  for online mode. Default: ``None``.
611
584
  pretty (bool, optional): Whether to pretty json files. Default: ``False``.
612
- """
613
- self._pretty_json = pretty
614
- self._analyse(offline_path=offline_path)
585
+ step_list (list, optional): A list of steps that need to be analyzed, the steps must be
586
+ consecutive integers. Default: ``None``. By default, all steps will be analyzed.
587
+ mode (str, optional): Analysis mode, it must be one of ["sync", "async"]. Default: ``sync``.
615
588
 
616
- def _analyse(self, offline_path=None, model_iteration_dict=None):
589
+ - sync: analyse data in current process, it will block the current process.
590
+ - async: analyse data in subprocess, it will not block the current process. Since the parsing process
591
+ will take up extra CPU resources, please enable this mode according to the actual resource situation.
592
+
593
+ Examples:
594
+ >>> from mindspore.train import Callback
595
+ >>> from mindspore import Profiler
596
+ >>> class StopAtStep(Callback):
597
+ ... def __init__(self, start_step=1, stop_step=5):
598
+ ... super(StopAtStep, self).__init__()
599
+ ... self.start_step = start_step
600
+ ... self.stop_step = stop_step
601
+ ... self.profiler = Profiler(start_profile=False)
602
+ ...
603
+ ... def step_begin(self, run_context):
604
+ ... cb_params = run_context.original_args()
605
+ ... step_num = cb_params.cur_step_num
606
+ ... if step_num == self.start_step:
607
+ ... self.profiler.start()
608
+ ...
609
+ ... def step_end(self, run_context):
610
+ ... cb_params = run_context.original_args()
611
+ ... step_num = cb_params.cur_step_num
612
+ ... if step_num == self.stop_step:
613
+ ... self.profiler.stop()
614
+ ...
615
+ ... def end(self, run_context):
616
+ ... self.profiler.analyse(step_list=[2,3,4], mode="sync")
617
+ """
618
+ try:
619
+ if isinstance(pretty, bool):
620
+ self._pretty_json = pretty
621
+ if mode not in [ANALYSIS_SYNC_MODE, ANALYSIS_ASYNC_MODE]:
622
+ logger.warning("For analyse, the parameter mode must be one of ['sync', 'async'], "
623
+ "it will be set to 'sync'.")
624
+ mode = ANALYSIS_SYNC_MODE
625
+ model_iteration_dict = {}
626
+ if step_list is not None and not isinstance(step_list, list):
627
+ raise ProfilerParamTypeErrorException("Parameter step_list must be a list.")
628
+ if step_list:
629
+ if not all(isinstance(step_id, int) for step_id in step_list):
630
+ raise ProfilerParamTypeErrorException("The elements of the parameter step_list must be integers.")
631
+ step_list.sort()
632
+ if step_list[-1] - step_list[0] != len(step_list) - 1:
633
+ err_msg = "The elements of the parameter step_list must be continuous integers."
634
+ raise ProfilerParamTypeErrorException(err_msg)
635
+ model_iteration_dict[DEFAULT_MODEL_ID] = step_list
636
+ if offline_path is not None and not isinstance(offline_path, str):
637
+ raise ProfilerParamTypeErrorException("For analyse, the type of parameter offline_path must be str.")
638
+ self._analyse(offline_path=offline_path, model_iteration_dict=model_iteration_dict, mode=mode)
639
+ except (ProfilerException, RuntimeError, OSError, TypeError, NameError) as err:
640
+ logger.error("Profiler analyse failed: %s", str(err))
641
+
642
+ def _analyse(self, offline_path=None, model_iteration_dict=None, mode=ANALYSIS_SYNC_MODE):
617
643
  """
618
644
  Collect and analyze training performance data, support calls during and after training. The example shows above.
619
645
 
@@ -622,24 +648,24 @@ class Profiler:
622
648
  Offline mode is used in abnormal exit scenario. This parameter should be set to ``None``
623
649
  for online mode. Default: ``None``.
624
650
  model_iteration_dict: Dictionary with model id as the key and iteration id as the value, Default: ``None``.
651
+ mode (str, optional): Analysis mode. Whether to analyse data in subprocess. Default: ``sync``.
652
+ By default, analyse data in current process.
625
653
  """
626
654
  self._model_iteration_dict = model_iteration_dict
627
-
628
655
  self._init_profiler_info()
629
656
  self._is_support_step_info_collect()
657
+ self._analyse_mode = mode
630
658
  parallel_mode = get_auto_parallel_context("parallel_mode")
631
659
  stage_num = get_auto_parallel_context("pipeline_stages")
632
660
 
633
661
  ProfilerInfo.set_parallel_info(parallel_mode, stage_num)
634
- ProfilerInfo.set_rank_size(self._rank_size)
635
- ProfilerInfo.set_heterogeneous(self._is_heterogeneous)
636
662
  if offline_path:
637
- if self._is_offline_parser():
638
- ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
639
- self._ascend_graph_analyse()
640
- ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
641
- ProfilerInfo.save(self._output_path)
642
- _offline_parse(offline_path)
663
+ # Loads the ProfilerInfo data, avoid overwriting the data collection prof_info_x.json.
664
+ ProfilerInfo.load_profiler_info_dict(os.path.join(offline_path, "profiler"))
665
+ ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
666
+ self._ascend_graph_analyse(offline_path=offline_path)
667
+ ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
668
+ ProfilerInfo.save(self._output_path)
643
669
  return
644
670
  if self._msprof_enable:
645
671
  return
@@ -654,21 +680,21 @@ class Profiler:
654
680
  cpu_op_file = glob.glob(os.path.join(self._output_path, 'cpu_op_type_info_*'))
655
681
  if self._device_target and self._device_target != DeviceTarget.CPU.value and cpu_op_file:
656
682
  self._is_heterogeneous = True
683
+
684
+ ProfilerInfo.set_heterogeneous(self._is_heterogeneous)
657
685
  ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
658
686
  if self._device_target and self._device_target == DeviceTarget.CPU.value:
659
687
  self._cpu_analyse()
688
+ if self._profile_framework:
689
+ logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host profiler "
690
+ "data.")
660
691
 
661
692
  if self._device_target and self._device_target == DeviceTarget.GPU.value:
662
693
  self._gpu_analyse()
663
694
 
664
695
  elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
665
696
  self._ascend_analyse()
666
- if self._profile_framework:
667
- if self._device_target != DeviceTarget.CPU.value:
668
- self._host_info_analyse()
669
- else:
670
- logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host_info"
671
- " directory in the output path.")
697
+
672
698
  logger.info("Profiling: all the data have been analyzed.")
673
699
  ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
674
700
  ProfilerInfo.save(self._output_path)
@@ -715,13 +741,11 @@ class Profiler:
715
741
  else:
716
742
  raise RuntimeError("The profiler has already started. Do not turn on again in the open state.")
717
743
 
718
- # No need to start anything if parse profiling data offline
719
- if self._is_offline_parser():
720
- return
721
-
722
744
  self._cpu_profiler.step_profiling_enable(True)
723
745
  if self._op_time:
724
746
  self._cpu_profiler.enable_op_time()
747
+ if self._profile_memory:
748
+ self._cpu_profiler.enable_profile_memory()
725
749
 
726
750
  if self._device_target and self._device_target == DeviceTarget.GPU.value:
727
751
  if self._data_process:
@@ -736,6 +760,14 @@ class Profiler:
736
760
  self._md_profiler.start()
737
761
  self._ascend_graph_start()
738
762
  ProfilerInfo.set_profiling_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
763
+ ProfilerInfo.set_system_cnt(c_expression.get_clock_syscnt())
764
+ ProfilerInfo.set_system_time(int(c_expression.get_clock_time())) # ns
765
+ if context.get_context("mode") == context.GRAPH_MODE:
766
+ jit_config = context.get_jit_config()
767
+ jit_level = jit_config.get("jit_level", "")
768
+ ProfilerInfo.set_jit_level(jit_level)
769
+ if self._profile_framework:
770
+ _framework_profiler_enable_mi()
739
771
 
740
772
  def stop(self):
741
773
  """
@@ -778,10 +810,6 @@ class Profiler:
778
810
  raise RuntimeError("The profiler has not started, so can not stop. Please call the start() method "
779
811
  "before calling the stop() method.")
780
812
 
781
- # No need to stop anything if parse profiling data offline
782
- if self._is_offline_parser():
783
- return
784
-
785
813
  # Stop data collection after all operators are executed.
786
814
  _pynative_executor.sync()
787
815
 
@@ -798,9 +826,101 @@ class Profiler:
798
826
  self._stop_time = int(time.time() * 10000000)
799
827
  ProfilerInfo.set_profiling_stop_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
800
828
  self._init_profiler_info()
829
+ ProfilerInfo.set_diff_time(self._start_time - self._monotonic_time)
801
830
  ProfilerInfo.save(self._output_path)
831
+ self._dump_metadata()
802
832
  logger.info("Profiling: stop time: %d", self._stop_time)
803
833
 
834
+ def add_metadata(self, key: str, value: str):
835
+ """
836
+ Report custom metadata key-value pair data.
837
+
838
+ Args:
839
+ key (str): The key to the metadata.
840
+ value (str): The value to the metadata.
841
+
842
+ Examples:
843
+ >>> from mindspore import Profiler
844
+ >>> # Profiler init.
845
+ >>> profiler = Profiler()
846
+ >>> # Call Profiler add_metadata
847
+ >>> profiler.add_metadata("test_key", "test_value")
848
+ >>> # Profiler end
849
+ >>> profiler.analyse()
850
+ """
851
+ if not isinstance(key, str) or not isinstance(value, str):
852
+ logger.warning("The key and value of metadata must be string. Skip this metadata.")
853
+ return
854
+ if not self._check_str_valid(key) or not self._check_str_valid(value):
855
+ logger.warning("Invalid input key or value. Skip this metadata.")
856
+ return
857
+ add_size = getsizeof(key) + getsizeof(value)
858
+ if getsizeof(self.metadata) + add_size < self.max_meta_size:
859
+ if key in self.metadata:
860
+ logger.warning(f"{key} is already saved as metadata, override it.")
861
+ self.metadata[key] = value
862
+ else:
863
+ logger.warning("Too many metadata added. Skip this metadata")
864
+
865
+ def add_metadata_json(self, key: str, value: str):
866
+ """
867
+ Report custom metadata key-value pair data with the value as a JSON string data.
868
+
869
+ Args:
870
+ key (str): The key to the metadata.
871
+ value (str): The json str format value to the metadata.
872
+
873
+ Examples:
874
+ >>> import json
875
+ >>> from mindspore import Profiler
876
+ >>> # Profiler init.
877
+ >>> profiler = Profiler()
878
+ >>> # Call Profiler add_metadata_json
879
+ >>> profiler.add_metadata_json("test_key", json.dumps({"key1": 1, "key2": 2}))
880
+ >>> # Profiler end, metadata will be saved in profiler_metadata.json
881
+ >>> profiler.analyse()
882
+ """
883
+ if not isinstance(key, str) or not isinstance(value, str):
884
+ logger.warning("The key and value of metadata must be string. Skip this metadata.")
885
+ return
886
+ if not self._check_str_valid(key) or not self._check_str_valid(value):
887
+ logger.warning("Invalid input key or value. Skip this metadata.")
888
+ return
889
+ add_size = getsizeof(key) + getsizeof(value)
890
+ if getsizeof(self.metadata) + add_size < self.max_meta_size:
891
+ try:
892
+ if key in self.metadata:
893
+ logger.warning(f"{key} is already saved as metadata, override it.")
894
+ self.metadata[key] = json.loads(value)
895
+ except ValueError:
896
+ logger.warning("The metadata value must be json format string. Skip this metadata")
897
+ else:
898
+ logger.warning("Too many metadata added. Skip this metadata")
899
+
900
+ def _dump_metadata(self):
901
+ """Dump metadata to file."""
902
+ if not self.metadata:
903
+ return
904
+ FileManager.create_json_file(self._output_path, self.metadata, "profiler_metadata.json", indent=4)
905
+ self.metadata.clear()
906
+
907
+ def _check_str_valid(self, input_str: str):
908
+ """Check str length"""
909
+ if len(input_str) > self.max_str_len:
910
+ return False
911
+ return True
912
+
913
+ def _set_ascend_job_id(self, ascend_job_id):
914
+ """Set output_path for offline parsing performance data."""
915
+ if not ascend_job_id:
916
+ return
917
+ self._ascend_job_id = validate_and_normalize_path(ascend_job_id)
918
+ if not os.path.exists(self._ascend_job_id):
919
+ msg = f"Invalid ascend_job_id: {self._ascend_job_id}, Please pass the absolute path of the JOB dir"
920
+ logger.critical(msg)
921
+ raise ValueError(msg)
922
+ self._output_path, _ = os.path.split(self._ascend_job_id)
923
+
804
924
  def _profiler_init(self, kwargs):
805
925
  """Initialize variables when profiler is enabled by environment variables."""
806
926
  options = kwargs.get("env_enable")
@@ -814,7 +934,7 @@ class Profiler:
814
934
  self._profile_communication = options.get('profile_communication')
815
935
  self._op_time = options.get('op_time')
816
936
  self._device_target = context.get_context("device_target").lower()
817
- self._profile_framework = options.get('profile_framework', 'all')
937
+ self._profile_framework = options.get('profile_framework', None)
818
938
  self._profiler_manager = c_expression.ProfilerManager.get_instance()
819
939
  self._cpu_profiler = c_expression.Profiler.get_instance("CPU")
820
940
  if self._data_process:
@@ -865,32 +985,32 @@ class Profiler:
865
985
 
866
986
  def _gpu_profiler_init(self, kwargs):
867
987
  """Gpu profiler init."""
988
+ self._parse_parameter_for_gpu(kwargs)
868
989
  # Setup and start MindData Profiling
869
990
  if self._data_process:
870
991
  self._md_profiler = cde.GlobalContext.profiling_manager()
871
992
  self._md_profiler.init()
872
- self._parse_parameter_for_gpu(kwargs)
873
993
 
874
994
  gpu_profiler = c_expression.Profiler
875
995
  self._gpu_profiler = gpu_profiler.get_instance("GPU")
876
- self._gpu_profiler.init(self._output_path)
877
- self._gpu_profiler.sync_enable(self._sync_enable)
878
996
  if GlobalComm.WORLD_COMM_GROUP == "nccl_world_group":
879
997
  self._dev_id = str(get_rank())
880
998
  os.environ['DEVICE_ID'] = self._dev_id
881
999
  self._rank_id = self._dev_id
1000
+ self._gpu_profiler.init(self._output_path, int(self._rank_id))
1001
+ self._gpu_profiler.sync_enable(self._sync_enable)
882
1002
 
883
1003
  def _ascend_profiler_init(self, kwargs):
884
1004
  """Ascend profiler init."""
1005
+ self._parse_parameter_for_ascend(kwargs)
885
1006
  # Setup and start MindData Profiling
886
1007
  if self._data_process:
887
1008
  self._md_profiler = cde.GlobalContext.profiling_manager()
888
1009
  self._md_profiler.init()
889
1010
  self._init_time = int(time.time() * 10000000)
890
1011
  logger.info("Profiling: profiling init time: %d", self._init_time)
891
- self._parse_parameter_for_ascend(kwargs)
892
- os.environ['DEVICE_ID'] = self._dev_id
893
1012
 
1013
+ os.environ['DEVICE_ID'] = self._dev_id
894
1014
  self._ascend_profiling_options = json.dumps(self._construct_profiling_options())
895
1015
  # Characters longer than 2048 are ignored, resulting in profiling option resolution errors
896
1016
  if len(self._ascend_profiling_options) > 2048:
@@ -906,7 +1026,7 @@ class Profiler:
906
1026
  data_path = os.path.join(container_path, "data")
907
1027
  data_path = validate_and_normalize_path(data_path)
908
1028
  if not os.path.exists(data_path):
909
- os.makedirs(data_path, exist_ok=True)
1029
+ os.makedirs(data_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
910
1030
 
911
1031
  def _construct_profiling_options(self):
912
1032
  """
@@ -919,18 +1039,22 @@ class Profiler:
919
1039
  "output": self._output_path,
920
1040
  "fp_point": fp_point,
921
1041
  "bp_point": bp_point,
922
- "training_trace": "on" if self._op_time else "off",
923
- "task_trace": "on" if self._op_time else "off",
1042
+ "training_trace": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
1043
+ "task_trace": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
924
1044
  "aic_metrics": AICORE_METRICS_DICT.get(self._aicore_metrics_id, "ArithmeticUtilization"),
925
- "aicpu": "on" if self._data_process or self._op_time else "off",
926
- "profile_memory": "on" if self._op_time and self._profile_memory else "off",
927
- "hccl": "on" if self._op_time and self._profile_communication else "off",
1045
+ "aicpu": self.ENABLE_STATUS if self._data_process or self._op_time else self.DISABLE_STATUS,
1046
+ "profile_memory": self.ENABLE_STATUS if self._op_time and self._profile_memory else self.DISABLE_STATUS,
1047
+ "hccl": self.ENABLE_STATUS if self._op_time and self._profile_communication else self.DISABLE_STATUS,
928
1048
  "l2_cache": self._l2_cache,
929
- "parallel_strategy": "on" if self._parallel_strategy else "off",
930
- "op_time": "on" if self._op_time else "off",
931
- "profile_framework": self._profile_framework
1049
+ "hbm_ddr": self._hbm_ddr,
1050
+ "pcie": self._pcie,
1051
+ "parallel_strategy": self.ENABLE_STATUS if self._parallel_strategy else self.DISABLE_STATUS,
1052
+ "op_time": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
1053
+ "profile_framework": self._profile_framework,
1054
+ "profiler_level": self.profiler_level.value if self.profiler_level else self.DISABLE_STATUS,
1055
+ "with_stack": "on" if self._with_stack else "off"
932
1056
  }
933
-
1057
+ ProfilerInfo.set_profiling_options(profiling_options)
934
1058
  return profiling_options
935
1059
 
936
1060
  def _parse_parameter_for_gpu(self, kwargs):
@@ -961,7 +1085,7 @@ class Profiler:
961
1085
  self._profile_communication = False
962
1086
 
963
1087
  if self._profile_communication:
964
- hccl_option = {"output": self._output_path, "task_trace": "on"}
1088
+ hccl_option = {"output": self._output_path, "task_trace": self.ENABLE_STATUS}
965
1089
  os.environ['PROFILING_OPTIONS'] = json.dumps(hccl_option)
966
1090
 
967
1091
  self._profile_memory = kwargs.pop("profile_memory", False)
@@ -978,7 +1102,7 @@ class Profiler:
978
1102
 
979
1103
  if self._aicore_metrics_id not in AICORE_METRICS_DICT:
980
1104
  logger.warning(f"For '{self.__class__.__name__}', the parameter aicore_metrics must be in "
981
- f"[-1, 0, 1, 2, 3, 4, 5], but got {self._aicore_metrics_id}, it will be set to 0.")
1105
+ f"[-1, 0, 1, 2, 3, 4, 5, 6], but got {self._aicore_metrics_id}, it will be set to 0.")
982
1106
  self._aicore_metrics_id = 0
983
1107
 
984
1108
  l2_cache_enable = kwargs.pop("l2_cache", False)
@@ -986,38 +1110,43 @@ class Profiler:
986
1110
  logger.warning(f"For '{self.__class__.__name__}', the parameter l2_cache must be bool, "
987
1111
  f"but got type {type(l2_cache_enable)}, it will be set to False.")
988
1112
  l2_cache_enable = False
989
- if l2_cache_enable:
990
- self._l2_cache = "on"
991
- else:
992
- self._l2_cache = "off"
993
-
994
- self._parallel_strategy = kwargs.pop("parallel_strategy", True)
1113
+ self._l2_cache = self.ENABLE_STATUS if l2_cache_enable else self.DISABLE_STATUS
1114
+
1115
+ hbm_ddr_enable = kwargs.pop("hbm_ddr", False)
1116
+ if not isinstance(hbm_ddr_enable, bool):
1117
+ logger.warning(f"For '{self.__class__.__name__}', the parameter hbm_ddr must be bool, "
1118
+ f"but got type {type(hbm_ddr_enable)}, it will be set to False.")
1119
+ hbm_ddr_enable = False
1120
+ self._hbm_ddr = self.ENABLE_STATUS if hbm_ddr_enable else self.DISABLE_STATUS
1121
+
1122
+ pcie_enable = kwargs.pop("pcie", False)
1123
+ if not isinstance(pcie_enable, bool):
1124
+ logger.warning(f"For '{self.__class__.__name__}', the parameter pcie must be bool, "
1125
+ f"but got type {type(pcie_enable)}, it will be set to False.")
1126
+ pcie_enable = False
1127
+ self._pcie = self.ENABLE_STATUS if pcie_enable else self.DISABLE_STATUS
1128
+
1129
+ self._parallel_strategy = kwargs.pop("parallel_strategy", False)
995
1130
  if not isinstance(self._parallel_strategy, bool):
996
1131
  logger.warning(f"For '{self.__class__.__name__}', the parameter parallel_strategy must be bool, "
997
- f"but got type {type(self._parallel_strategy)}, it will be set to True.")
998
- self._parallel_strategy = True
999
-
1000
- task_sink = os.getenv("GRAPH_OP_RUN")
1001
- if task_sink and task_sink == "1":
1002
- logger.warning(f"For '{self.__class__.__name__}', Profiling is not supported if set environment "
1003
- f"'GRAPH_OP_RUN' value to 1, which means model training task is not sink.")
1004
-
1005
- def _set_ascend_job_id(self, ascend_job_id):
1006
- """Set output_path for offline parsing performance data."""
1007
- if not ascend_job_id:
1008
- return
1009
- self._ascend_job_id = validate_and_normalize_path(ascend_job_id)
1010
- if not os.path.exists(self._ascend_job_id):
1011
- msg = f"Invalid ascend_job_id: {self._ascend_job_id}, Please pass the absolute path of the JOB dir"
1012
- logger.critical(msg)
1013
- raise ValueError(msg)
1014
- self._output_path, _ = os.path.split(self._ascend_job_id)
1015
-
1016
- def _is_offline_parser(self):
1017
- """Return whether offline parser or online parser."""
1018
- if self._device_target and self._device_target == DeviceTarget.ASCEND.value:
1019
- return bool(self._ascend_job_id)
1020
- return False
1132
+ f"but got type {type(self._parallel_strategy)}, it will be set to False.")
1133
+ self._parallel_strategy = False
1134
+
1135
+ self.profiler_level = kwargs.pop("profiler_level", None)
1136
+ if self.profiler_level and not isinstance(self.profiler_level, ProfilerLevel):
1137
+ logger.warning(f"For '{self.__class__.__name__}', the parameter profiler_level must be one of "
1138
+ f"[ProfilerLevel.Level0, ProfilerLevel.Level1, ProfilerLevel.Level2], but got type "
1139
+ f"{type(self.profiler_level)}, it will be set to ProfilerLevel.Level0.")
1140
+ self.profiler_level = ProfilerLevel.Level0
1141
+ elif self.profiler_level == ProfilerLevel.Level0:
1142
+ self._data_process = False
1143
+ self._aicore_metrics_id = -1
1144
+ logger.warning(f"For '{self.__class__.__name__}', when profiler_level set Level0, data_process will be set "
1145
+ f"to False and aicore_metrics set to -1.")
1146
+ elif self.profiler_level == ProfilerLevel.Level1:
1147
+ self._data_process = False
1148
+ logger.warning(f"For '{self.__class__.__name__}', when profiler_level set Level1, data_process will be set "
1149
+ f"to False.")
1021
1150
 
1022
1151
  def _ascend_analyse(self):
1023
1152
  """Collect and analyse ascend performance data."""
@@ -1029,23 +1158,21 @@ class Profiler:
1029
1158
  self._rank_size = get_group_size()
1030
1159
  else:
1031
1160
  self._rank_size = int(os.getenv('RANK_SIZE', '1'))
1161
+ ProfilerInfo.set_rank_size(self._rank_size)
1032
1162
 
1033
1163
  if self._has_started:
1034
1164
  self.stop()
1035
1165
  else:
1036
1166
  logger.info("No need to stop profiler because profiler has been stopped.")
1167
+ self._ascend_profiler.finalize()
1037
1168
  # export op data before analyse
1038
1169
  self._ascend_graph_analyse()
1039
1170
 
1040
- def _minddata_analyse(self, source_path):
1171
+ def _minddata_analyse(self):
1041
1172
  """Analyse minddata for ascend graph model."""
1042
1173
  if not self._data_process:
1043
1174
  return
1044
1175
  store_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1045
- # Parsing minddata AICPU profiling
1046
- if self._device_target == DeviceTarget.ASCEND.value:
1047
- logger.info("Profiling: analyzing the minddata AICPU data.")
1048
- MinddataParser.execute(source_path, self._output_path, store_id)
1049
1176
 
1050
1177
  # parse minddata pipeline operator and queue
1051
1178
  try:
@@ -1065,6 +1192,16 @@ class Profiler:
1065
1192
  finally:
1066
1193
  pass
1067
1194
 
1195
+ def _minddata_aicpu_analyse(self, source_path, job_id):
1196
+ """Analyse minddata aicpu after ascend."""
1197
+ if not self._data_process:
1198
+ return
1199
+ store_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1200
+ # Parsing minddata AICPU profiling
1201
+ if self._device_target == DeviceTarget.ASCEND.value:
1202
+ logger.info("Profiling: analyzing the minddata AICPU data.")
1203
+ MinddataParser.execute(source_path, self._output_path, job_id, store_id)
1204
+
1068
1205
  def _ascend_fpbp_analyse(self, op_summary, steptrace):
1069
1206
  """
1070
1207
  Ascend graph model op analyse.
@@ -1088,7 +1225,7 @@ class Profiler:
1088
1225
  pass
1089
1226
  return points
1090
1227
 
1091
- def _ascend_op_analyse(self, op_summary, op_statistic, dynamic_status):
1228
+ def _ascend_op_analyse(self, op_summary, op_statistic, dynamic_status, launch_ops: List):
1092
1229
  """
1093
1230
  Ascend graph model hwts analyse.
1094
1231
 
@@ -1115,12 +1252,12 @@ class Profiler:
1115
1252
  else:
1116
1253
  output_timeline_data_path = None
1117
1254
 
1118
- op_analyser = AscendOPGenerator(op_summary, op_statistic, dynamic_status)
1255
+ op_analyser = AscendOPGenerator(op_summary, op_statistic, dynamic_status, launch_ops)
1119
1256
  op_analyser.parse()
1120
1257
  op_analyser.write(op_intermediate_detail_path, op_intermediate_type_path,
1121
1258
  aicpu_intermediate_detail_path, framework_raw_path, output_timeline_data_path)
1122
- except ProfilerException as err:
1123
- logger.warning(err.message)
1259
+ except (ProfilerException, RuntimeError) as err:
1260
+ logger.warning(str(err))
1124
1261
  finally:
1125
1262
  pass
1126
1263
 
@@ -1142,19 +1279,22 @@ class Profiler:
1142
1279
  finally:
1143
1280
  pass
1144
1281
 
1145
- def _ascend_timeline_analyse(self, op_summary, steptrace):
1282
+ def _ascend_timeline_analyse(self, op_summary, steptrace, source_path, mindstudio_profiler_output) -> List:
1146
1283
  """Analyse timeline info."""
1147
1284
  try:
1148
1285
  logger.info("Profiling: analyzing the timeline data")
1149
- timeline_analyser = AscendTimelineGenerator(self._output_path, self._dev_id, self._rank_id, self._rank_size,
1150
- context.get_context('mode'))
1151
- timeline_analyser.init_timeline(op_summary, steptrace, pretty=self._pretty_json)
1152
- timeline_analyser.write_timeline(self._timeline_size_limit_byte)
1286
+ timeline_analyser = AscendTimelineGenerator(self._output_path, source_path, mindstudio_profiler_output,
1287
+ self._rank_id, self._rank_size, context.get_context('mode'),
1288
+ self._model_iteration_dict.get(DEFAULT_MODEL_ID))
1289
+ timeline_analyser.parse_cluster_data(op_summary, steptrace)
1290
+ timeline_analyser.parse_timeline_data(pretty=self._pretty_json)
1291
+ timeline_analyser.write_timeline_display()
1153
1292
  timeline_analyser.write_timeline_summary()
1154
1293
  except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
1155
1294
  logger.warning('Fail to write timeline data: %s', err)
1156
1295
  finally:
1157
1296
  pass
1297
+ return timeline_analyser.get_kernel_event_list()
1158
1298
 
1159
1299
  def _ascend_dynamic_net_analyse(self, op_summary):
1160
1300
  """Analyse dynamic shape network info."""
@@ -1168,7 +1308,7 @@ class Profiler:
1168
1308
  dynamic_parser = DynamicFrameWorkParser(self._output_path, self._rank_id, pretty=self._pretty_json)
1169
1309
  dynamic_parser.write_dynamic_shape_data(op_summary)
1170
1310
 
1171
- def _ascend_flops_analyse(self, op_summary):
1311
+ def _ascend_flops_analyse(self, op_summary, launch_ops):
1172
1312
  """Get op FLOPs from op_summary, write output_op_flops_x.csv."""
1173
1313
  if 'vector_fops' not in op_summary.dtype.names and 'cube_fops' not in op_summary.dtype.names:
1174
1314
  logger.warning("[Profiler] Can not found cube fops and vector fops data in the op summary.")
@@ -1183,16 +1323,16 @@ class Profiler:
1183
1323
  flops_path = validate_and_normalize_path(flops_path)
1184
1324
  flops_summary_path = validate_and_normalize_path(flops_summary_path)
1185
1325
 
1186
- flops_analyser = AscendFlopsGenerator(op_summary, pretty=self._pretty_json)
1326
+ flops_analyser = AscendFlopsGenerator(op_summary, launch_ops, pretty=self._pretty_json)
1187
1327
  flops_analyser.parse()
1188
1328
  flops_analyser.write(flops_path, flops_summary_path)
1189
1329
 
1190
- except ProfilerException as err:
1191
- logger.warning(err.message)
1330
+ except (ProfilerException, RuntimeError) as err:
1331
+ logger.warning(str(err))
1192
1332
  finally:
1193
1333
  pass
1194
1334
 
1195
- def _ascend_graph_memory_analyse(self, points):
1335
+ def _ascend_graph_memory_analyse(self):
1196
1336
  """Analyse memory usage info."""
1197
1337
  if not self._profile_memory:
1198
1338
  return
@@ -1201,7 +1341,7 @@ class Profiler:
1201
1341
  "PyNative mode currently.")
1202
1342
  try:
1203
1343
  logger.info("Profiling: analyzing the memory usage info.")
1204
- self._analyse_memory_usage(points)
1344
+ self._analyse_memory_usage()
1205
1345
  except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
1206
1346
  logger.warning(err.message)
1207
1347
  finally:
@@ -1209,30 +1349,52 @@ class Profiler:
1209
1349
 
1210
1350
  def _ascend_ms_analyze(self, source_path):
1211
1351
  """Ascend ms generate"""
1212
- time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
1352
+
1353
+ timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
1213
1354
  if self._rank_id:
1214
- ascend_ms_path = f"rank-{self._rank_id}_{time_stamp}_ascend_ms"
1355
+ ascend_ms_path = f"rank-{self._rank_id}_{timestamp}_ascend_ms"
1215
1356
  else:
1216
- ascend_ms_path = f"{socket.gethostname()}--{os.getpid()}_{time_stamp}_ascend_ms"
1217
- self._ascend_ms_path = os.path.join(self._output_path, ascend_ms_path)
1218
- if not os.path.exists(self._ascend_ms_path):
1219
- os.makedirs(self._ascend_ms_path, exist_ok=True)
1220
- os.chmod(self._ascend_ms_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
1357
+ ascend_ms_path = f"{socket.gethostname()}--{os.getpid()}_{timestamp}_ascend_ms"
1358
+ ascend_ms_path = os.path.join(self._output_path, ascend_ms_path)
1221
1359
 
1222
1360
  dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1223
- ascend_profiler_output_path = os.path.join(self._ascend_ms_path, 'ASCEND_PROFILER_OUTPUT')
1224
- os.makedirs(ascend_profiler_output_path, exist_ok=True)
1361
+ ascend_profiler_output_path = os.path.join(ascend_ms_path, 'ASCEND_PROFILER_OUTPUT')
1362
+ PathManager.make_dir_safety(ascend_profiler_output_path)
1225
1363
 
1226
1364
  source_profiler_info_path = os.path.join(self._output_path, f"profiler_info_{dev_id}.json")
1227
- target_profiler_info_path = os.path.join(self._ascend_ms_path, f"profiler_info_{dev_id}.json")
1228
- shutil.copy(source_profiler_info_path, target_profiler_info_path)
1365
+ target_profiler_info_path = os.path.join(ascend_ms_path, f"profiler_info_{dev_id}.json")
1366
+ PathManager.copy_file(source_profiler_info_path, target_profiler_info_path)
1367
+
1368
+ source_profiler_metadata_path = os.path.join(self._output_path, f"profiler_metadata.json")
1369
+ target_profiler_metadata_path = os.path.join(ascend_ms_path, f"profiler_metadata.json")
1370
+ PathManager.copy_file(source_profiler_metadata_path, target_profiler_metadata_path)
1229
1371
 
1230
1372
  source_timeline_path = os.path.join(self._output_path, f"ascend_timeline_display_{dev_id}.json")
1231
1373
  target_timeline_path = os.path.join(ascend_profiler_output_path, f"trace_view.json")
1232
- shutil.copy(source_timeline_path, target_timeline_path)
1374
+ PathManager.copy_file(source_timeline_path, target_timeline_path)
1375
+
1376
+ src_op_mem_file = os.path.join(self._output_path, f"operator_memory_{dev_id}.csv")
1377
+ dst_op_mem_file = os.path.join(ascend_profiler_output_path, f"operator_memory.csv")
1378
+ PathManager.copy_file(src_op_mem_file, dst_op_mem_file)
1379
+
1380
+ ms_output_path = os.path.realpath(
1381
+ os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
1382
+ static_op_mem_path = os.path.join(ms_output_path, f"static_op_mem_*.csv")
1383
+ src_static_op_mem_path = glob.glob(static_op_mem_path)
1384
+ if src_static_op_mem_path:
1385
+ dst_static_op_mem_file = os.path.join(ascend_profiler_output_path, f"static_op_mem.csv")
1386
+ PathManager.copy_file(src_static_op_mem_path[0], dst_static_op_mem_file)
1387
+
1388
+ src_op_statistics_path = os.path.join(ms_output_path, "op_statistic_*.csv")
1389
+ src_op_statistics_path = glob.glob(src_op_statistics_path)
1390
+ if src_op_statistics_path:
1391
+ dst_op_statistics_path = os.path.join(ascend_profiler_output_path, f"op_statistic.csv")
1392
+ PathManager.copy_file(src_op_statistics_path[0], dst_op_statistics_path)
1233
1393
 
1234
1394
  self._ascend_graph_cluster_analyse(source_path, ascend_profiler_output_path)
1235
1395
  self._ascend_graph_communicate_analyse(source_path, ascend_profiler_output_path)
1396
+ AscendIntegrateGenerator(source_path, ascend_profiler_output_path).parse()
1397
+ AscendMemoryGenerator(self._output_path, self._rank_id, source_path, ascend_profiler_output_path).parse()
1236
1398
 
1237
1399
  def _ascend_graph_cluster_analyse(self, source_path, ascend_profiler_output_path):
1238
1400
  """Analyse step trace time info"""
@@ -1243,7 +1405,7 @@ class Profiler:
1243
1405
  step_trace_time_path = os.path.join(ascend_profiler_output_path, f'step_trace_time.csv')
1244
1406
  step_trace_time_path = validate_and_normalize_path(step_trace_time_path)
1245
1407
 
1246
- cluster_analyse = AscendClusterGenerator(os.path.join(source_path, 'timeline'))
1408
+ cluster_analyse = AscendClusterGenerator(source_path)
1247
1409
  cluster_analyse.parse()
1248
1410
  cluster_analyse.write(step_trace_time_path)
1249
1411
  except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
@@ -1262,10 +1424,11 @@ class Profiler:
1262
1424
  communication_file_path = os.path.join(ascend_profiler_output_path, f'communication.json')
1263
1425
  communication_file_path = validate_and_normalize_path(communication_file_path)
1264
1426
 
1265
- communication_matrix_file_path = os.path.join(ascend_profiler_output_path, f"communication_matrix.json")
1427
+ communication_matrix_file_path = os.path.join(ascend_profiler_output_path,
1428
+ f"communication_matrix.json")
1266
1429
  communication_matrix_file_path = validate_and_normalize_path(communication_matrix_file_path)
1267
1430
 
1268
- analyze_path = os.path.join(os.path.dirname(source_path), 'analyze')
1431
+ analyze_path = os.path.realpath(os.path.join(source_path, os.path.pardir, 'analyze'))
1269
1432
  communicate_analyser = AscendCommunicationGenerator(analyze_path)
1270
1433
  communicate_analyser.parse()
1271
1434
  communicate_analyser.write(communication_file_path, communication_matrix_file_path)
@@ -1274,7 +1437,7 @@ class Profiler:
1274
1437
  finally:
1275
1438
  pass
1276
1439
 
1277
- def _ascend_graph_hccl_analyse(self, source_path, steptrace, flag):
1440
+ def _ascend_graph_hccl_analyse(self, mindstudio_profiler_output, steptrace):
1278
1441
  """Analyse hccl profiler info."""
1279
1442
  if not self._profile_communication:
1280
1443
  return
@@ -1288,10 +1451,7 @@ class Profiler:
1288
1451
 
1289
1452
  hccl_raw_path = os.path.join(self._output_path, f'hccl_raw_{dev_id}.csv')
1290
1453
  hccl_raw_path = validate_and_normalize_path(hccl_raw_path)
1291
- if flag:
1292
- hccl_analyse = AscendHCCLGenerator(os.path.join(source_path, 'timeline'), steptrace)
1293
- else:
1294
- hccl_analyse = AscendHCCLGeneratorOld(os.path.join(source_path, 'timeline'))
1454
+ hccl_analyse = AscendHCCLGenerator(mindstudio_profiler_output, steptrace)
1295
1455
  hccl_analyse.parse()
1296
1456
  hccl_analyse.write(hccl_raw_path)
1297
1457
 
@@ -1300,62 +1460,87 @@ class Profiler:
1300
1460
  finally:
1301
1461
  pass
1302
1462
 
1303
- def _ascend_graph_msadvisor_analyse(self, job_id):
1304
- """Call MSAdvisor function."""
1305
- logger.info("MSAdvisor starts running.")
1306
- msadvisor = Msadvisor(job_id, self._rank_id, self._output_path, pretty=self._pretty_json)
1307
- try:
1308
- msadvisor.analyse()
1309
- except FileNotFoundError as err:
1310
- logger.warning("MSAdvisor: command not found,"
1311
- "please check if installed ascend-toolkit and set environment path correctly. %s", err)
1312
- except OSError as err:
1313
- logger.warning("Cannot execute binary file: Exec format error. %s", err)
1314
- except subprocess.CalledProcessError:
1315
- logger.warning("MSAdvisor running failed, please check MSAdvisor running log.")
1316
- except (ValueError, ProfilerFileNotFoundException) as err:
1317
- logger.warning("MSAdvisor running failed. %s", err)
1318
- finally:
1319
- pass
1320
- if context.get_context("mode") == context.PYNATIVE_MODE:
1321
- logger.warning("Pynative mode does not support MSAdvisor analyzer currently.")
1463
+ def _get_kernel_op_map(self, op_summary, kernels: List[CANNEvent]) -> List:
1464
+ """Get the mapping between framework operator and device kernel."""
1465
+ if not kernels:
1466
+ return []
1467
+ kernel_map = {}
1468
+ for kernel in kernels:
1469
+ key = kernel.name if kernel.name.startswith('hcom_') else (kernel.name, str(kernel.ts))
1470
+ kernel_map[key] = kernel.parent
1471
+ launch_ops = [None] * len(op_summary)
1472
+ for index, summary in enumerate(op_summary):
1473
+ ts = str(summary['Task Start Time(us)']).strip("\t")
1474
+ name = summary['Op Name']
1475
+ key = name if name.startswith("hcom_") else (name, ts)
1476
+ launch_op = kernel_map.get(key)
1477
+ if not launch_op:
1478
+ continue
1479
+ launch_ops[index] = launch_op.name
1480
+ return launch_ops
1322
1481
 
1323
- def _ascend_graph_analyse(self):
1324
- """Ascend graph mode analyse."""
1325
- self._ascend_profiler.finalize()
1482
+ def _ascend_graph_analyse(self, offline_path=None):
1483
+ if offline_path or self._analyse_mode == ANALYSIS_SYNC_MODE:
1484
+ self._ascend_graph_analyse_inner(offline_path)
1485
+ else:
1486
+ MultiProcessPool().add_async_job(self._ascend_graph_analyse_inner)
1326
1487
 
1327
- job_id = self._get_profiling_job_id()
1488
+ @timeit("Profiler analyse done")
1489
+ def _ascend_graph_analyse_inner(self, offline_path=None):
1490
+ """Ascend graph mode analyse."""
1491
+ job_id = self._get_profiling_job_id(offline_path)
1328
1492
  if not job_id:
1329
1493
  return
1330
1494
  logger.info("Profiling: job id is %s ", job_id)
1331
1495
 
1332
1496
  self._check_output_path(output_path=self._output_path)
1333
1497
  source_path = os.path.join(self._output_path, job_id)
1334
- self._minddata_analyse(source_path)
1498
+ self._minddata_analyse()
1335
1499
  if self._op_time:
1336
- flag = _ascend_graph_msprof_generator(source_path, self._model_iteration_dict)
1500
+ mindstudio_profiler_output = os.path.realpath(
1501
+ os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
1502
+ flag = _ascend_graph_msprof_generator(mindstudio_profiler_output, self._model_iteration_dict)
1337
1503
  if not flag:
1338
1504
  logger.warning('Current driver package not support all export mode, use single export mode, '
1339
1505
  'this may lead to performance degradation. Suggest upgrading the driver package.')
1340
1506
  ProfilerInfo.set_export_flag(flag)
1341
- op_summary, op_statistic, steptrace = _ascend_graph_msprof_analyse(source_path, flag)
1342
- self._ascend_op_analyse(op_summary, op_statistic, self._dynamic_status)
1343
- self._ascend_timeline_analyse(op_summary, steptrace)
1507
+ op_summary, op_statistic, steptrace, steptrace_model \
1508
+ = _ascend_graph_msprof_analyse(mindstudio_profiler_output)
1509
+ kernels = self._ascend_timeline_analyse(op_summary, steptrace, source_path, mindstudio_profiler_output)
1510
+
1511
+ if isinstance(op_statistic, np.ndarray) and op_statistic.shape[0] == 0 or \
1512
+ not isinstance(op_statistic, np.ndarray) and not op_statistic:
1513
+ logger.warning('Op statistic data is empty!')
1514
+ return
1515
+
1516
+ launch_ops = self._get_kernel_op_map(op_summary, kernels)
1517
+ self._ascend_op_analyse(op_summary, op_statistic, self._dynamic_status, launch_ops)
1344
1518
  graph_ids = np.unique(op_summary['Model ID']).tolist()
1345
- points = self._ascend_fpbp_analyse(op_summary, steptrace)
1519
+ self._ascend_fpbp_analyse(op_summary, steptrace)
1346
1520
  if len(graph_ids) == 1:
1347
1521
  self._ascend_step_trace_analyse(steptrace)
1522
+ else:
1523
+ self._ascend_step_trace_analyse(steptrace_model)
1348
1524
  if self._dynamic_status:
1349
1525
  self._ascend_dynamic_net_analyse(op_summary)
1350
- self._ascend_flops_analyse(op_summary)
1351
- self._ascend_graph_memory_analyse(points)
1352
- self._ascend_ms_analyze(source_path)
1353
- self._ascend_graph_hccl_analyse(source_path, steptrace, flag)
1354
- self._ascend_graph_msadvisor_analyse(job_id)
1526
+ self._ascend_flops_analyse(op_summary, launch_ops)
1527
+ self._ascend_graph_memory_analyse()
1528
+ self._ascend_ms_analyze(mindstudio_profiler_output)
1529
+ self._ascend_graph_hccl_analyse(mindstudio_profiler_output, steptrace)
1530
+ self._minddata_aicpu_analyse(self._output_path, job_id)
1355
1531
  ProfilerInfo.set_graph_ids(graph_ids)
1532
+ try:
1533
+ ProfilerInfo.set_data_simplification(self._data_simplification)
1534
+ ProfilerPathManager.simplify_data(self._output_path, self._data_simplification)
1535
+ except RuntimeError as err:
1536
+ logger.error('Profilier simplify data failed, %s', str(err))
1356
1537
 
1357
1538
  def _ascend_graph_start(self):
1358
1539
  """Ascend graph mode start profiling."""
1540
+ op_range_file = os.path.join(self._framework_path, "op_range_" + str(self._rank_id))
1541
+ if os.path.exists(op_range_file):
1542
+ os.remove(op_range_file)
1543
+ logger.info("Clear old op range filer.")
1359
1544
  self._ascend_profiler.start()
1360
1545
 
1361
1546
  def _gpu_analyse(self):
@@ -1370,12 +1555,14 @@ class Profiler:
1370
1555
  else:
1371
1556
  self._rank_size = int(os.getenv('RANK_SIZE', '1'))
1372
1557
 
1558
+ ProfilerInfo.set_rank_size(self._rank_size)
1559
+
1373
1560
  if self._has_started:
1374
1561
  self.stop()
1375
1562
  else:
1376
1563
  logger.info("No need to stop profiler because profiler has been stopped.")
1377
1564
 
1378
- self._minddata_analyse(self._output_path)
1565
+ self._minddata_analyse()
1379
1566
 
1380
1567
  try:
1381
1568
  self._analyse_step_relation_info()
@@ -1438,13 +1625,14 @@ class Profiler:
1438
1625
  if self._has_started:
1439
1626
  self.stop()
1440
1627
  else:
1441
- logger.info("No need to stop profiler because profiler has been stopped or profiler has not been started.")
1628
+ logger.info("No need to stop profiler because profiler has been stopped.")
1629
+
1442
1630
  if not self._op_time:
1443
1631
  return
1444
1632
  try:
1445
1633
  timeline_generator = CpuTimelineGenerator(self._output_path, self._rank_id, context.get_context("mode"))
1446
1634
  timeline_generator.init_timeline(pretty=self._pretty_json)
1447
- timeline_generator.write_timeline(self._timeline_size_limit_byte)
1635
+ timeline_generator.write_timeline()
1448
1636
  timeline_generator.write_timeline_summary()
1449
1637
  except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
1450
1638
  logger.warning('Fail to write timeline data: %s', err)
@@ -1453,15 +1641,13 @@ class Profiler:
1453
1641
  raise RuntimeError("Currently, the CPU platform does not support Pynative mode to collect performance "
1454
1642
  "data.")
1455
1643
 
1456
- def _analyse_step_trace(self, source_path=None, framework_parser=None, is_training_mode_flag=True,
1457
- is_gpu_kernel_async_launch_flag=False):
1644
+ def _analyse_step_trace(self, is_training_mode_flag=True, is_gpu_kernel_async_launch_flag=False):
1458
1645
  """
1459
1646
  Analyse step trace data and save the result.
1460
1647
 
1461
1648
  Args:
1462
- source_path (str): The directory that contains the step trace original data.
1463
- framework_parser (FrameworkParser): The framework parse instance.
1464
1649
  is_training_mode_flag (bool): Whether in training mode or not.
1650
+ is_gpu_kernel_async_launch_flag (bool): Whether gpu kernel launches are asynchronous
1465
1651
  """
1466
1652
  logger.info("Begin to parse step trace.")
1467
1653
  # construct output path
@@ -1492,68 +1678,35 @@ class Profiler:
1492
1678
  logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
1493
1679
  logger.info("The point info is: %s", point_info)
1494
1680
 
1495
- return point_info, is_training_mode_flag
1496
- return {}, is_training_mode_flag
1497
-
1498
- # whether keep the first step
1499
- skip_first_step_flag = framework_parser.check_op_name(INIT_OP_NAME)
1500
- # recognize inference or training mode
1501
- is_training_mode_flag = framework_parser.check_op_name("Gradients")
1502
- # parser the step trace files and save the result to disk
1503
- source_path = validate_and_normalize_path(source_path)
1504
- parser = AscendStepTraceParser(input_dir=source_path,
1505
- output_file_path=step_trace_intermediate_file_path,
1506
- skip_first_step=skip_first_step_flag,
1507
- is_training_mode=is_training_mode_flag)
1508
- parser.set_task_id_op_name_dict(framework_parser.to_task_id_full_op_name_dict())
1509
- parser.parse_and_save()
1510
- point_info = parser.record_point_info(point_info_file_path)
1511
-
1512
- # print parser result
1513
- parser.show()
1514
- logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
1515
- logger.info("The point info is: %s", point_info)
1516
-
1517
- return point_info, is_training_mode_flag
1518
-
1519
1681
  def _generate_timeline(self, reduce_op_type):
1520
1682
  """Used for gpu, generate timeline info, write to json format file."""
1521
1683
  try:
1522
1684
  timeline_generator = GpuTimelineGenerator(self._output_path, self._dev_id, self._rank_size,
1523
1685
  context.get_context("mode"))
1524
1686
  timeline_generator.init_timeline(reduce_op_type)
1525
- self._timeline_meta = timeline_generator.write_timeline(self._timeline_size_limit_byte)
1687
+ self._timeline_meta = timeline_generator.write_timeline()
1526
1688
  timeline_generator.write_timeline_summary()
1689
+ timeline_generator.parse_fwk_data()
1690
+ timeline_generator.write_fwk_timeline()
1527
1691
  return timeline_generator
1528
1692
  except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
1529
1693
  logger.warning('Fail to write timeline data: %s', err)
1530
1694
  raise RuntimeError('Fail to write timeline data.') from err
1531
1695
 
1532
- def _analyse_memory_usage(self, points):
1696
+ def _analyse_memory_usage(self):
1533
1697
  """Analyse memory usage data."""
1534
1698
  integrator = Integrator(self._output_path, self._rank_id)
1535
- aicore_detail_data = integrator.get_aicore_detail_data()
1536
- memory_parser = MemoryUsageParser(self._output_path, self._rank_id, pretty=self._pretty_json)
1537
- memory_parser.init_memory_usage_info(aicore_detail_data, points)
1538
- memory_parser.write_memory_files()
1699
+ integrator.get_aicore_detail_data()
1539
1700
 
1540
- def _get_profiling_job_id(self):
1701
+ def _get_profiling_job_id(self, offline_path):
1541
1702
  """Get profiling job id, which was generated by ada service.
1542
1703
 
1543
1704
  Returns:
1544
- str, profiling job id.
1705
+ str, profiling job id, eg: PROF_XXX/device_*.
1545
1706
  """
1546
1707
 
1547
- if self._is_offline_parser():
1548
- # The self._ascend_job_id directory like "/../PROF***" or "/../JOB***".
1549
- job_id = self._ascend_job_id.rstrip('/').split('/')[-1]
1550
- if job_id.startswith('PROF'):
1551
- device_dir = [dir for dir in os.listdir(self._ascend_job_id) if dir.startswith('device')]
1552
- info_file_path = get_file_path(os.path.join(self._ascend_job_id, device_dir[0]), "info.json")
1553
- training_rank_id, _ = self._parse_info_json(info_file_path)
1554
- self._rank_id = int(training_rank_id)
1555
- return os.path.join(job_id, device_dir[0])
1556
- return job_id
1708
+ if offline_path:
1709
+ self._output_path = os.path.join(offline_path, 'profiler')
1557
1710
 
1558
1711
  job_id = ""
1559
1712
  job_dirs = filter(lambda item: item.startswith('JOB') or item.startswith('PROF') and os.path.isdir(
@@ -1562,16 +1715,12 @@ class Profiler:
1562
1715
  job_dirs, key=lambda x: os.path.getmtime(os.path.join(self._output_path, x)), reverse=True)
1563
1716
 
1564
1717
  for dir_name in sorted_job_dirs:
1565
- if dir_name.startswith('PROF'):
1566
- prof_dir = os.path.join(self._output_path, dir_name)
1567
- device_dir = [dir for dir in os.listdir(prof_dir) \
1568
- if dir.startswith('device') and os.path.isdir(os.path.join(prof_dir, dir))]
1569
- job_dir = os.path.join(self._output_path, dir_name, device_dir[0])
1570
- else:
1571
- job_dir = os.path.join(self._output_path, dir_name)
1718
+ prof_dir = os.path.join(self._output_path, dir_name)
1719
+ device_dir = [dir for dir in os.listdir(prof_dir) \
1720
+ if dir.startswith('device') and os.path.isdir(os.path.join(prof_dir, dir))]
1721
+ job_dir = os.path.join(self._output_path, dir_name, device_dir[0])
1572
1722
 
1573
- start_file_path = get_file_path(job_dir, "start_info")
1574
- if start_file_path is None:
1723
+ if get_file_path(job_dir, "start_info") is None:
1575
1724
  logger.warning("Find profiling job path %s, but host_start.log not exist, "
1576
1725
  "profiler will ignore this job dir.", job_dir)
1577
1726
  continue
@@ -1582,25 +1731,26 @@ class Profiler:
1582
1731
  "profiler will ignore this job dir.", job_dir)
1583
1732
  continue
1584
1733
 
1585
- job_start_time = self._parse_start_log(start_file_path)
1586
- _, training_device_id = self._parse_info_json(info_file_path)
1734
+ prof_rank_id = ProfilerInfo.get_rank_id(self._output_path)
1735
+ prof_device_id = ProfilerInfo.get_device_id(prof_dir)
1736
+ job_start_time = self._parse_job_start_time(prof_dir)
1587
1737
 
1588
- if self._dev_id != training_device_id:
1589
- logger.debug("Find profiling find job path %s, but not current training device id. "
1590
- "Current training device id %s, but job path device id: %s, "
1591
- "profiler will ignore this job dir.", job_dir, self._dev_id, training_device_id)
1592
- continue
1738
+ if offline_path:
1739
+ self._start_time = int(job_start_time)
1740
+ else:
1741
+ if self._dev_id != prof_device_id and self._rank_id != prof_rank_id:
1742
+ logger.warning("Find profiling find job path %s, but not current training device id. "
1743
+ "Current training rank id %s, but job path rank id: %s, "
1744
+ "profiler will ignore this job dir.", job_dir, self._rank_id, prof_rank_id)
1745
+ continue
1593
1746
 
1594
- if int(job_start_time) < self._start_time:
1595
- logger.warning("Find profiling job path %s, but start_time(%d) is earlier than this training "
1596
- "start_time(%d), profiler will ignore this job dir.",
1597
- job_dir, int(job_start_time), self._start_time)
1598
- continue
1747
+ if job_start_time < self._start_time:
1748
+ logger.warning("Find profiling job path %s, but start_time(%d) is earlier than this training "
1749
+ "start_time(%d), profiler will ignore this job dir.",
1750
+ job_dir, job_start_time, self._start_time)
1751
+ continue
1599
1752
 
1600
- if dir_name.startswith('PROF'):
1601
- job_id = os.path.join(dir_name, device_dir[0])
1602
- else:
1603
- job_id = dir_name
1753
+ job_id = os.path.join(dir_name, device_dir[0])
1604
1754
  break
1605
1755
 
1606
1756
  if not job_id:
@@ -1700,15 +1850,21 @@ class Profiler:
1700
1850
  self._output_path = validate_and_normalize_path(output_path)
1701
1851
  else:
1702
1852
  output_path = kwargs.pop("output_path")
1853
+ if not isinstance(output_path, str):
1854
+ logger.warning(
1855
+ f"The output_path must be a string, but got type {type(output_path)}, it will be set to 'data'.")
1856
+ output_path = "data"
1703
1857
  self._output_path = validate_and_normalize_path(output_path)
1704
1858
 
1705
1859
  self._output_path = os.path.join(self._output_path, "profiler")
1706
1860
  if not os.path.exists(self._output_path):
1707
- os.makedirs(self._output_path, exist_ok=True)
1708
- os.chmod(self._output_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
1861
+ os.makedirs(self._output_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
1709
1862
  else:
1710
1863
  logger.warning("The target dir already exists. "
1711
1864
  "There may be some old profiling data, and they will be rewritten in the end.")
1865
+ self._framework_path = os.path.join(self._output_path, "FRAMEWORK")
1866
+ if not os.path.exists(self._framework_path):
1867
+ os.makedirs(self._framework_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
1712
1868
 
1713
1869
  def _parser_kwargs(self, kwargs):
1714
1870
  """Parse kwargs vale."""
@@ -1729,11 +1885,11 @@ class Profiler:
1729
1885
  f"but got type {type(self._op_time)}, it will be set to True.")
1730
1886
  self._op_time = True
1731
1887
 
1732
- self._data_process = kwargs.pop("data_process", True)
1888
+ self._data_process = kwargs.pop("data_process", False)
1733
1889
  if not isinstance(self._data_process, bool):
1734
1890
  logger.warning(f"For '{self.__class__.__name__}', the parameter data_process must be bool, "
1735
- f"but got type {type(self._data_process)}, it will be set to True.")
1736
- self._data_process = True
1891
+ f"but got type {type(self._data_process)}, it will be set to False.")
1892
+ self._data_process = False
1737
1893
 
1738
1894
  timeline_limit = kwargs.pop("timeline_limit", 500)
1739
1895
  if isinstance(timeline_limit, bool) or not isinstance(timeline_limit, int):
@@ -1745,55 +1901,22 @@ class Profiler:
1745
1901
  "[Profiler]The 'timeline_limit' parameter must be greater than 0, it will be set to 500.")
1746
1902
  timeline_limit = 500
1747
1903
  self._timeline_size_limit_byte = timeline_limit * 1024 * 1024
1748
- self._profile_framework = kwargs.pop("profile_framework", "all")
1749
- if self._profile_framework not in ["memory", "time", "all", None]:
1750
- logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of ['memory',"
1751
- f" 'time', 'all', None], but got {self._profile_framework}, it will be set to 'all'.")
1752
- self._profile_framework = "all"
1753
-
1754
- def _host_info_analyse(self):
1755
- """
1756
- Read data from the csv file, and write it into timeline file, so the timeline can be show on tracing tool.
1757
- """
1758
- logger.info("Profiling HostInfo start.")
1759
- host_dir = os.path.join(self._output_path, 'host_info')
1760
- host_dir = validate_and_normalize_path(host_dir)
1761
- if not os.path.exists(host_dir):
1762
- logger.error("Host info directory: %s not exist.", host_dir)
1763
- return
1764
- csv_file_name = 'host_info_' + str(self._rank_id) + '.csv'
1765
- json_file_name = 'timeline_' + str(self._rank_id) + '.json'
1766
- memory_file_name = 'host_memory_' + str(self._rank_id) + '.csv'
1767
- dataset_file_name = 'dataset_' + str(self._rank_id) + '.csv'
1768
- host_info_file = os.path.join(self._output_path, 'host_info', csv_file_name)
1769
- timeline_file = os.path.join(self._output_path, 'host_info', json_file_name)
1770
- memory_file = os.path.join(self._output_path, 'host_info', memory_file_name)
1771
- dataset_execution_file = os.path.join(self._output_path, 'host_info', dataset_file_name)
1772
- _parse_host_info(host_info_file, timeline_file, memory_file)
1773
- _calculate_dataset_execution_time(host_info_file, dataset_execution_file)
1774
- logger.info("Profile HostInfo finished.")
1775
-
1776
-
1777
- def _offline_parse(offline_path):
1778
- """Parse data in abnormal scenario, only support for host_info at present."""
1779
- logger.info("Profiling HostInfo offline start.")
1780
- host_dir = os.path.join(offline_path, 'profiler', 'host_info')
1781
- host_dir = validate_and_normalize_path(host_dir)
1782
- if not os.path.exists(host_dir):
1783
- logger.error("Host info directory: %s not exist.", host_dir)
1784
- return
1785
- files = os.listdir(host_dir)
1786
- for file in files:
1787
- if not file.startswith("host_info_") or not file.endswith(".csv"):
1788
- continue
1789
- rank_id = file.split('_')[-1].split('.')[0]
1790
- if not rank_id.isdigit():
1791
- logger.info("Cannot get rank_id from file: %s, skip it", file)
1792
- return
1793
- host_info_file = os.path.join(host_dir, file)
1794
- timeline_file = os.path.join(host_dir, f'timeline_{rank_id}.json')
1795
- memory_file = os.path.join(host_dir, f'host_memory_{rank_id}.csv')
1796
- dataset_execution_file = os.path.join(host_dir, f'dataset_{rank_id}.csv')
1797
- _parse_host_info(host_info_file, timeline_file, memory_file)
1798
- _calculate_dataset_execution_time(host_info_file, dataset_execution_file)
1799
- logger.info("Profile HostInfo offline finished.")
1904
+ self._profile_framework = kwargs.pop("profile_framework", None)
1905
+ if self._profile_framework not in ["time", "all", None]:
1906
+ logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of ["
1907
+ f" 'time', 'all', None], but got {self._profile_framework}, it will be set to None.")
1908
+ self._profile_framework = None
1909
+
1910
+ if not isinstance(self._data_simplification, bool):
1911
+ logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
1912
+ f"but got type {type(self._data_simplification)}, it will be set to True.")
1913
+ self._data_simplification = True
1914
+
1915
+ self._with_stack = kwargs.pop("with_stack", False)
1916
+ if not isinstance(self._with_stack, bool):
1917
+ logger.warning(f"For '{self.__class__.__name__}', the parameter with_stack must be bool, but got "
1918
+ f"type {type(self._with_stack)}, it will be set to False.")
1919
+ self._with_stack = False
1920
+ if self._with_stack and self._profile_framework not in ["time", "all"]:
1921
+ logger.warning("When using the with_stack parameter, the profile_framework parameter must be enabled.")
1922
+ self._with_stack = False