mindspore 2.0.0rc1-cp38-cp38-manylinux1_x86_64.whl → 2.2.0-cp38-cp38-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mindspore might be problematic.

Files changed (884)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Third_Party_Open_Source_Software_Notice +2 -2
  3. mindspore/__init__.py +5 -2
  4. mindspore/_akg/akg/build_module.py +5 -6
  5. mindspore/_akg/akg/composite/build_module.py +49 -16
  6. mindspore/_akg/akg/composite/split_stitch.py +10 -11
  7. mindspore/_akg/akg/config/repository.json +195 -0
  8. mindspore/_akg/akg/global_configs.py +5 -1
  9. mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
  10. mindspore/_akg/akg/tvm/api.py +4 -3
  11. mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
  12. mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
  13. mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
  14. mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
  15. mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
  16. mindspore/_akg/akg/tvm/build_module.py +16 -1
  17. mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
  18. mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
  19. mindspore/_akg/akg/tvm/ir_builder.py +1 -1
  20. mindspore/_akg/akg/tvm/module.py +1 -2
  21. mindspore/_akg/akg/tvm/stmt.py +2 -2
  22. mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
  23. mindspore/_akg/akg/utils/kernel_exec.py +58 -260
  24. mindspore/_akg/akg/utils/op_dsl.py +17 -1
  25. mindspore/_akg/akg/utils/result_analysis.py +4 -24
  26. mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
  27. mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
  28. mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
  29. mindspore/_c_mindrecord.cpython-38-x86_64-linux-gnu.so +0 -0
  30. mindspore/_check_jit_forbidden_api.py +5 -1
  31. mindspore/_checkparam.py +79 -62
  32. mindspore/_extends/graph_kernel/__init__.py +0 -1
  33. mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
  34. mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
  35. mindspore/_extends/graph_kernel/splitter.py +1 -9
  36. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
  37. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
  38. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
  39. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
  40. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
  41. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
  42. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
  43. mindspore/_extends/parse/__init__.py +19 -17
  44. mindspore/_extends/parse/namespace.py +7 -36
  45. mindspore/_extends/parse/parser.py +375 -189
  46. mindspore/_extends/parse/resources.py +36 -41
  47. mindspore/_extends/parse/standard_method.py +350 -245
  48. mindspore/_extends/parse/trope.py +2 -12
  49. mindspore/_extends/remote/kernel_build_server.py +24 -7
  50. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  51. mindspore/_install_custom.py +43 -0
  52. mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
  53. mindspore/amp.py +85 -19
  54. mindspore/bin/cache_admin +0 -0
  55. mindspore/bin/cache_server +0 -0
  56. mindspore/boost/base.py +2 -2
  57. mindspore/boost/boost.py +27 -32
  58. mindspore/boost/boost_cell_wrapper.py +37 -13
  59. mindspore/boost/grad_accumulation.py +1 -1
  60. mindspore/boost/grad_freeze.py +34 -6
  61. mindspore/boost/group_loss_scale_manager.py +15 -14
  62. mindspore/boost/less_batch_normalization.py +28 -3
  63. mindspore/common/__init__.py +15 -11
  64. mindspore/common/_auto_dynamic.py +68 -0
  65. mindspore/common/_jit_fallback_utils.py +111 -0
  66. mindspore/common/_register_for_adapter.py +17 -5
  67. mindspore/common/_register_for_tensor.py +2 -2
  68. mindspore/common/_stub_tensor.py +18 -15
  69. mindspore/common/_utils.py +31 -7
  70. mindspore/common/api.py +269 -101
  71. mindspore/common/auto_dynamic_shape.py +498 -0
  72. mindspore/common/dtype.py +61 -21
  73. mindspore/common/dump.py +9 -7
  74. mindspore/common/initializer.py +106 -76
  75. mindspore/common/jit_config.py +35 -14
  76. mindspore/common/lazy_inline.py +187 -0
  77. mindspore/common/mindir_util.py +101 -0
  78. mindspore/common/mutable.py +10 -13
  79. mindspore/common/parameter.py +246 -55
  80. mindspore/common/seed.py +13 -7
  81. mindspore/common/sparse_tensor.py +29 -33
  82. mindspore/common/tensor.py +907 -251
  83. mindspore/communication/__init__.py +7 -4
  84. mindspore/communication/_comm_helper.py +84 -4
  85. mindspore/communication/management.py +160 -88
  86. mindspore/config/op_info.config +99 -75
  87. mindspore/config/super_bar_config.json +36 -4
  88. mindspore/context.py +526 -219
  89. mindspore/dataset/__init__.py +9 -46
  90. mindspore/dataset/audio/__init__.py +4 -19
  91. mindspore/dataset/audio/transforms.py +545 -233
  92. mindspore/dataset/audio/utils.py +21 -18
  93. mindspore/dataset/callback/ds_callback.py +42 -13
  94. mindspore/dataset/core/config.py +158 -100
  95. mindspore/dataset/core/validator_helpers.py +1 -63
  96. mindspore/dataset/debug/debug_hook.py +45 -13
  97. mindspore/dataset/debug/pre_defined_hook.py +5 -5
  98. mindspore/dataset/engine/__init__.py +0 -5
  99. mindspore/dataset/engine/cache_client.py +38 -15
  100. mindspore/dataset/engine/datasets.py +615 -278
  101. mindspore/dataset/engine/datasets_audio.py +154 -283
  102. mindspore/dataset/engine/datasets_standard_format.py +104 -116
  103. mindspore/dataset/engine/datasets_text.py +443 -326
  104. mindspore/dataset/engine/datasets_user_defined.py +251 -164
  105. mindspore/dataset/engine/datasets_vision.py +839 -1443
  106. mindspore/dataset/engine/iterators.py +11 -4
  107. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
  108. mindspore/dataset/engine/obs/util.py +3 -0
  109. mindspore/dataset/engine/offload.py +6 -6
  110. mindspore/dataset/engine/queue.py +15 -14
  111. mindspore/dataset/engine/samplers.py +39 -23
  112. mindspore/dataset/engine/serializer_deserializer.py +22 -6
  113. mindspore/dataset/engine/validators.py +21 -331
  114. mindspore/dataset/text/__init__.py +5 -33
  115. mindspore/dataset/text/transforms.py +334 -165
  116. mindspore/dataset/text/utils.py +215 -145
  117. mindspore/dataset/transforms/__init__.py +1 -1
  118. mindspore/dataset/transforms/c_transforms.py +3 -2
  119. mindspore/dataset/transforms/py_transforms_util.py +40 -12
  120. mindspore/dataset/transforms/transforms.py +174 -71
  121. mindspore/dataset/utils/browse_dataset.py +25 -17
  122. mindspore/dataset/utils/line_reader.py +24 -21
  123. mindspore/dataset/vision/__init__.py +5 -26
  124. mindspore/dataset/vision/c_transforms.py +177 -165
  125. mindspore/dataset/vision/py_transforms.py +114 -119
  126. mindspore/dataset/vision/py_transforms_util.py +54 -51
  127. mindspore/dataset/vision/transforms.py +1127 -381
  128. mindspore/dataset/vision/utils.py +54 -38
  129. mindspore/dataset/vision/validators.py +12 -2
  130. mindspore/experimental/map_parameter.py +38 -4
  131. mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
  132. mindspore/experimental/optim/adam.py +192 -0
  133. mindspore/experimental/optim/adamw.py +181 -0
  134. mindspore/experimental/optim/lr_scheduler.py +1427 -0
  135. mindspore/experimental/optim/optimizer.py +252 -0
  136. mindspore/experimental/optim/sgd.py +147 -0
  137. mindspore/gen_ops.py +273 -0
  138. mindspore/include/OWNERS +1 -2
  139. mindspore/include/api/context.h +21 -1
  140. mindspore/include/api/data_type.h +2 -1
  141. mindspore/include/api/graph.h +0 -15
  142. mindspore/include/api/kernel.h +2 -0
  143. mindspore/include/api/kernel_api.h +37 -12
  144. mindspore/include/api/model.h +29 -42
  145. mindspore/include/api/model_group.h +14 -3
  146. mindspore/include/api/model_parallel_runner.h +18 -2
  147. mindspore/include/api/serialization.h +26 -0
  148. mindspore/include/api/status.h +1 -0
  149. mindspore/include/api/types.h +38 -4
  150. mindspore/include/c_api/ms/abstract.h +67 -0
  151. mindspore/include/c_api/ms/attribute.h +197 -0
  152. mindspore/include/c_api/ms/base/handle_types.h +43 -0
  153. mindspore/include/c_api/ms/base/macros.h +32 -0
  154. mindspore/include/c_api/ms/base/status.h +33 -0
  155. mindspore/include/c_api/ms/base/types.h +282 -0
  156. mindspore/include/c_api/ms/context.h +102 -0
  157. mindspore/include/c_api/ms/graph.h +160 -0
  158. mindspore/include/c_api/ms/node.h +606 -0
  159. mindspore/include/c_api/ms/tensor.h +161 -0
  160. mindspore/include/c_api/ms/value.h +84 -0
  161. mindspore/include/c_api/status_c.h +3 -0
  162. mindspore/include/dataset/constants.h +6 -12
  163. mindspore/include/dataset/execute.h +23 -13
  164. mindspore/include/dataset/text.h +26 -26
  165. mindspore/include/dataset/transforms.h +25 -31
  166. mindspore/include/dataset/vision.h +60 -60
  167. mindspore/include/dataset/vision_ascend.h +5 -6
  168. mindspore/include/dataset/vision_lite.h +17 -17
  169. mindspore/include/mindapi/base/format.h +0 -1
  170. mindspore/include/mindapi/base/type_id.h +2 -1
  171. mindspore/include/mindapi/base/types.h +5 -1
  172. mindspore/lib/libdnnl.so.2 +0 -0
  173. mindspore/lib/libjemalloc.so.2 +0 -0
  174. mindspore/lib/libmindspore.so +0 -0
  175. mindspore/lib/libmindspore_backend.so +0 -0
  176. mindspore/lib/libmindspore_common.so +0 -0
  177. mindspore/lib/libmindspore_core.so +0 -0
  178. mindspore/lib/libmindspore_glog.so.0 +0 -0
  179. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  180. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  181. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  182. mindspore/lib/libmindspore_shared_lib.so +0 -0
  183. mindspore/lib/libmpi_adapter.so +0 -0
  184. mindspore/lib/libnnacl.so +0 -0
  185. mindspore/lib/libopencv_core.so.4.5 +0 -0
  186. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  187. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  188. mindspore/lib/libps_cache.so +0 -0
  189. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  190. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  191. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
  192. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  193. mindspore/lib/plugin/ascend/libakg.so +0 -0
  194. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  195. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  196. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  197. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  198. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  199. mindspore/lib/plugin/cpu/libakg.so +0 -0
  200. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  201. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  202. mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
  203. mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
  204. mindspore/lib/plugin/gpu10.1/libnvidia_collective.so +0 -0
  205. mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
  206. mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
  207. mindspore/lib/plugin/gpu11.1/libnvidia_collective.so +0 -0
  208. mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
  209. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  210. mindspore/lib/plugin/gpu11.6/libnvidia_collective.so +0 -0
  211. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  212. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  213. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  214. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  215. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  216. mindspore/log.py +9 -6
  217. mindspore/mindrecord/filereader.py +33 -4
  218. mindspore/mindrecord/filewriter.py +70 -35
  219. mindspore/mindrecord/mindpage.py +40 -34
  220. mindspore/mindrecord/shardreader.py +1 -1
  221. mindspore/mindrecord/shardsegment.py +1 -1
  222. mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
  223. mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
  224. mindspore/mindrecord/tools/csv_to_mr.py +29 -13
  225. mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
  226. mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
  227. mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
  228. mindspore/nn/cell.py +463 -169
  229. mindspore/nn/dynamic_lr.py +47 -43
  230. mindspore/nn/layer/activation.py +225 -82
  231. mindspore/nn/layer/basic.py +121 -79
  232. mindspore/nn/layer/channel_shuffle.py +21 -21
  233. mindspore/nn/layer/combined.py +33 -26
  234. mindspore/nn/layer/container.py +277 -22
  235. mindspore/nn/layer/conv.py +441 -304
  236. mindspore/nn/layer/dense.py +19 -13
  237. mindspore/nn/layer/embedding.py +62 -49
  238. mindspore/nn/layer/flash_attention.py +264 -0
  239. mindspore/nn/layer/image.py +50 -39
  240. mindspore/nn/layer/math.py +62 -51
  241. mindspore/nn/layer/normalization.py +219 -167
  242. mindspore/nn/layer/padding.py +58 -70
  243. mindspore/nn/layer/pooling.py +334 -287
  244. mindspore/nn/layer/rnn_cells.py +53 -38
  245. mindspore/nn/layer/rnns.py +59 -56
  246. mindspore/nn/layer/thor_layer.py +52 -44
  247. mindspore/nn/layer/timedistributed.py +6 -4
  248. mindspore/nn/layer/transformer.py +284 -164
  249. mindspore/nn/learning_rate_schedule.py +34 -25
  250. mindspore/nn/loss/__init__.py +3 -2
  251. mindspore/nn/loss/loss.py +554 -311
  252. mindspore/nn/optim/ada_grad.py +12 -9
  253. mindspore/nn/optim/adadelta.py +14 -11
  254. mindspore/nn/optim/adafactor.py +19 -16
  255. mindspore/nn/optim/adam.py +62 -47
  256. mindspore/nn/optim/adamax.py +13 -10
  257. mindspore/nn/optim/adasum.py +12 -8
  258. mindspore/nn/optim/asgd.py +10 -9
  259. mindspore/nn/optim/ftrl.py +20 -17
  260. mindspore/nn/optim/lamb.py +16 -12
  261. mindspore/nn/optim/lars.py +8 -6
  262. mindspore/nn/optim/lazyadam.py +25 -20
  263. mindspore/nn/optim/momentum.py +10 -7
  264. mindspore/nn/optim/optimizer.py +61 -9
  265. mindspore/nn/optim/proximal_ada_grad.py +14 -13
  266. mindspore/nn/optim/rmsprop.py +17 -13
  267. mindspore/nn/optim/rprop.py +30 -17
  268. mindspore/nn/optim/sgd.py +40 -23
  269. mindspore/nn/optim/thor.py +24 -26
  270. mindspore/nn/probability/bijector/bijector.py +11 -11
  271. mindspore/nn/probability/bijector/exp.py +1 -1
  272. mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
  273. mindspore/nn/probability/bijector/invert.py +1 -1
  274. mindspore/nn/probability/bijector/power_transform.py +29 -29
  275. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  276. mindspore/nn/probability/bijector/softplus.py +5 -5
  277. mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
  278. mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
  279. mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
  280. mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
  281. mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
  282. mindspore/nn/probability/distribution/_utils/utils.py +1 -1
  283. mindspore/nn/probability/distribution/bernoulli.py +9 -9
  284. mindspore/nn/probability/distribution/beta.py +8 -8
  285. mindspore/nn/probability/distribution/categorical.py +23 -15
  286. mindspore/nn/probability/distribution/cauchy.py +5 -6
  287. mindspore/nn/probability/distribution/distribution.py +3 -3
  288. mindspore/nn/probability/distribution/exponential.py +4 -4
  289. mindspore/nn/probability/distribution/gamma.py +10 -10
  290. mindspore/nn/probability/distribution/geometric.py +8 -8
  291. mindspore/nn/probability/distribution/gumbel.py +8 -9
  292. mindspore/nn/probability/distribution/half_normal.py +5 -5
  293. mindspore/nn/probability/distribution/laplace.py +5 -5
  294. mindspore/nn/probability/distribution/log_normal.py +12 -11
  295. mindspore/nn/probability/distribution/logistic.py +8 -8
  296. mindspore/nn/probability/distribution/normal.py +6 -5
  297. mindspore/nn/probability/distribution/poisson.py +10 -11
  298. mindspore/nn/probability/distribution/student_t.py +8 -9
  299. mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
  300. mindspore/nn/probability/distribution/uniform.py +11 -11
  301. mindspore/nn/reinforcement/tensor_array.py +2 -2
  302. mindspore/nn/sparse/sparse.py +9 -9
  303. mindspore/nn/wrap/cell_wrapper.py +188 -63
  304. mindspore/nn/wrap/grad_reducer.py +21 -12
  305. mindspore/nn/wrap/loss_scale.py +136 -49
  306. mindspore/numpy/__init__.py +4 -4
  307. mindspore/numpy/array_creations.py +55 -56
  308. mindspore/numpy/array_ops.py +134 -35
  309. mindspore/numpy/logic_ops.py +66 -20
  310. mindspore/numpy/math_ops.py +142 -139
  311. mindspore/numpy/utils_const.py +2 -2
  312. mindspore/offline_debug/convert_async.py +2 -2
  313. mindspore/ops/_grad_experimental/__init__.py +7 -5
  314. mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
  315. mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
  316. mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
  317. mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
  318. mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
  319. mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
  320. mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
  321. mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
  322. mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
  323. mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
  324. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
  325. mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
  326. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
  327. mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
  328. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
  329. mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
  330. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
  331. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
  332. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
  333. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
  334. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  335. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
  336. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
  337. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
  338. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  339. mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
  340. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
  341. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
  342. mindspore/ops/_op_impl/aicpu/cast.py +52 -0
  343. mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
  344. mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
  345. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  346. mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
  347. mindspore/ops/_op_impl/aicpu/eps.py +32 -0
  348. mindspore/ops/_op_impl/aicpu/eye.py +4 -4
  349. mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
  350. mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
  351. mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
  352. mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
  353. mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
  354. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
  355. mindspore/ops/_op_impl/aicpu/lu.py +39 -0
  356. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
  357. mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
  358. mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
  359. mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
  360. mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
  361. mindspore/ops/_op_impl/aicpu/median.py +1 -0
  362. mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
  363. mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
  364. mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
  365. mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
  366. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
  367. mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
  368. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
  369. mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
  370. mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
  371. mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
  372. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
  373. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
  374. mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
  375. mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
  376. mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
  377. mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
  378. mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
  379. mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
  380. mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
  381. mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
  382. mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
  383. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
  384. mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
  385. mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
  386. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
  387. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
  388. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
  389. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
  390. mindspore/ops/_op_impl/tbe/__init__.py +6 -4
  391. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  392. mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
  393. mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
  394. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
  395. mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
  396. mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
  397. mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
  398. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  399. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
  400. mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
  401. mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
  402. mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
  403. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
  404. mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
  405. mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
  406. mindspore/ops/_op_impl/tbe/im2col.py +4 -4
  407. mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
  408. mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
  409. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
  410. mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
  411. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  412. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
  413. mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
  414. mindspore/ops/_primitive_cache.py +1 -1
  415. mindspore/ops/_tracefunc.py +241 -0
  416. mindspore/ops/_utils/utils.py +10 -2
  417. mindspore/ops/_vmap/vmap_array_ops.py +5 -3
  418. mindspore/ops/_vmap/vmap_base.py +5 -4
  419. mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
  420. mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
  421. mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
  422. mindspore/ops/_vmap/vmap_math_ops.py +5 -2
  423. mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
  424. mindspore/ops/arg_dtype_cast.py +54 -0
  425. mindspore/ops/composite/__init__.py +7 -5
  426. mindspore/ops/composite/base.py +78 -34
  427. mindspore/ops/composite/math_ops.py +5 -695
  428. mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
  429. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
  430. mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
  431. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  432. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  433. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
  434. mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
  435. mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
  436. mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
  437. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
  438. mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
  439. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
  440. mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
  441. mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
  442. mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
  443. mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
  444. mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
  445. mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
  446. mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
  447. mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
  448. mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
  449. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
  450. mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
  451. mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
  452. mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
  453. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
  454. mindspore/ops/deprecated.py +304 -0
  455. mindspore/ops/function/__init__.py +41 -4
  456. mindspore/ops/function/array_func.py +1108 -467
  457. mindspore/ops/function/clip_func.py +94 -27
  458. mindspore/ops/function/debug_func.py +3 -1
  459. mindspore/ops/function/grad/grad_func.py +82 -73
  460. mindspore/ops/function/image_func.py +28 -12
  461. mindspore/ops/function/linalg_func.py +135 -39
  462. mindspore/ops/function/math_func.py +3779 -894
  463. mindspore/ops/function/nn_func.py +1584 -657
  464. mindspore/ops/function/parameter_func.py +13 -3
  465. mindspore/ops/function/random_func.py +247 -153
  466. mindspore/ops/function/sparse_func.py +14 -11
  467. mindspore/ops/function/sparse_unary_func.py +173 -47
  468. mindspore/ops/function/spectral_func.py +8 -4
  469. mindspore/ops/function/vmap_func.py +8 -7
  470. mindspore/ops/functional.py +47 -16
  471. mindspore/ops/op_info_register.py +346 -86
  472. mindspore/ops/operations/__init__.py +38 -22
  473. mindspore/ops/operations/_grad_ops.py +145 -149
  474. mindspore/ops/operations/_inner_ops.py +298 -56
  475. mindspore/ops/operations/_ms_kernel.py +3 -3
  476. mindspore/ops/operations/_quant_ops.py +24 -28
  477. mindspore/ops/operations/_rl_inner_ops.py +9 -7
  478. mindspore/ops/operations/_scalar_ops.py +115 -0
  479. mindspore/ops/operations/_sequence_ops.py +148 -10
  480. mindspore/ops/operations/_tensor_array.py +1 -1
  481. mindspore/ops/operations/_thor_ops.py +2 -2
  482. mindspore/ops/operations/array_ops.py +1239 -561
  483. mindspore/ops/operations/comm_ops.py +166 -90
  484. mindspore/ops/operations/control_ops.py +3 -3
  485. mindspore/ops/operations/custom_ops.py +124 -102
  486. mindspore/ops/operations/debug_ops.py +24 -11
  487. mindspore/ops/operations/image_ops.py +86 -71
  488. mindspore/ops/operations/inner_ops.py +18 -13
  489. mindspore/ops/operations/linalg_ops.py +30 -11
  490. mindspore/ops/operations/math_ops.py +1730 -435
  491. mindspore/ops/operations/nn_ops.py +1953 -943
  492. mindspore/ops/operations/other_ops.py +65 -43
  493. mindspore/ops/operations/random_ops.py +258 -98
  494. mindspore/ops/operations/rl_ops.py +4 -36
  495. mindspore/ops/operations/sparse_ops.py +38 -33
  496. mindspore/ops/operations/spectral_ops.py +8 -4
  497. mindspore/ops/primitive.py +66 -44
  498. mindspore/ops/signature.py +5 -5
  499. mindspore/parallel/_auto_parallel_context.py +80 -19
  500. mindspore/parallel/_cost_model_context.py +42 -0
  501. mindspore/parallel/_offload_context.py +162 -72
  502. mindspore/parallel/_parallel_serialization.py +2 -2
  503. mindspore/parallel/_ps_context.py +16 -4
  504. mindspore/parallel/_recovery_context.py +2 -1
  505. mindspore/parallel/_tensor.py +15 -13
  506. mindspore/parallel/_transformer/layers.py +8 -6
  507. mindspore/parallel/_transformer/loss.py +1 -0
  508. mindspore/parallel/_transformer/moe.py +7 -7
  509. mindspore/parallel/_transformer/op_parallel_config.py +12 -1
  510. mindspore/parallel/_transformer/transformer.py +34 -14
  511. mindspore/parallel/_utils.py +36 -14
  512. mindspore/parallel/algo_parameter_config.py +114 -20
  513. mindspore/parallel/checkpoint_transform.py +16 -18
  514. mindspore/parallel/shard.py +16 -13
  515. mindspore/profiler/__init__.py +1 -1
  516. mindspore/profiler/common/struct_type.py +3 -3
  517. mindspore/profiler/common/util.py +3 -2
  518. mindspore/profiler/envprofiling.py +11 -4
  519. mindspore/profiler/parser/aicpu_data_parser.py +5 -3
  520. mindspore/profiler/parser/ascend_flops_generator.py +94 -0
  521. mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
  522. mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
  523. mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
  524. mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
  525. mindspore/profiler/parser/ascend_op_generator.py +276 -0
  526. mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
  527. mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
  528. mindspore/profiler/parser/base_timeline_generator.py +11 -7
  529. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
  530. mindspore/profiler/parser/flops_parser.py +15 -11
  531. mindspore/profiler/parser/framework_parser.py +92 -73
  532. mindspore/profiler/parser/hccl_parser.py +16 -12
  533. mindspore/profiler/parser/integrator.py +22 -11
  534. mindspore/profiler/parser/memory_usage_parser.py +36 -11
  535. mindspore/profiler/parser/minddata_analyzer.py +12 -14
  536. mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
  537. mindspore/profiler/parser/msadvisor_parser.py +8 -4
  538. mindspore/profiler/parser/op_intermediate_parser.py +5 -2
  539. mindspore/profiler/parser/optime_parser.py +1 -1
  540. mindspore/profiler/parser/profiler_info.py +4 -5
  541. mindspore/profiler/parser/step_trace_parser.py +11 -14
  542. mindspore/profiler/profiling.py +678 -377
  543. mindspore/rewrite/api/node.py +211 -54
  544. mindspore/rewrite/api/node_type.py +5 -0
  545. mindspore/rewrite/api/pattern_engine.py +22 -23
  546. mindspore/rewrite/api/scoped_value.py +20 -17
  547. mindspore/rewrite/api/symbol_tree.py +252 -106
  548. mindspore/rewrite/api/tree_node_helper.py +3 -0
  549. mindspore/rewrite/ast_helpers/__init__.py +2 -1
  550. mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
  551. mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
  552. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
  553. mindspore/rewrite/common/rewrite_elog.py +5 -1
  554. mindspore/rewrite/namer.py +51 -51
  555. mindspore/rewrite/namespace.py +14 -5
  556. mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
  557. mindspore/rewrite/node/call_function.py +79 -0
  558. mindspore/rewrite/node/cell_container.py +135 -0
  559. mindspore/rewrite/node/control_flow.py +88 -0
  560. mindspore/rewrite/{node.py → node/node.py} +313 -247
  561. mindspore/rewrite/node/node_manager.py +254 -0
  562. mindspore/rewrite/node/node_topological_manager.py +243 -0
  563. mindspore/rewrite/parsers/arguments_parser.py +22 -21
  564. mindspore/rewrite/parsers/assign_parser.py +225 -239
  565. mindspore/rewrite/parsers/attribute_parser.py +9 -7
  566. mindspore/rewrite/parsers/class_def_parser.py +179 -218
  567. mindspore/rewrite/parsers/constant_parser.py +9 -6
  568. mindspore/rewrite/parsers/container_parser.py +9 -7
  569. mindspore/rewrite/parsers/for_parser.py +36 -15
  570. mindspore/rewrite/parsers/function_def_parser.py +23 -20
  571. mindspore/rewrite/parsers/if_parser.py +28 -24
  572. mindspore/rewrite/parsers/module_parser.py +202 -25
  573. mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
  574. mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
  575. mindspore/rewrite/parsers/return_parser.py +6 -6
  576. mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
  577. mindspore/rewrite/sparsify/sparsify.py +4 -1
  578. mindspore/rewrite/sparsify/utils.py +11 -5
  579. mindspore/rewrite/symbol_tree.py +577 -732
  580. mindspore/rewrite/symbol_tree_builder.py +9 -175
  581. mindspore/rewrite/symbol_tree_dumper.py +2 -2
  582. mindspore/run_check/_check_version.py +46 -39
  583. mindspore/run_check/run_check.py +3 -2
  584. mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
  585. mindspore/safeguard/rewrite_obfuscation.py +517 -0
  586. mindspore/scipy/__init__.py +1 -1
  587. mindspore/scipy/linalg.py +67 -61
  588. mindspore/scipy/ops.py +5 -41
  589. mindspore/scipy/ops_grad.py +3 -2
  590. mindspore/scipy/ops_wrapper.py +5 -5
  591. mindspore/scipy/optimize/line_search.py +8 -8
  592. mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
  593. mindspore/scipy/optimize/minimize.py +16 -12
  594. mindspore/scipy/utils.py +1 -52
  595. mindspore/scipy/utils_const.py +4 -4
  596. mindspore/train/__init__.py +4 -4
  597. mindspore/train/_utils.py +13 -5
  598. mindspore/train/amp.py +410 -148
  599. mindspore/train/anf_ir_pb2.py +16 -4
  600. mindspore/train/callback/_backup_and_restore.py +8 -11
  601. mindspore/train/callback/_callback.py +80 -3
  602. mindspore/train/callback/_checkpoint.py +82 -51
  603. mindspore/train/callback/_early_stop.py +12 -15
  604. mindspore/train/callback/_history.py +1 -1
  605. mindspore/train/callback/_lambda_callback.py +13 -13
  606. mindspore/train/callback/_landscape.py +21 -17
  607. mindspore/train/callback/_loss_monitor.py +9 -10
  608. mindspore/train/callback/_on_request_exit.py +16 -33
  609. mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
  610. mindspore/train/callback/_summary_collector.py +44 -30
  611. mindspore/train/callback/_time_monitor.py +62 -12
  612. mindspore/train/data_sink.py +10 -16
  613. mindspore/train/dataset_helper.py +154 -86
  614. mindspore/train/loss_scale_manager.py +14 -9
  615. mindspore/train/metrics/__init__.py +10 -2
  616. mindspore/train/metrics/accuracy.py +1 -1
  617. mindspore/train/metrics/auc.py +1 -1
  618. mindspore/train/metrics/bleu_score.py +2 -2
  619. mindspore/train/metrics/confusion_matrix.py +14 -14
  620. mindspore/train/metrics/cosine_similarity.py +3 -3
  621. mindspore/train/metrics/dice.py +1 -1
  622. mindspore/train/metrics/fbeta.py +1 -1
  623. mindspore/train/metrics/hausdorff_distance.py +8 -6
  624. mindspore/train/metrics/mean_surface_distance.py +5 -4
  625. mindspore/train/metrics/metric.py +49 -17
  626. mindspore/train/metrics/occlusion_sensitivity.py +4 -4
  627. mindspore/train/metrics/perplexity.py +1 -1
  628. mindspore/train/metrics/precision.py +2 -2
  629. mindspore/train/metrics/recall.py +2 -3
  630. mindspore/train/metrics/roc.py +7 -7
  631. mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
  632. mindspore/train/metrics/topk.py +7 -4
  633. mindspore/train/mind_ir_pb2.py +193 -48
  634. mindspore/train/model.py +377 -133
  635. mindspore/train/serialization.py +697 -245
  636. mindspore/train/summary/_summary_adapter.py +5 -2
  637. mindspore/train/summary/_writer_pool.py +4 -3
  638. mindspore/train/summary/summary_record.py +25 -23
  639. mindspore/train/train_thor/convert_utils.py +39 -23
  640. mindspore/train/train_thor/dataset_helper.py +4 -3
  641. mindspore/train/train_thor/model_thor.py +8 -8
  642. mindspore/version.py +1 -1
  643. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
  644. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +647 -818
  645. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
  646. mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
  647. mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
  648. mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
  649. mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
  650. mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
  651. mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
  652. mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
  653. mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
  654. mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
  655. mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
  656. mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
  657. mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
  658. mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
  659. mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
  660. mindspore/_akg/akg/tvm/rpc/base.py +0 -182
  661. mindspore/_akg/akg/tvm/rpc/client.py +0 -436
  662. mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
  663. mindspore/_akg/akg/tvm/rpc/server.py +0 -413
  664. mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
  665. mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
  666. mindspore/_extends/graph_kernel/expander.py +0 -80
  667. mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
  668. mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
  669. mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
  670. mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
  671. mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
  672. mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
  673. mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
  674. mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
  675. mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
  676. mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
  677. mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
  678. mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
  679. mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
  680. mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
  681. mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
  682. mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
  683. mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
  684. mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
  685. mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
  686. mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
  687. mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
  688. mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
  689. mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
  690. mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
  691. mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
  692. mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
  693. mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
  694. mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
  695. mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
  696. mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
  697. mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
  698. mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
  699. mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
  700. mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
  701. mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
  702. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
  703. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
  704. mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
  705. mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
  706. mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
  707. mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
  708. mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
  709. mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
  710. mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
  711. mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
  712. mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
  713. mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
  714. mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
  715. mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
  716. mindspore/_extends/parse/jit_fallback_modules.py +0 -51
  717. mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
  718. mindspore/dataset/engine/graphdata.py +0 -1586
  719. mindspore/include/api/net.h +0 -142
  720. mindspore/ops/_grad/grad_array_ops.py +0 -1347
  721. mindspore/ops/_grad/grad_clip_ops.py +0 -84
  722. mindspore/ops/_grad/grad_debug_ops.py +0 -68
  723. mindspore/ops/_grad/grad_inner_ops.py +0 -235
  724. mindspore/ops/_grad/grad_math_ops.py +0 -1684
  725. mindspore/ops/_grad/grad_nn_ops.py +0 -1529
  726. mindspore/ops/_grad/grad_other_ops.py +0 -89
  727. mindspore/ops/_grad/grad_sequence_ops.py +0 -296
  728. mindspore/ops/_grad/grad_sparse.py +0 -323
  729. mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
  730. mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
  731. mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
  732. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  733. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  734. mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
  735. mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
  736. mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
  737. mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
  738. mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
  739. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
  740. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
  741. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  742. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
  743. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  744. mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
  745. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  746. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
  747. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
  748. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
  749. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  750. mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
  751. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
  752. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
  753. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
  754. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
  755. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
  756. mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
  757. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
  758. mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
  759. mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
  760. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
  761. mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
  762. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  763. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  764. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
  765. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
  766. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
  767. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  768. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  769. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  770. mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
  771. mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
  772. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  773. mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
  774. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
  775. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
  776. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
  777. mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
  778. mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
  779. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
  780. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  781. mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
  782. mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
  783. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
  784. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
  785. mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
  786. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  787. mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
  788. mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
  789. mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
  790. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
  791. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
  792. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
  793. mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
  794. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  795. mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
  796. mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
  797. mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
  798. mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
  799. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
  800. mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
  801. mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
  802. mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
  803. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
  804. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
  805. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
  806. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
  807. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  808. mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
  809. mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
  810. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
  811. mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
  812. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  813. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  814. mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
  815. mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
  816. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
  817. mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
  818. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  819. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  820. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  821. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
  822. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
  823. mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
  824. mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
  825. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
  826. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  827. mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
  828. mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
  829. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
  830. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
  831. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
  832. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
  833. mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
  834. mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
  835. mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
  836. mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
  837. mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
  838. mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
  839. mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
  840. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
  841. mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
  842. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
  843. mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
  844. mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
  845. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
  846. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  847. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
  848. mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
  849. mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
  850. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
  851. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  852. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
  853. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
  854. mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
  855. mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
  856. mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
  857. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  858. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  859. mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
  860. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
  861. mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
  862. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
  863. mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
  864. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  865. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
  866. mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
  867. mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
  868. mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
  869. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  870. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  871. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
  872. mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
  873. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
  874. mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
  875. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
  876. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
  877. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
  878. mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
  879. mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
  880. mindspore/rewrite/node_visitor.py +0 -44
  881. mindspore/rewrite/topological_manager.py +0 -203
  882. mindspore/scipy/sparse/linalg.py +0 -192
  883. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
  884. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2020-2022 Huawei Technologies Co., Ltd
1
+ # Copyright 2020-2023 Huawei Technologies Co., Ltd
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -19,38 +19,41 @@ import time
19
19
  import json
20
20
  import glob
21
21
  import subprocess
22
+ import csv
22
23
  from enum import Enum
24
+ import numpy as np
23
25
 
24
26
  from mindspore import log as logger, context
25
- from mindspore.communication.management import GlobalComm, get_rank, get_group_size
27
+ from mindspore.context import get_auto_parallel_context
28
+ from mindspore.communication.management import GlobalComm, get_rank, get_group_size, get_local_rank
26
29
  import mindspore._c_expression as c_expression
27
30
  import mindspore._c_dataengine as cde
28
31
  from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
29
32
  ProfilerIOException, ProfilerException, ProfilerRawFileException
30
33
  from mindspore.profiler.common.exceptions.exceptions import ProfilerPathErrorException
31
34
  from mindspore.profiler.common.exceptions.exceptions import ProfilerDirNotFoundException
32
- from mindspore.profiler.common.util import get_file_path, fwrite_format
33
- from mindspore.profiler.common.validator.validate_path import \
34
- validate_and_normalize_path
35
- from mindspore.profiler.parser.aicpu_data_parser import DataPreProcessParser
36
- from mindspore.profiler.parser.framework_parser import FrameworkParser, GpuFrameWorkParser, DynamicFrameWorkParser
37
- from mindspore.profiler.parser.hwts_log_parser import HWTSLogParser
35
+ from mindspore.profiler.common.util import get_file_path
36
+ from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
37
+ from mindspore.profiler.parser.framework_parser import GpuFrameWorkParser, DynamicFrameWorkParser
38
38
  from mindspore.profiler.parser.integrator import Integrator, DeviceTarget
39
39
  from mindspore.profiler.parser.cpu_gpu_timeline_generator import GpuTimelineGenerator, CpuTimelineGenerator
40
40
  from mindspore.profiler.parser.ascend_timeline_generator import AscendTimelineGenerator
41
41
  from mindspore.profiler.parser.memory_usage_parser import MemoryUsageParser
42
42
  from mindspore.profiler.parser.minddata_parser import MinddataParser
43
43
  from mindspore.profiler.parser.minddata_analyzer import MinddataProfilingAnalyzer
44
- from mindspore.profiler.parser.flops_parser import FlopsParser
45
44
  from mindspore.profiler.parser.minddata_pipeline_parser import \
46
45
  MinddataPipelineParser
47
- from mindspore.profiler.parser.optime_parser import OPComputeTimeParser
48
46
  from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser, AscendStepTraceParser
49
- from mindspore.profiler.parser.hccl_parser import HcclParser
50
- from mindspore.profiler.parser.op_intermediate_parser import OPIntermediateParser
51
47
  from mindspore.profiler.parser.msadvisor_analyzer import Msadvisor
52
48
  from mindspore.profiler.parser.profiler_info import ProfilerInfo
53
49
  from mindspore.common.api import _pynative_executor
50
+ from mindspore.profiler.parser.ascend_msprof_exporter import AscendMsprofExporter
51
+ from mindspore.profiler.parser.ascend_msprof_generator import AscendMsprofDataGenerator
52
+ from mindspore.profiler.parser.ascend_fpbp_generator import AscendFPBPGenerator
53
+ from mindspore.profiler.parser.ascend_op_generator import AscendOPGenerator
54
+ from mindspore.profiler.parser.ascend_steptrace_generator import AscendStepTraceGenerator
55
+ from mindspore.profiler.parser.ascend_flops_generator import AscendFlopsGenerator
56
+ from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator
54
57
 
55
58
  INIT_OP_NAME = 'Default/InitDataSetQueue'
56
59
 
@@ -67,14 +70,22 @@ AICORE_METRICS_DICT = {
67
70
 
68
71
  class DeviceSupportParam(Enum):
69
72
  """The device target enum."""
70
- CPU = ['start', 'start_profile', 'output_path', 'timeline_limit']
71
- GPU = ['start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'sync_enable', 'op_time']
72
- ASCEND = ['start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'profile_memory',
73
- 'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', 'op_time', 'ascend_job_id']
73
+ CPU = ['start', 'start_profile', 'output_path', 'timeline_limit', 'profile_framework', 'op_time']
74
+ GPU = [
75
+ 'start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'sync_enable', 'op_time',
76
+ 'profile_framework'
77
+ ]
78
+ ASCEND = [
79
+ 'start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'profile_memory',
80
+ 'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', 'op_time', 'ascend_job_id',
81
+ 'profile_framework'
82
+ ]
74
83
 
75
84
 
76
- ALWAYS_VALID_PARAM = ['start', 'start_profile', 'output_path', 'data_process', 'parallel_strategy', 'l2_cache',
77
- 'ascend_job_id', 'op_time']
85
+ ALWAYS_VALID_PARAM = [
86
+ 'start', 'start_profile', 'output_path', 'data_process', 'parallel_strategy', 'l2_cache',
87
+ 'ascend_job_id', 'op_time', 'profile_framework'
88
+ ]
78
89
 
79
90
 
80
91
  def _environment_check():
@@ -82,30 +93,241 @@ def _environment_check():
82
93
  raise RuntimeError("Profiler is not supported when MindSpore is compiled with \'-s on\'.")
83
94
 
84
95
 
96
+ class ExecutionCalculator:
97
+ """Calculate the average execution time and counts for each stage."""
98
+
99
+ def __init__(self, event, stage, custom_info):
100
+ self.event = event
101
+ self.stage = stage
102
+ self.custom_info = custom_info
103
+ self.count = 0
104
+ self.average_execution = 0
105
+
106
+
107
+ def _calculate_dataset_item(row, execution_time_map, ts_map):
108
+ """Calculate dataset execution time for one row."""
109
+ start_end = row['start_end']
110
+ event = row['event']
111
+ stage = row['stage']
112
+ custom_info = row['custom_info']
113
+ event_stage_tid_pid = event + '_' + stage + '_' + row['tid'] + '_' + row['pid']
114
+ if start_end == '1' and event_stage_tid_pid in ts_map:
115
+ title = event + '::' + stage + '::' + custom_info
116
+ ts_end = int(row['time_stamp(us)'])
117
+ ts = ts_map[event_stage_tid_pid]
118
+ dur = ts_end - ts
119
+ if title not in execution_time_map:
120
+ execution_time_map[title] = ExecutionCalculator(event=event, stage=stage, custom_info=custom_info)
121
+ execution_time_map[title].count += 1
122
+ if execution_time_map[title].count != 0:
123
+ execution_time_map[title].average_execution += \
124
+ (dur - execution_time_map[title].average_execution) / execution_time_map[title].count
125
+ del ts_map[event_stage_tid_pid]
126
+ elif start_end == '0':
127
+ ts = int(row['time_stamp(us)'])
128
+ ts_map[event_stage_tid_pid] = ts
129
+ elif start_end == '2':
130
+ logger.info("It is a instant event, skip to calculate execution time. item: %s.", row)
131
+ else:
132
+ logger.warning("Can not map the start time for item: %s.", row)
133
+
134
+
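The update `average_execution += (dur - average_execution) / count` used above is the standard incremental-mean formula: after n samples the running value equals the plain arithmetic mean, without storing every duration. A minimal standalone sketch with hypothetical durations:

```python
# Incremental mean, as used by ExecutionCalculator above (hypothetical durations in us).
durations = [120, 80, 100]

count = 0
average_execution = 0
for dur in durations:
    count += 1
    average_execution += (dur - average_execution) / count

# After the loop the running value equals the ordinary mean.
assert abs(average_execution - sum(durations) / len(durations)) < 1e-9
print(count, average_execution)  # 3 100.0
```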
135
+ def _calculate_dataset_execution_time(input_file, output_file):
136
+ r"""
137
+ Calculate the average execution time of each dataset stage from the host info file and write the result to a csv file.
138
+
139
+ Args:
140
+ input_file: the original host_info file, in csv format.
141
+ output_file: the output file, in csv format.
142
+ """
143
+ input_file = validate_and_normalize_path(input_file)
144
+ # execution_time_map is used to store the ExecutionCalculator for each stage.
145
+ execution_time_map = {}
146
+ # ts_map is used to store the start time of each event_stage_tid_pid.
147
+ ts_map = {}
148
+ with open(input_file, 'r') as f:
149
+ for row in csv.DictReader(f):
150
+ try:
151
+ module_name = row['module_name']
152
+ if module_name != 'Dataset':
153
+ continue
154
+ _calculate_dataset_item(row, execution_time_map, ts_map)
155
+ except KeyError as e:
156
+ logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
157
+ continue
158
+ if ts_map:
159
+ logger.warning("Only start time is record for these items:")
160
+ for k, v in ts_map.items():
161
+ logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
162
+ output_file = validate_and_normalize_path(output_file)
163
+ flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
164
+ modes = stat.S_IWUSR | stat.S_IRUSR
165
+ with os.fdopen(os.open(output_file, flags, modes), 'w') as f:
166
+ csv_writer = csv.writer(f)
167
+ csv_writer.writerow(['Operation', 'Stage', 'Occurrences', 'Avg. time (us)', 'Custom Info'])
168
+ for _, v in execution_time_map.items():
169
+ csv_writer.writerow([v.event, v.stage, v.count, v.average_execution, v.custom_info])
170
+ os.chmod(output_file, modes)
171
+ logger.info('Successfully calculated the execution time and wrote it to file: %s.', output_file)
172
+
173
+
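The csv written above contains one row per `event::stage::custom_info` key. A hypothetical output with a single aggregated Dataset stage (names and numbers are made up) might look like:

```
Operation,Stage,Occurrences,Avg. time (us),Custom Info
Pipeline,GeneratorOp,100,351.2,batch_1
```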
174
+ def _extract_timeline_item(row, time_line, ts_map):
175
+ """Process one row, try to extract a timeline item."""
176
+ start_end = row['start_end']
177
+ event_stage_tid_pid = row['event'] + '_' + row['stage'] + '_' + row['tid'] + '_' + row['pid']
178
+ # map start and end, put the mapped event into timeline.
179
+ if start_end == '1' and event_stage_tid_pid in ts_map:
180
+ title = row['event'] + '::' + row['stage']
181
+ event = {'name': title, 'cat': row['module_name']}
182
+ ts_end = int(row['time_stamp(us)'])
183
+ ts = ts_map[event_stage_tid_pid]
184
+ event['ts'] = ts
185
+ event['dur'] = ts_end - ts
186
+ event['ph'] = 'X'
187
+ event['pid'] = row['pid']
188
+ event['tid'] = row['tid']
189
+ event['args'] = {'parent_pid': row['parent_pid']}
190
+ time_line.append(event)
191
+ del ts_map[event_stage_tid_pid]
192
+ elif start_end == '0':
193
+ ts = int(row['time_stamp(us)'])
194
+ ts_map[event_stage_tid_pid] = ts
195
+ # Put the instant event into timeline.
196
+ elif start_end == '2':
197
+ title = row['event'] + '::' + row['stage']
198
+ event = {
199
+ 'name': title, 'cat': row['module_name'], 'ts': int(row['time_stamp(us)']), 'ph': 'i',
200
+ 'pid': row['pid'], 'tid': row['tid'], 'args': {'parent_pid': row['parent_pid']}
201
+ }
202
+ time_line.append(event)
203
+ else:
204
+ logger.warning("Can not map the start time for item: %s.", row)
205
+
206
+
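The dicts assembled above follow the Chrome trace event format, so the resulting json can be viewed in chrome://tracing or Perfetto: a matched start/end pair becomes a complete event (`'ph': 'X'` with `ts` and `dur`), while a `start_end == '2'` row becomes an instant event (`'ph': 'i'`). A sketch of the two shapes, with made-up values:

```python
import json

# Complete event built from a matched start ('0') / end ('1') pair (hypothetical values).
complete_event = {
    'name': 'RunGraph::Launch', 'cat': 'Runtime',
    'ts': 1000, 'dur': 450, 'ph': 'X',
    'pid': '4321', 'tid': '1234',
    'args': {'parent_pid': '4000'},
}
# Instant event built from a start_end == '2' row.
instant_event = {
    'name': 'RunGraph::Mark', 'cat': 'Runtime',
    'ts': 2000, 'ph': 'i',
    'pid': '4321', 'tid': '1234',
    'args': {'parent_pid': '4000'},
}
print(json.dumps([complete_event, instant_event]))
```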
207
+ def _parse_host_info(input_file, output_timeline_file, output_memory_file, is_develop_user=True):
208
+ r"""
209
+ Parse the host info into timeline file, so as to show on UI.
210
+
211
+ Args:
212
+ input_file: the original host_info file, in csv format.
213
+ output_timeline_file: the output timeline file, in json format.
214
+ output_memory_file: the output memory_usage file, in csv format.
215
+ is_develop_user: some data is only shown to developer users; other users do not need to analyse it.
216
+ """
217
+ input_file = validate_and_normalize_path(input_file)
218
+ time_line = []
219
+ # ts_map is used to store the start time of each event_stage_tid_pid
220
+ ts_map = {}
221
+ memory_header = [
222
+ 'tid', 'pid', 'parent_pid', 'module_name', 'event', 'stage', 'level', 'start_end', 'custom_info',
223
+ 'memory_usage(kB)', 'time_stamp(us)'
224
+ ]
225
+ memory_info = []
226
+ with open(input_file, 'r') as f:
227
+ for row in csv.DictReader(f):
228
+ try:
229
+ level = row['level']
230
+ if level == '0' and not is_develop_user:
231
+ continue
232
+ if int(row['time_stamp(us)']) > 0:
233
+ _extract_timeline_item(row, time_line, ts_map)
234
+ if int(row['memory_usage(kB)']) > 0:
235
+ memory_info.append(row)
236
+ except KeyError as e:
237
+ logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
238
+ continue
239
+ if memory_info:
240
+ with os.fdopen(os.open(output_memory_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as csv_file:
241
+ csv_writer = csv.DictWriter(csv_file, fieldnames=memory_header)
242
+ csv_writer.writeheader()
243
+ for item in memory_info:
244
+ csv_writer.writerow(item)
245
+ os.chmod(output_memory_file, stat.S_IREAD | stat.S_IWRITE)
246
+ else:
247
+ logger.warning("No memory_usage is record in file: %s", input_file)
248
+
249
+ if ts_map:
250
+ logger.warning("Only start time is record for these items:")
251
+ for k, v in ts_map.items():
252
+ logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
253
+ last_dash = k.rfind('_')
254
+ if last_dash == -1:
255
+ logger.error("Can't find pid in the event_stage_tid_pid string: %s", k)
256
+ continue
257
+ second_last_dash = k.rfind('_', 0, last_dash - 1)
258
+ if second_last_dash == -1:
259
+ logger.error("Can't find tid in the event_stage_tid_pid string: %s", k)
260
+ continue
261
+ pid = k[last_dash + 1:]
262
+ tid = k[second_last_dash + 1: last_dash]
263
+ title = k[:second_last_dash]
264
+ unfinished_timeline = {'name': title, 'pid': pid, 'tid': tid, 'ph': 'B', 'ts': int(v)}
265
+ time_line.append(unfinished_timeline)
266
+
267
+ if time_line:
268
+ timeline_file = validate_and_normalize_path(output_timeline_file)
269
+ with os.fdopen(os.open(timeline_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
270
+ json.dump(time_line, json_file)
271
+ os.chmod(timeline_file, stat.S_IREAD | stat.S_IWRITE)
272
+ else:
273
+ logger.warning("No valid time_stamp is record in file: %s", input_file)
274
+
275
+
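Both host-info parsers above read the same csv schema (the columns listed in `memory_header`). A hypothetical two-row input describing one Dataset stage, a start row (`start_end=0`) followed by its matching end row (`start_end=1`):

```
tid,pid,parent_pid,module_name,event,stage,level,start_end,custom_info,memory_usage(kB),time_stamp(us)
1234,4321,4000,Dataset,Pipeline,GeneratorOp,1,0,batch_1,0,1000
1234,4321,4000,Dataset,Pipeline,GeneratorOp,1,1,batch_1,0,1450
```

With such input, `_calculate_dataset_execution_time` would report one occurrence with an average of 450 us, and `_parse_host_info` would emit one complete timeline event of the same duration.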
276
+ def _ascend_graph_msprof_generator(source_path, model_iteration_dict):
277
+ try:
278
+ msprof_exporter = AscendMsprofExporter(source_path)
279
+ msprof_exporter.export(model_iteration_dict)
280
+ except ProfilerException as err:
281
+ logger.warning(err.message)
282
+ finally:
283
+ pass
284
+
285
+
286
+ def _ascend_graph_msprof_analyse(source_path):
287
+ """
288
+ Ascend graph model msprof data analyse.
289
+
290
+ Returns:
291
+ list[obj]: The list is: df_op_summary, df_op_statistic, df_step_trace.
292
+ """
293
+ df_op_summary = []
294
+ df_op_statistic = []
295
+ df_step_trace = []
296
+ try:
297
+ msprof_analyser = AscendMsprofDataGenerator(os.path.join(source_path, 'summary'))
298
+ df_op_summary, df_op_statistic, df_step_trace = msprof_analyser.parse()
299
+ except ProfilerException as err:
300
+ logger.warning(err.message)
301
+ finally:
302
+ pass
303
+ return df_op_summary, df_op_statistic, df_step_trace
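The three objects returned here feed the Ascend generators used later in `_ascend_graph_analyse`. Judging by the `np.unique(op_summary['Model ID'])` call there, the op summary behaves like a numpy structured array; a minimal sketch under that assumption (the 'Op Name' and 'Task Duration(us)' fields are illustrative only):

```python
import numpy as np

# Hypothetical op summary shaped like a numpy structured array.
op_summary = np.array(
    [(1, 'MatMul-op1', 12.5), (1, 'Add-op2', 3.1), (2, 'MatMul-op7', 12.8)],
    dtype=[('Model ID', 'i4'), ('Op Name', 'U32'), ('Task Duration(us)', 'f8')])

# Same graph-id extraction as performed in _ascend_graph_analyse.
graph_ids = np.unique(op_summary['Model ID']).tolist()
print(graph_ids)  # [1, 2]
```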
304
+
305
+
85
306
  class Profiler:
86
307
  r"""
87
308
  This class enables the profiling of MindSpore neural networks.
88
309
  MindSpore users can import the mindspore.Profiler, initialize the Profiler object to start profiling,
89
310
  and use Profiler.analyse() to stop profiling and analyse the results.
90
- Users can visualize the results using the MindInsight tool.
311
+ Users can visualize the results using the `MindSpore Insight
312
+ <https://www.mindspore.cn/mindinsight/docs/en/r2.2/index.html>`_ tool.
91
313
  Now, Profiler supports AICORE operator, AICPU operator, HostCPU operator, memory,
92
314
  correspondence, cluster, etc data analysis.
93
315
 
94
316
  Args:
95
- output_path (str, optional): Output data path. Default: "./data".
96
- op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: True.
317
+ output_path (str, optional): Output data path. Default: ``"./data"`` .
318
+ op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: ``True``.
97
319
  profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
98
320
  a multi-device training, collect when True. Setting this parameter has no effect during single device
99
- training. When using this parameter, `op_time` must be set to True. Default: False.
100
- profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when True.
101
- When using this parameter, `op_time` must be set to True. Default: False.
321
+ training. When using this parameter, `op_time` must be set to ``True`` . Default: ``False`` .
322
+ profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
323
+ When using this parameter, `op_time` must be set to True. Default: ``False`` .
102
324
  parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
103
- Default value: true.
325
+ Default value: ``True`` .
104
326
  start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
105
- data collection based on conditions. Default: True.
327
+ data collection based on conditions. Default: ``True`` .
106
328
  aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected, when using this
107
- parameter, `op_time` must be set to True, and the value must be in [-1, 0, 1, 2, 3, 4, 5], Default: 0, the
108
- data items contained in each metric are as follows:
329
+ parameter, `op_time` must be set to ``True`` , and the value must be in [-1, 0, 1, 2, 3, 4, 5],
330
+ Default: ``0`` , the data items contained in each metric are as follows:
109
331
 
110
332
  - -1: Does not collect AICORE data.
111
333
  - 0: ArithmeticUtilization contains mac_fp16/int8_ratio, vec_fp32/fp16/int32_ratio, vec_misc_ratio etc.
@@ -116,9 +338,10 @@ class Profiler:
116
338
  - 4: ResourceConflictRatio contains vec_bankgroup/bank/resc_cflt_ratio etc.
117
339
  - 5: MemoryUB contains ub_read/write_bw_mte, ub_read/write_bw_vector, ub\_/write_bw_scalar etc.
118
340
 
119
- l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True. Default: False.
341
+ l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True.
342
+ Default: ``False`` .
120
343
  sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
121
- Default: True.
344
+ Default: ``True`` .
122
345
 
123
346
  - True: The synchronous way. Before sending the operator to the GPU, the CPU records the start timestamp.
124
347
  Then the operator is returned to the CPU after execution, and the end timestamp is recorded,
@@ -126,9 +349,18 @@ class Profiler:
126
349
  - False: The asynchronous way. The duration of the operator is that of sending from the CPU to the GPU.
127
350
  This method can reduce the impact of adding profiler on overall training time.
128
351
  data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
129
- Default value: True.
130
- timeline_limit (int, optional): Set the maximum storage size of the timeline file (unit M). When using this
131
- parameter, `op_time` must be set to True. Default value: 500.
352
+ Default value: ``True`` .
353
+ timeline_limit (int, optional): (Ascend/GPU) Set the maximum storage size of the timeline file (unit M).
354
+ When using this parameter, `op_time` must be set to True. Default value: ``500`` .
355
+ profile_framework (str, optional): (Ascend/GPU) The host information to collect, it must be one of
356
+ ["all", "time", "memory", None], When is not set to None, a subdirectory host_info will be generated in the
357
+ specified profiler directory, which stores the collected host-side memory and time files.
358
+ Default: "all".
359
+
360
+ - "all": Record both host timestamp and host memory usage.
361
+ - "time": Only record host timestamp.
362
+ - "memory": Only record host memory usage.
363
+ - None: Not record host information.
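A minimal usage sketch for this parameter, assuming a GPU environment and the default profiler workflow (this is an illustrative fragment, not the full official example shown later in this docstring):

```python
# Collect only host-side timestamps; host memory records are skipped.
import mindspore as ms
from mindspore import Profiler

ms.set_context(device_target="GPU")          # assumption: GPU backend available
profiler = Profiler(output_path="./data", profile_framework="time")
# ... build and run the network here ...
profiler.analyse()   # host-side files are written under the host_info subdirectory
```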
132
364
 
133
365
  Raises:
134
366
  RuntimeError: When the version of CANN does not match the version of MindSpore,
@@ -144,7 +376,6 @@ class Profiler:
144
376
  >>> import mindspore.dataset as ds
145
377
  >>> from mindspore import Profiler
146
378
  >>>
147
- >>>
148
379
  >>> class Net(nn.Cell):
149
380
  ... def __init__(self):
150
381
  ... super(Net, self).__init__()
@@ -160,7 +391,7 @@ class Profiler:
160
391
  ... optimizer = nn.Momentum(net.trainable_params(), 1, 0.9)
161
392
  ... loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
162
393
  ... data = ds.GeneratorDataset(generator, ["data", "label"])
163
- ... model = ms.Model(net, loss, optimizer)
394
+ ... model = ms.train.Model(net, loss, optimizer)
164
395
  ... model.train(1, data)
165
396
  >>>
166
397
  >>> if __name__ == '__main__':
@@ -219,9 +450,13 @@ class Profiler:
219
450
  self._sync_enable = True
220
451
  self._stop_time = 0
221
452
  self._dynamic_status = False
453
+ self._model_iteration_dict = None
454
+ self._profile_framework = "all"
222
455
  self._msprof_enable = os.getenv("PROFILER_SAMPLECONFIG")
223
456
  if self._msprof_enable:
224
457
  return
458
+ self._start_time = int(time.time() * 1000000)
459
+ logger.info("Profiling: start time: %d", self._start_time)
225
460
  if kwargs.get("env_enable"):
226
461
  self._profiler_init(kwargs)
227
462
  return
@@ -268,6 +503,25 @@ class Profiler:
268
503
 
269
504
  return job_start_time
270
505
 
506
+ @staticmethod
507
+ def _parse_info_json(info_file):
508
+ """
509
+ Parse info log file, get the rank id and device id of the job.
510
+ Args:
511
+ info_file (str): The file path of the info.json file to parse.
512
+
513
+ Returns:
514
+ rank id, device id
515
+ """
516
+ with open(info_file, "r") as f:
517
+ info_dict = json.load(f)
518
+
519
+ rank_id = info_dict.get("rank_id", 0)
520
+ dev_info = info_dict.get("DeviceInfo", [])
521
+ dev_id = dev_info[0].get("id", -1)
522
+
523
+ return str(rank_id), str(dev_id)
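For reference, a hypothetical `info.json` containing only the two fields read above, together with the values the helper would return for it:

```python
# Hypothetical info.json content (only the fields used by _parse_info_json).
info_dict = {
    "rank_id": 3,
    "DeviceInfo": [{"id": 2}],
}
# Same extraction as performed by the helper above.
rank_id = str(info_dict.get("rank_id", 0))
dev_id = str(info_dict.get("DeviceInfo", [])[0].get("id", -1))
print(rank_id, dev_id)  # prints: 3 2
```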
524
+
271
525
  def op_analyse(self, op_name, device_id=None):
272
526
  """
273
527
  Profiler users can use this interface to obtain operator performance data.
@@ -276,11 +530,11 @@ class Profiler:
276
530
  op_name (str or list): The primitive operator name to query.
277
531
  device_id (int, optional): ID of the target device. This parameter is optional during network training or
278
532
  inference, and users can use device_id parameter to specify which card operator performance data to
279
- parse. If this interface is used for offline data parsing, Default: 0.
533
+ parse. If this interface is used for offline data parsing, Default: ``0`` .
280
534
 
281
535
  Raises:
282
- TypeError: If the op_name parameter type is incorrect.
283
- TypeError: If the device_id parameter type is incorrect.
536
+ TypeError: If the `op_name` parameter type is incorrect.
537
+ TypeError: If the `device_id` parameter type is incorrect.
284
538
  RuntimeError: If MindSpore runs on Ascend, this interface cannot be used.
285
539
 
286
540
  Supported Platforms:
@@ -288,24 +542,25 @@ class Profiler:
288
542
 
289
543
  Examples:
290
544
  >>> from mindspore import Profiler
545
+ >>> from mindspore import nn
546
+ >>> from mindspore import Model
547
+ >>> # Profiler init.
548
+ >>> profiler = Profiler()
549
+ >>> # Train Model or eval Model, taking LeNet5 as an example.
550
+ >>> # Refer to https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
551
+ >>> net = LeNet5()
552
+ >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
553
+ >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
554
+ >>> # Create the dataset taking MNIST as an example.
555
+ >>> # Refer to https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/mnist.py
556
+ >>> dataloader = create_dataset()
557
+ >>> model = Model(net, loss, optimizer)
558
+ >>> model.train(5, dataloader, dataset_sink_mode=False)
291
559
  >>>
292
- ... # Profiler init.
293
- ... profiler = Profiler()
294
- ...
295
- ... # Train Model or eval Model.
296
- ... net = Net()
297
- ... train(net)
298
- ...
299
- ... # Profiler end
300
- ... profiler.analyse()
301
- ...
302
- ... profiler.op_analyse(op_name=["BiasAdd", "Conv2D"])
303
- ...
304
- >>> from mindspore import Profiler
560
+ >>> # Profiler end
561
+ >>> profiler.analyse()
305
562
  >>>
306
- ... # Profiler init.
307
- ... profiler = Profiler(output_path="my_profiler_path")
308
- ... profiler.op_analyse(op_name="Conv2D")
563
+ >>> profiler.op_analyse(op_name=["BiasAdd", "Conv2D"])
309
564
  """
310
565
  if self._device_target == 'ascend':
311
566
  raise RuntimeError("The Interface 'Profiler.op_analyse()' is not supported on Ascend currently.")
@@ -332,10 +587,33 @@ class Profiler:
332
587
  return message
333
588
  return op_info
334
589
 
335
- def analyse(self):
590
+ def analyse(self, offline_path=None):
336
591
  """
337
592
  Collect and analyze training performance data; supports calls during and after training, as the example above shows.
593
+
594
+ Args:
595
+ offline_path (Union[str, None], optional): The data path which needs to be analysed with offline mode.
596
+ Offline mode is used in abnormal exit scenarios. This parameter should be set to ``None``
597
+ for online mode. Default: ``None``.
598
+ """
599
+ self._analyse(offline_path=offline_path)
600
+
601
+ def _analyse(self, offline_path=None, model_iteration_dict=None):
338
602
  """
603
+ Collect and analyze training performance data; supports calls during and after training, as the example above shows.
604
+
605
+ Args:
606
+ offline_path (Union[str, None], optional): The data path which needs to be analysed with offline mode.
607
+ Offline mode is used in abnormal exit scenarios. This parameter should be set to ``None``
608
+ for online mode. Default: ``None``.
609
+ model_iteration_dict (dict, optional): Dictionary with model id as the key and iteration id as the value. Default: ``None``.
610
+ """
611
+ self._model_iteration_dict = model_iteration_dict
612
+ if offline_path:
613
+ if self._is_offline_parser():
614
+ self._ascend_graph_analyse()
615
+ _offline_parse(offline_path)
616
+ return
339
617
  if self._msprof_enable:
340
618
  return
341
619
 
@@ -360,8 +638,19 @@ class Profiler:
360
638
 
361
639
  elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
362
640
  self._ascend_analyse()
641
+ if self._profile_framework:
642
+ if self._device_target != DeviceTarget.CPU.value:
643
+ self._host_info_analyse()
644
+ else:
645
+ logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host_info"
646
+ " directory in the output path.")
363
647
  logger.info("Profiling: all the data have been analyzed.")
364
648
  self._init_profiler_info()
649
+ self._is_support_step_info_collect()
650
+ parallel_mode = get_auto_parallel_context("parallel_mode")
651
+ stage_num = get_auto_parallel_context("pipeline_stages")
652
+
653
+ ProfilerInfo.set_parallel_info(parallel_mode, stage_num)
365
654
  ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
366
655
  ProfilerInfo.set_rank_size(self._rank_size)
367
656
  ProfilerInfo.set_heterogeneous(self._is_heterogeneous)
@@ -374,37 +663,36 @@ class Profiler:
374
663
  Raises:
375
664
  RuntimeError: If the profiler has already started.
376
665
  RuntimeError: If MD profiling has stopped, repeated start action is not supported.
377
- RuntimeError: If the start_profile parameter is not set or is set to True.
666
+ RuntimeError: If the `start_profile` parameter is not set or is set to ``True``.
378
667
 
379
668
  Examples:
380
- >>> class StopAtStep(Callback):
381
- >>> def __init__(self, start_step, stop_step):
382
- ... super(StopAtStep, self).__init__()
383
- ... self.start_step = start_step
384
- ... self.stop_step = stop_step
385
- ... self.profiler = Profiler(start_profile=False)
386
- ...
387
- >>> def step_begin(self, run_context):
388
- ... cb_params = run_context.original_args()
389
- ... step_num = cb_params.cur_step_num
390
- ... if step_num == self.start_step:
391
- ... self.profiler.start()
392
- ...
393
- >>> def step_end(self, run_context):
394
- ... cb_params = run_context.original_args()
395
- ... step_num = cb_params.cur_step_num
396
- ... if step_num == self.stop_step:
397
- ... self.profiler.stop()
398
- ...
399
- >>> def end(self, run_context):
400
- ... self.profiler.analyse()
669
+ >>> from mindspore.train import Callback
670
+ >>> from mindspore import Profiler
671
+ >>> class StopAtStep(Callback):
672
+ ... def __init__(self, start_step, stop_step):
673
+ ... super(StopAtStep, self).__init__()
674
+ ... self.start_step = start_step
675
+ ... self.stop_step = stop_step
676
+ ... self.profiler = Profiler(start_profile=False)
677
+ ...
678
+ ... def step_begin(self, run_context):
679
+ ... cb_params = run_context.original_args()
680
+ ... step_num = cb_params.cur_step_num
681
+ ... if step_num == self.start_step:
682
+ ... self.profiler.start()
683
+ ...
684
+ ... def step_end(self, run_context):
685
+ ... cb_params = run_context.original_args()
686
+ ... step_num = cb_params.cur_step_num
687
+ ... if step_num == self.stop_step:
688
+ ... self.profiler.stop()
689
+ ...
690
+ ... def end(self, run_context):
691
+ ... self.profiler.analyse()
401
692
  """
402
693
  if self._msprof_enable:
403
694
  return
404
695
 
405
- self._start_time = int(time.time() * 1000000)
406
- logger.info("Profiling: start time: %d", self._start_time)
407
-
408
696
  if not self._has_started:
409
697
  if not self._has_started_twice:
410
698
  self._has_started = True
@@ -421,13 +709,17 @@ class Profiler:
421
709
  return
422
710
 
423
711
  self._cpu_profiler.step_profiling_enable(True)
712
+ if self._op_time:
713
+ self._cpu_profiler.enable_op_time()
424
714
 
425
715
  if self._device_target and self._device_target == DeviceTarget.GPU.value:
426
716
  if self._data_process:
427
717
  self._md_profiler.start()
428
718
  self._gpu_profiler.data_process_enable(True)
429
- if self._op_time:
719
+ if self._profile_framework or self._op_time:
430
720
  self._gpu_profiler.step_profiling_enable(True)
721
+ if self._op_time:
722
+ self._gpu_profiler.enable_op_time()
431
723
  elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
432
724
  if self._data_process:
433
725
  self._md_profiler.start()
@@ -442,27 +734,29 @@ class Profiler:
442
734
  RuntimeError: If the profiler has not started, this function is disabled.
443
735
 
444
736
  Examples:
445
- >>> class StopAtEpoch(Callback):
446
- >>> def __init__(self, start_epoch, stop_epoch):
447
- ... super(StopAtEpoch, self).__init__()
448
- ... self.start_epoch = start_epoch
449
- ... self.stop_epoch = stop_epoch
450
- ... self.profiler = Profiler(start_profile=False)
451
- ...
452
- >>> def epoch_begin(self, run_context):
453
- ... cb_params = run_context.original_args()
454
- ... epoch_num = cb_params.cur_epoch_num
455
- ... if epoch_num == self.start_epoch:
456
- ... self.profiler.start()
457
- ...
458
- >>> def epoch_end(self, run_context):
459
- ... cb_params = run_context.original_args()
460
- ... epoch_num = cb_params.cur_epoch_num
461
- ... if epoch_num == self.stop_epoch:
462
- ... self.profiler.stop()
463
- ...
464
- >>> def end(self, run_context):
465
- ... self.profiler.analyse()
737
+ >>> from mindspore.train import Callback
738
+ >>> from mindspore import Profiler
739
+ >>> class StopAtEpoch(Callback):
740
+ ... def __init__(self, start_epoch, stop_epoch):
741
+ ... super(StopAtEpoch, self).__init__()
742
+ ... self.start_epoch = start_epoch
743
+ ... self.stop_epoch = stop_epoch
744
+ ... self.profiler = Profiler(start_profile=False)
745
+ ...
746
+ ... def epoch_begin(self, run_context):
747
+ ... cb_params = run_context.original_args()
748
+ ... epoch_num = cb_params.cur_epoch_num
749
+ ... if epoch_num == self.start_epoch:
750
+ ... self.profiler.start()
751
+ ...
752
+ ... def epoch_end(self, run_context):
753
+ ... cb_params = run_context.original_args()
754
+ ... epoch_num = cb_params.cur_epoch_num
755
+ ... if epoch_num == self.stop_epoch:
756
+ ... self.profiler.stop()
757
+ ...
758
+ ... def end(self, run_context):
759
+ ... self.profiler.analyse()
466
760
  """
467
761
  if self._msprof_enable:
468
762
  return
@@ -506,6 +800,7 @@ class Profiler:
506
800
  self._profile_communication = options.get('profile_communication')
507
801
  self._op_time = options.get('op_time')
508
802
  self._device_target = context.get_context("device_target").lower()
803
+ self._profile_framework = options.get('profile_framework', 'all')
509
804
  self._profiler_manager = c_expression.ProfilerManager.get_instance()
510
805
  self._cpu_profiler = c_expression.Profiler.get_instance("CPU")
511
806
  if self._data_process:
@@ -529,6 +824,10 @@ class Profiler:
529
824
  """Complete Profiler initialization according to device_target"""
530
825
  profiler_manager = c_expression.ProfilerManager
531
826
  self._profiler_manager = profiler_manager.get_instance()
827
+ if self._profile_framework is None:
828
+ self._profiler_manager.set_profile_framework("NULL")
829
+ else:
830
+ self._profiler_manager.set_profile_framework(self._profile_framework)
532
831
  if self._device_target:
533
832
  cpu_profiler = c_expression.Profiler
534
833
  self._cpu_profiler = cpu_profiler.get_instance("CPU")
@@ -614,13 +913,14 @@ class Profiler:
614
913
  "hccl": "on" if self._op_time and self._profile_communication else "off",
615
914
  "l2_cache": self._l2_cache,
616
915
  "parallel_strategy": "on" if self._parallel_strategy else "off",
617
- "op_time": "on" if self._op_time else "off"
916
+ "op_time": "on" if self._op_time else "off",
917
+ "profile_framework": self._profile_framework
618
918
  }
619
919
 
620
920
  return profiling_options
621
921
 
622
922
  def _parse_parameter_for_gpu(self, kwargs):
623
- """Parse parameter in Proflier when the device target is GPU."""
923
+ """Parse parameter in Profiler when the device target is GPU."""
624
924
  self.start_profile = kwargs.pop("start_profile", True)
625
925
  if not isinstance(self.start_profile, bool):
626
926
  raise TypeError(f"For '{self.__class__.__name__}', the parameter start_profile must be bool, "
@@ -632,7 +932,7 @@ class Profiler:
632
932
  self._sync_enable = True
633
933
 
634
934
  def _parse_parameter_for_ascend(self, kwargs):
635
- """Parse parameter in Proflier when the device target is Ascend."""
935
+ """Parse parameter in Profiler when the device target is Ascend."""
636
936
  ascend_job_id = kwargs.pop("ascend_job_id", "")
637
937
  self._set_ascend_job_id(ascend_job_id)
638
938
  self.start_profile = kwargs.pop("start_profile", True)
@@ -708,29 +1008,6 @@ class Profiler:
708
1008
  return bool(self._ascend_job_id)
709
1009
  return False
710
1010
 
711
- def _ascend_pynative_analyse(self):
712
- """Collect and analyse ascend pynative mode performance data."""
713
- self._ascend_profiler.finalize()
714
- op_intermediate_parser = OPIntermediateParser(self._output_path, self._rank_id)
715
- op_intermediate_parser.parser_pynative_op_type()
716
- op_intermediate_parser.parser_pynative_op_intermediate_detail()
717
-
718
- job_id = self._get_profiling_job_id()
719
- logger.info("Profiling: job id is %s ", job_id)
720
- self._check_output_path(output_path=self._output_path)
721
- source_path = os.path.join(self._output_path, job_id)
722
- MinddataParser.execute(source_path, self._output_path, self._rank_id)
723
-
724
- pipeline_parser = MinddataPipelineParser(self._output_path, self._rank_id, self._output_path)
725
- logger.info("Profiling: analyzing the minddata pipeline operator and queue.")
726
- pipeline_parser.parse()
727
-
728
- timeline_analyser = AscendTimelineGenerator(self._output_path, self._dev_id, self._rank_id,
729
- self._rank_size, context.get_context("mode"))
730
- timeline_analyser.init_pynative_timeline()
731
- timeline_analyser.write_timeline(self._timeline_size_limit_byte)
732
- timeline_analyser.write_timeline_summary()
733
-
734
1011
  def _ascend_analyse(self):
735
1012
  """Collect and analyse ascend performance data."""
736
1013
  self._rank_size = 1
@@ -746,30 +1023,128 @@ class Profiler:
746
1023
  self.stop()
747
1024
  else:
748
1025
  logger.info("No need to stop profiler because profiler has been stopped.")
1026
+ # export op data before analyse
749
1027
  self._ascend_graph_analyse()
750
1028
 
751
- def _ascend_timeline_analyse(self, aicpu_data_parser, optime_parser, source_path):
752
- """Analyse timeline info."""
1029
+ def _minddata_analyse(self, source_path):
1030
+ """Analyse mindadata for ascend graph model."""
1031
+ if not self._data_process:
1032
+ return
1033
+ store_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1034
+ # Parsing minddata AICPU profiling
1035
+ if self._device_target == DeviceTarget.ASCEND.value:
1036
+ logger.info("Profiling: analyzing the minddata AICPU data.")
1037
+ MinddataParser.execute(source_path, self._output_path, store_id)
1038
+
1039
+ # parse minddata pipeline operator and queue
753
1040
  try:
754
- self._analyse_timeline(aicpu_data_parser, optime_parser, source_path)
755
- except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
756
- logger.warning('Fail to write timeline data: %s', err)
1041
+ MinddataPipelineParser(self._output_path, store_id, self._output_path).parse()
1042
+ except ProfilerException as err:
1043
+ logger.warning(err.message)
757
1044
  finally:
758
1045
  pass
759
1046
 
760
- def _ascend_step_trace_analyse(self, source_path, framework_parser):
1047
+ # Analyze minddata information
1048
+ logger.info("Profiling: analyzing the minddata information.")
1049
+ try:
1050
+ MinddataProfilingAnalyzer(self._output_path, store_id, self._output_path).analyze()
1051
+ except ProfilerException as err:
1052
+ logger.warning(err.message)
1053
+ finally:
1054
+ pass
1055
+
1056
+ def _ascend_fpbp_analyse(self, op_summary, steptrace):
1057
+ """
1058
+ Ascend graph model FP/BP point analyse.
1059
+
1060
+ Returns:
1061
+ dict, points: the FP/BP point information.
1062
+ """
1063
+ points = None
1064
+ try:
1065
+ dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1066
+ step_trace_point_info_path = os.path.join(self._output_path, f'step_trace_point_info_{dev_id}.json')
1067
+
1068
+ step_trace_point_info_path = validate_and_normalize_path(step_trace_point_info_path)
1069
+
1070
+ fpbp_analyse = AscendFPBPGenerator(op_summary, steptrace)
1071
+ points, _ = fpbp_analyse.parse()
1072
+ fpbp_analyse.write(step_trace_point_info_path)
1073
+ except ProfilerException as err:
1074
+ logger.warning(err.message)
1075
+ finally:
1076
+ pass
1077
+ return points
1078
+
1079
+ def _ascend_op_analyse(self, op_summary, op_statistic, dynamic_status):
1080
+ """
1081
+ Ascend graph model op summary and statistic data analyse.
1082
+
1083
+ Returns:
1084
+ None. The aicore/aicpu intermediate files and the framework_raw file are written to the output path.
1085
+ """
1086
+ try:
1087
+ dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1088
+
1089
+ op_intermediate_detail_path = os.path.join(self._output_path,
1090
+ f'aicore_intermediate_{dev_id}_detail.csv')
1091
+ op_intermediate_type_path = os.path.join(self._output_path, f'aicore_intermediate_{dev_id}_type.csv')
1092
+ aicpu_intermediate_detail_path = os.path.join(self._output_path, f'aicpu_intermediate_{dev_id}.csv')
1093
+ framework_raw_path = os.path.join(self._output_path, f'framework_raw_{dev_id}.csv')
1094
+
1095
+ op_intermediate_detail_path = validate_and_normalize_path(op_intermediate_detail_path)
1096
+ op_intermediate_type_path = validate_and_normalize_path(op_intermediate_type_path)
1097
+ aicpu_intermediate_detail_path = validate_and_normalize_path(aicpu_intermediate_detail_path)
1098
+ framework_raw_path = validate_and_normalize_path(framework_raw_path)
1099
+
1100
+ if context.get_context("mode") == context.GRAPH_MODE:
1101
+ output_timeline_data_path = os.path.join(self._output_path, f'output_timeline_data_{dev_id}.txt')
1102
+ output_timeline_data_path = validate_and_normalize_path(output_timeline_data_path)
1103
+ else:
1104
+ output_timeline_data_path = None
1105
+
1106
+ op_analyser = AscendOPGenerator(op_summary, op_statistic, dynamic_status)
1107
+ op_analyser.parse()
1108
+ op_analyser.write(op_intermediate_detail_path, op_intermediate_type_path,
1109
+ aicpu_intermediate_detail_path, framework_raw_path, output_timeline_data_path)
1110
+ except ProfilerException as err:
1111
+ logger.warning(err.message)
1112
+ finally:
1113
+ pass
1114
+
1115
+ def _ascend_step_trace_analyse(self, steptrace):
761
1116
  """Analyse step trace info."""
762
- points, is_training_mode_flag = None, False
763
1117
  try:
764
- if self._is_support_step_info_collect() and not self._dynamic_status:
765
- points, is_training_mode_flag = self._analyse_step_trace(source_path, framework_parser)
1118
+ if not self._dynamic_status:
1119
+ dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1120
+ step_trace_intermediate_path = os.path.join(self._output_path,
1121
+ f'step_trace_raw_{dev_id}_detail_time.csv')
1122
+
1123
+ step_trace_intermediate_path = validate_and_normalize_path(step_trace_intermediate_path)
1124
+
1125
+ steptrace_analyser = AscendStepTraceGenerator(steptrace)
1126
+ steptrace_analyser.parse()
1127
+ steptrace_analyser.write(step_trace_intermediate_path)
766
1128
  except ProfilerException as err:
767
1129
  logger.warning(err.message)
768
1130
  finally:
769
1131
  pass
770
- return points, is_training_mode_flag
771
1132
 
772
- def _ascend_dynamic_net_analyse(self):
1133
+ def _ascend_timeline_analyse(self, op_summary, steptrace):
1134
+ """Analyse timeline info."""
1135
+ try:
1136
+ logger.info("Profiling: analyzing the timeline data")
1137
+ timeline_analyser = AscendTimelineGenerator(self._output_path, self._dev_id, self._rank_id, self._rank_size,
1138
+ context.get_context('mode'))
1139
+ timeline_analyser.init_timeline(op_summary, steptrace)
1140
+ timeline_analyser.write_timeline(self._timeline_size_limit_byte)
1141
+ timeline_analyser.write_timeline_summary()
1142
+ except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
1143
+ logger.warning('Fail to write timeline data: %s', err)
1144
+ finally:
1145
+ pass
1146
+
1147
+ def _ascend_dynamic_net_analyse(self, op_summary):
773
1148
  """Analyse dynamic shape network info."""
774
1149
  if self._profile_communication:
775
1150
  logger.warning(
@@ -779,14 +1154,31 @@ class Profiler:
779
1154
  logger.warning(
780
1155
  "[Profiler]Dynamic Shape network does not support collecting step trace performance data currently.")
781
1156
  dynamic_parser = DynamicFrameWorkParser(self._output_path, self._rank_id)
782
- dynamic_parser.write_dynamic_shape_data()
1157
+ dynamic_parser.write_dynamic_shape_data(op_summary)
1158
+
1159
+ def _ascend_flops_analyse(self, op_summary):
1160
+ """Get op FLOPs from op_summary, write output_op_flops_x.csv."""
1161
+ if len(op_summary.dtype) != 18:
1162
+ logger.warning("[Profiler] Can not found cube fops and vector fops data in the op summary.")
1163
+ return
1164
+
1165
+ try:
1166
+ dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1167
+
1168
+ flops_path = os.path.join(self._output_path, f'flops_{dev_id}.txt')
1169
+ flops_summary_path = os.path.join(self._output_path, f'flops_summary_{dev_id}.json')
1170
+
1171
+ flops_path = validate_and_normalize_path(flops_path)
1172
+ flops_summary_path = validate_and_normalize_path(flops_summary_path)
783
1173
 
784
- def _ascend_flops_analyse(self, source_path, op_task_dict, is_training_mode_flag):
785
- """Get op FLOPs from aicore.data.x.slice.0 file, and compute FLOPS, write output_op_flops_x.txt."""
786
- flops_parser = FlopsParser(source_path, self._output_path, op_task_dict, self._dev_id, self._rank_id,
787
- is_training_mode_flag)
788
- logger.info("Profiling: analyzing the operation FLOPs.")
789
- flops_parser.execute()
1174
+ flops_analyser = AscendFlopsGenerator(op_summary)
1175
+ flops_analyser.parse()
1176
+ flops_analyser.write(flops_path, flops_summary_path)
1177
+
1178
+ except ProfilerException as err:
1179
+ logger.warning(err.message)
1180
+ finally:
1181
+ pass
790
1182
 
791
1183
  def _ascend_graph_memory_analyse(self, points):
792
1184
  """Analyse memory usage info."""
@@ -803,16 +1195,25 @@ class Profiler:
803
1195
  finally:
804
1196
  pass
805
1197
 
806
- def _ascend_graph_hccl_analyse(self):
1198
+ def _ascend_graph_hccl_analyse(self, source_path):
807
1199
  """Analyse hccl profiler info."""
808
1200
  if not self._profile_communication:
809
1201
  return
810
1202
  if self._profile_communication and context.get_context("mode") == context.PYNATIVE_MODE:
811
1203
  logger.warning("[Profiler]The parameter profile_communication is not supported on Ascend "
812
1204
  "PyNative mode currently.")
1205
+ return
813
1206
  try:
814
1207
  logger.info("Profiling: analyzing the hccl profiler info.")
815
- self._analyse_hccl_info()
1208
+ dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1209
+
1210
+ hccl_raw_path = os.path.join(self._output_path, f'hccl_raw_{dev_id}.csv')
1211
+ hccl_raw_path = validate_and_normalize_path(hccl_raw_path)
1212
+
1213
+ hccl_analyse = AscendHCCLGenerator(os.path.join(source_path, 'timeline'))
1214
+ hccl_analyse.parse()
1215
+ hccl_analyse.write(hccl_raw_path)
1216
+
816
1217
  except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
817
1218
  logger.warning(err.message)
818
1219
  finally:
@@ -838,106 +1239,34 @@ class Profiler:
838
1239
  if context.get_context("mode") == context.PYNATIVE_MODE:
839
1240
  logger.warning("Pynative mode does not support MSAdvisor analyzer currently.")
840
1241
 
841
- def _ascend_graph_op_analyse(self, source_path):
842
- """
843
- Ascend graph model hwts analyse.
844
-
845
- Returns:
846
- list[obj]: The list is: framework_parser, aicpu_data_parser, optime_parser, op_task_dict
847
- """
848
- # parse hwts.log.data.45.dev file, and get task profiling data
849
- hwts_output_filename = self._hwts_output_filename_target + self._rank_id + ".txt"
850
- hwts_output_filename = os.path.join(self._output_path, hwts_output_filename)
851
- source_path = validate_and_normalize_path(source_path)
852
- hwts_output_filename = validate_and_normalize_path(hwts_output_filename)
853
- hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename, self._dynamic_status)
854
- logger.info("Profiling: analyzing hwts data.")
855
- hwtslog_parser.execute()
856
-
857
- # parse Framework file, and get the relation of op and tasks
858
- framework_parser = FrameworkParser(source_path, self._rank_id, self._output_path)
859
- logger.info("Profiling: analyzing framework data.")
860
- framework_parser.parse()
861
- op_task_dict = framework_parser.to_task_id_full_op_name_dict()
862
- if not op_task_dict:
863
- raise RuntimeError('Profiling: fail to parse framework files.')
864
-
865
- # get op compute time from hwts data and framework data, write output_op_compute_time.txt
866
- opcompute_output_filename = self._opcompute_output_filename_target + self._rank_id + ".txt"
867
- opcompute_output_filename = os.path.join(self._output_path, opcompute_output_filename)
868
- opcompute_output_filename = validate_and_normalize_path(opcompute_output_filename)
869
- optime_parser = OPComputeTimeParser(
870
- hwts_output_filename, opcompute_output_filename,
871
- op_task_dict, self._output_path, self._rank_id
872
- )
873
- logger.info("Profiling: analyzing the operation compute time.")
874
- optime_parser.execute()
875
-
876
- # parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
877
- output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._rank_id + ".txt"
878
- output_data_preprocess_aicpu = os.path.join(self._output_path, output_data_preprocess_aicpu)
879
- output_data_preprocess_aicpu = validate_and_normalize_path(output_data_preprocess_aicpu)
880
- aicpu_data_parser = DataPreProcessParser(source_path, output_data_preprocess_aicpu, op_task_dict)
881
- logger.info("Profiling: analyzing the data preprocess data.")
882
- aicpu_data_parser.execute()
883
-
884
- # analyse op compute time info
885
- try:
886
- self._analyser_op_info()
887
- except ProfilerException as err:
888
- logger.warning(err.message)
889
- finally:
890
- pass
891
- return [framework_parser, aicpu_data_parser, optime_parser, op_task_dict]
892
-
893
- def _minddata_analyse(self, source_path):
894
- """Analyse mindadata for ascend graph model."""
895
- if not self._data_process:
896
- return
897
- store_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
898
- # Parsing minddata AICPU profiling
899
- if self._device_target == DeviceTarget.ASCEND.value:
900
- logger.info("Profiling: analyzing the minddata AICPU data.")
901
- MinddataParser.execute(source_path, self._output_path, store_id)
902
-
903
- # parse minddata pipeline operator and queue
904
- try:
905
- MinddataPipelineParser(self._output_path, store_id, self._output_path).parse()
906
- except ProfilerException as err:
907
- logger.warning(err.message)
908
- finally:
909
- pass
910
-
911
- # Analyze minddata information
912
- logger.info("Profiling: analyzing the minddata information.")
913
- try:
914
- MinddataProfilingAnalyzer(self._output_path, store_id, self._output_path).analyze()
915
- except ProfilerException as err:
916
- logger.warning(err.message)
917
- finally:
918
- pass
919
-
920
1242
  def _ascend_graph_analyse(self):
921
1243
  """Ascend graph mode analyse."""
922
1244
  self._ascend_profiler.finalize()
923
1245
 
924
1246
  job_id = self._get_profiling_job_id()
1247
+ if not job_id:
1248
+ return
925
1249
  logger.info("Profiling: job id is %s ", job_id)
926
1250
 
927
1251
  self._check_output_path(output_path=self._output_path)
928
1252
  source_path = os.path.join(self._output_path, job_id)
929
1253
  self._minddata_analyse(source_path)
930
1254
  if self._op_time:
931
- framework_parser, aicpu_data_parser, optime_parser, op_task_dict = self._ascend_graph_op_analyse(
932
- source_path)
933
- points, is_training_mode_flag = self._ascend_step_trace_analyse(source_path, framework_parser)
934
- self._ascend_timeline_analyse(aicpu_data_parser, optime_parser, source_path)
1255
+ _ascend_graph_msprof_generator(source_path, self._model_iteration_dict)
1256
+ op_summary, op_statistic, steptrace = _ascend_graph_msprof_analyse(source_path)
1257
+ self._ascend_op_analyse(op_summary, op_statistic, self._dynamic_status)
1258
+ self._ascend_timeline_analyse(op_summary, steptrace)
1259
+ graph_ids = np.unique(op_summary['Model ID']).tolist()
1260
+ points = self._ascend_fpbp_analyse(op_summary, steptrace)
1261
+ if len(graph_ids) == 1:
1262
+ self._ascend_step_trace_analyse(steptrace)
935
1263
  if self._dynamic_status:
936
- self._ascend_dynamic_net_analyse()
937
- self._ascend_flops_analyse(source_path, op_task_dict, is_training_mode_flag)
1264
+ self._ascend_dynamic_net_analyse(op_summary)
1265
+ self._ascend_flops_analyse(op_summary)
938
1266
  self._ascend_graph_memory_analyse(points)
939
- self._ascend_graph_hccl_analyse()
1267
+ self._ascend_graph_hccl_analyse(source_path)
940
1268
  self._ascend_graph_msadvisor_analyse(job_id)
1269
+ ProfilerInfo.set_graph_ids(graph_ids)
941
1270
 
942
1271
  def _ascend_graph_start(self):
943
1272
  """Ascend graph mode start profiling."""
@@ -973,7 +1302,7 @@ class Profiler:
973
1302
  """Whether iteration related information needs to be parsed."""
974
1303
  profiler_info = ProfilerInfo.get_profiler_info()
975
1304
  graph_ids = profiler_info.get("graph_ids")
976
- if len(graph_ids) > 1:
1305
+ if graph_ids and len(graph_ids) > 1:
977
1306
  analyse_step_trace = False
978
1307
  logger.warning(
979
1308
  "[Profiler]Current model has multiple sub graphs, the segmentation of steps may be inaccurate.")
@@ -997,13 +1326,12 @@ class Profiler:
997
1326
  parser = GpuFrameWorkParser(self._output_path, self._dev_id)
998
1327
  graph_ids = parser.get_graph_ids()
999
1328
  ProfilerInfo.set_graph_ids(graph_ids)
1000
- if self._is_support_step_info_collect():
1001
- self._analyse_step_trace(
1002
- is_training_mode_flag=timeline_generator.check_op_name('Gradients'),
1003
- is_gpu_kernel_async_launch_flag=timeline_generator.is_gpu_kernel_async_launch()
1004
- )
1005
- if self._dynamic_status:
1006
- parser.analyse_dynamic_shape_data(self._timeline_meta)
1329
+ self._analyse_step_trace(
1330
+ is_training_mode_flag=timeline_generator.check_op_name('Gradients'),
1331
+ is_gpu_kernel_async_launch_flag=timeline_generator.is_gpu_kernel_async_launch()
1332
+ )
1333
+ if self._dynamic_status:
1334
+ parser.analyse_dynamic_shape_data(self._timeline_meta)
1007
1335
 
1008
1336
  def _get_step_reduce_op_type(self):
1009
1337
  """Gets all communication operator names."""
@@ -1021,7 +1349,8 @@ class Profiler:
1021
1349
 
1022
1350
  def _cpu_analyse(self):
1023
1351
  """Collect and analyse cpu performance data."""
1024
-
1352
+ if not self._op_time:
1353
+ return
1025
1354
  try:
1026
1355
  timeline_generator = CpuTimelineGenerator(self._output_path, self._rank_id, context.get_context("mode"))
1027
1356
  timeline_generator.init_timeline()
@@ -1031,7 +1360,8 @@ class Profiler:
1031
1360
  logger.warning('Fail to write timeline data: %s', err)
1032
1361
  raise RuntimeError('Fail to write timeline data.') from err
1033
1362
  if context.get_context("mode") == context.PYNATIVE_MODE:
1034
- raise RuntimeError("Pynative mode is not supported on CPU currently.")
1363
+ raise RuntimeError("Currently, the CPU platform does not support Pynative mode to collect performance "
1364
+ "data.")
1035
1365
 
1036
1366
  def _analyse_step_trace(self, source_path=None, framework_parser=None, is_training_mode_flag=True,
1037
1367
  is_gpu_kernel_async_launch_flag=False):
@@ -1058,29 +1388,37 @@ class Profiler:
1058
1388
  point_info_file_path = validate_and_normalize_path(point_info_file_path)
1059
1389
 
1060
1390
  if self._device_target and self._device_target == DeviceTarget.GPU.value:
1061
- input_file_path = os.path.join(self._output_path, f'step_trace_profiling_{self._dev_id}.txt')
1062
- input_file_path = validate_and_normalize_path(input_file_path)
1063
- parser = GpuStepTraceParser(input_dir=input_file_path,
1064
- output_file_path=step_trace_intermediate_file_path,
1065
- is_training_mode=is_training_mode_flag,
1066
- is_gpu_kernel_async_launch=is_gpu_kernel_async_launch_flag)
1067
- parser.parse_and_save()
1068
- point_info = parser.record_point_info(point_info_file_path)
1069
- else:
1070
- # whether keep the first step
1071
- skip_first_step_flag = framework_parser.check_op_name(INIT_OP_NAME)
1072
- point_info = framework_parser.point_info
1073
- # recognize inference or training mode
1074
- is_training_mode_flag = framework_parser.check_op_name("Gradients")
1075
- # parser the step trace files and save the result to disk
1076
- source_path = validate_and_normalize_path(source_path)
1077
- parser = AscendStepTraceParser(input_dir=source_path,
1078
- output_file_path=step_trace_intermediate_file_path,
1079
- skip_first_step=skip_first_step_flag,
1080
- is_training_mode=is_training_mode_flag)
1081
- parser.set_task_id_op_name_dict(framework_parser.to_task_id_full_op_name_dict())
1082
- parser.parse_and_save()
1083
- point_info = parser.record_point_info(point_info_file_path)
1391
+ if context.get_context("mode") != context.PYNATIVE_MODE:
1392
+ input_file_path = os.path.join(self._output_path, f'step_trace_profiling_{self._dev_id}.txt')
1393
+ input_file_path = validate_and_normalize_path(input_file_path)
1394
+ parser = GpuStepTraceParser(input_dir=input_file_path,
1395
+ output_file_path=step_trace_intermediate_file_path,
1396
+ is_training_mode=is_training_mode_flag,
1397
+ is_gpu_kernel_async_launch=is_gpu_kernel_async_launch_flag)
1398
+ parser.parse_and_save()
1399
+ point_info = parser.record_point_info(point_info_file_path)
1400
+ # print parser result
1401
+ parser.show()
1402
+ logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
1403
+ logger.info("The point info is: %s", point_info)
1404
+
1405
+ return point_info, is_training_mode_flag
1406
+ return {}, is_training_mode_flag
1407
+
1408
+ # whether keep the first step
1409
+ skip_first_step_flag = framework_parser.check_op_name(INIT_OP_NAME)
1410
+ # recognize inference or training mode
1411
+ is_training_mode_flag = framework_parser.check_op_name("Gradients")
1412
+ # parser the step trace files and save the result to disk
1413
+ source_path = validate_and_normalize_path(source_path)
1414
+ parser = AscendStepTraceParser(input_dir=source_path,
1415
+ output_file_path=step_trace_intermediate_file_path,
1416
+ skip_first_step=skip_first_step_flag,
1417
+ is_training_mode=is_training_mode_flag)
1418
+ parser.set_task_id_op_name_dict(framework_parser.to_task_id_full_op_name_dict())
1419
+ parser.parse_and_save()
1420
+ point_info = parser.record_point_info(point_info_file_path)
1421
+
1084
1422
  # print parser result
1085
1423
  parser.show()
1086
1424
  logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
@@ -1088,45 +1426,6 @@ class Profiler:
1088
1426
 
1089
1427
  return point_info, is_training_mode_flag
1090
1428
 
1091
- def _analyse_timeline(self, aicpu_parser, optime_parser, source_path):
1092
- """
1093
- Analyse and parse timeline info.
1094
-
1095
- Args:
1096
- aicpu_parser (DataPreProcessParser): The parser instance for AI CPU operator
1097
- execution time calculation.
1098
- optime_parser (OPComputeTimeParserParser): The parser instance for AI Core
1099
- operator execution time calculation.
1100
- """
1101
- logger.info("Profiling: analyzing the timeline data.")
1102
- timeline_analyser = AscendTimelineGenerator(self._output_path, self._dev_id, self._rank_id,
1103
- self._rank_size, context.get_context("mode"))
1104
- # Get framework info
1105
- integrator = Integrator(self._output_path, self._rank_id)
1106
- aicore_detail_data = integrator.get_aicore_detail_data()
1107
- aicore_detail_data_size = len(aicore_detail_data)
1108
- col_names = ['op_name', 'op_type', 'avg_execution_time', 'subgraph',
1109
- 'full_op_name', 'op_info']
1110
- framework_info = {
1111
- 'col_name': col_names,
1112
- 'object': aicore_detail_data,
1113
- 'size': aicore_detail_data_size
1114
- }
1115
-
1116
- all_reduce_info = integrator.query_for_all_reduce()
1117
-
1118
- # Get timeline info
1119
- logger.info('Start writing timeline info...')
1120
- logger.info('Warm Prompt: It could take a few minutes if you are training '
1121
- 'with a complex network or more than 10 steps.')
1122
- # Add info into timeline, such as AI CPU, AllReduce, framework info.
1123
- aicpu_info = aicpu_parser.query_aicpu_data()
1124
- min_cycle_counter = min(aicpu_parser.min_cycle_counter, optime_parser.min_cycle_counter)
1125
- timeline_analyser.init_timeline(all_reduce_info, framework_info, aicpu_info,
1126
- min_cycle_counter, source_path)
1127
- timeline_analyser.write_timeline(self._timeline_size_limit_byte)
1128
- timeline_analyser.write_timeline_summary()
1129
-
1130
1429
  def _generate_timeline(self, reduce_op_type):
1131
1430
  """Used for gpu, generate timeline info, write to json format file."""
1132
1431
  try:
@@ -1164,11 +1463,10 @@ class Profiler:
1164
1463
  return job_id
1165
1464
 
1166
1465
  job_id = ""
1167
- job_dirs = filter(lambda item: item.startswith('JOB') or item.startswith('PROF') and \
1168
- os.path.isdir(os.path.join(self._output_path, item)),
1169
- os.listdir(self._output_path))
1170
- sorted_job_dirs = sorted(job_dirs, key=lambda x: os.path.getmtime(os.path.join(self._output_path, x)),
1171
- reverse=True)
1466
+ job_dirs = filter(lambda item: item.startswith('JOB') or item.startswith('PROF') and os.path.isdir(
1467
+ os.path.join(self._output_path, item)), os.listdir(self._output_path))
1468
+ sorted_job_dirs = sorted(
1469
+ job_dirs, key=lambda x: os.path.getmtime(os.path.join(self._output_path, x)), reverse=True)
1172
1470
 
1173
1471
  for dir_name in sorted_job_dirs:
1174
1472
  if dir_name.startswith('PROF'):
@@ -1185,22 +1483,21 @@ class Profiler:
1185
1483
  "profiler will ignore this job dir.", job_dir)
1186
1484
  continue
1187
1485
 
1188
- training_device_id = start_file_path.split('.')[-1]
1486
+ info_file_path = get_file_path(job_dir, "info.json")
1487
+ if info_file_path is None:
1488
+ logger.warning("Find profiling job path %s, but info.json not exist, "
1489
+ "profiler will ignore this job dir.", job_dir)
1490
+ continue
1491
+
1492
+ _, training_device_id = self._parse_info_json(info_file_path)
1493
+ job_start_time = self._parse_start_log(start_file_path)
1494
+
1189
1495
  if self._dev_id != training_device_id:
1190
1496
  logger.debug("Find profiling find job path %s, but not current training device id. "
1191
1497
  "Current training device id %s, but job path device id: %s, "
1192
1498
  "profiler will ignore this job dir.", job_dir, self._dev_id, training_device_id)
1193
1499
  continue
1194
1500
 
1195
- if not os.listdir(os.path.join(job_dir, 'data')):
1196
- continue
1197
-
1198
- job_start_time = self._parse_start_log(start_file_path)
1199
- if not job_start_time:
1200
- logger.warning("Find profiling job path %s, but fail to get job start info, "
1201
- "profiler will ignore this job dir.", job_start_time)
1202
- continue
1203
-
1204
1501
  if int(job_start_time) < self._start_time:
1205
1502
  logger.warning("Find profiling job path %s, but start_time(%d) is earlier than this training "
1206
1503
  "start_time(%d), profiler will ignore this job dir.",
@@ -1218,38 +1515,10 @@ class Profiler:
                   "please check whether job dir or prof dir(name startswith JOB or PROF) in output path " \
                   "was generated, or may be the device id from job dir dismatch the " \
                   "device_id in current process.".format(self._output_path)
-            raise RuntimeError(msg)
+            logger.warning(msg)

         return job_id

-    def _analyser_op_info(self):
-        """Analyse the operator information."""
-        logger.info("Profiling: analyzing the operation compute time.")
-        integrator = Integrator(self._output_path, self._rank_id)
-        integrator.integrate()
-
-        aicore_type_result = self._query_op_type_info()
-        detail_file_path = os.path.join(
-            self._output_path,
-            'output_op_compute_time_detail_{}.txt'.format(self._rank_id)
-        )
-        fwrite_format(detail_file_path, data_source='title:op compute time')
-        display_names = [
-            'optype_name', 'compute_time(ms, per-step)',
-            'called_times(per-step)', 'percent'
-        ]
-        fwrite_format(detail_file_path, data_source=" ".join(display_names), is_print=True)
-        fwrite_format(detail_file_path, data_source=aicore_type_result, is_print=True)
-
-        op_type_order = [item[0] for item in aicore_type_result]
-        aicore_detail_result = self._query_op_detail_info(op_type_order)
-
-        fwrite_format(detail_file_path, data_source='', is_print=True)
-        fwrite_format(detail_file_path, data_source='Detail:', is_print=True)
-        fwrite_format(detail_file_path, data_source=" ".join(aicore_detail_result.get('col_name_detail')),
-                      is_print=True)
-        fwrite_format(detail_file_path, data_source=aicore_detail_result.get('object'), is_print=True)
-
     def _query_op_type_info(self):
         """
         Query AICORE operator type information.
@@ -1295,7 +1564,8 @@ class Profiler:
             logger.error("Profiling: fail to get context, %s", err)

         if not dev_id or not dev_id.isdigit():
-            dev_id = os.getenv('DEVICE_ID')
+            dev_id = str(get_local_rank()) if GlobalComm.INITED and device_target == DeviceTarget.ASCEND.value \
+                else os.getenv('DEVICE_ID')
         if not dev_id or not dev_id.isdigit():
             dev_id = "0"
             logger.warning("Fail to get DEVICE_ID, use 0 instead.")
@@ -1305,7 +1575,8 @@ class Profiler:
             msg = "Profiling: unsupported backend: %s" % device_target
             raise RuntimeError(msg)

-        rank_id = os.getenv("RANK_ID")
+        rank_id = str(get_rank()) if GlobalComm.INITED and device_target == DeviceTarget.ASCEND.value \
+            else os.getenv("RANK_ID")
         if not rank_id or not rank_id.isdigit():
             rank_id = "0"
             logger.warning(f"For '{self.__class__.__name__}', fail to get RANK_ID from environment, "
@@ -1313,7 +1584,10 @@ class Profiler:

         self._dev_id = dev_id
         self._device_target = device_target.lower()
-        self._rank_id = rank_id
+        if device_target == DeviceTarget.GPU.value:
+            self._rank_id = dev_id
+        else:
+            self._rank_id = rank_id

     def _get_output_path(self, kwargs):
         """Get output path of profiling data."""
@@ -1377,28 +1651,55 @@ class Profiler:
                 "[Profiler]The 'timeline_limit' parameter must be greater than 0, it will be set to 500.")
             timeline_limit = 500
         self._timeline_size_limit_byte = timeline_limit * 1024 * 1024
+        self._profile_framework = kwargs.pop("profile_framework", "all")
+        if self._profile_framework not in ["memory", "time", "all", None]:
+            logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of ['memory',"
+                           f" 'time', 'all', None], but got {self._profile_framework}, it will be set to 'all'.")
+            self._profile_framework = "all"

-    def _analyse_hccl_info(self):
-        """Analyse hccl info."""
-        hccl_path = os.path.join(self._output_path, "hccl_info_{}".format(self._rank_id))
-        if not os.path.exists(hccl_path):
-            os.makedirs(hccl_path, exist_ok=True)
-            os.chmod(hccl_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
-        logger.info("Start call the interface HCCLParseOP parsing hccl info...")
-        logger.info('Warm Prompt: It could take a few minutes if you are training '
-                    'with a complex network or more than 10 steps.')
-        # Call the interface HCCLParseOP parsing hccl info.
-        try:
-            from hccl_parser.entry import hccl_parse_op
-            hccl_parse_op(self._dev_id, self._output_path, hccl_path, op_type='all')
-        except ImportError as err:
-            logger.critical("%s,please check if the hccl_parser-{version}-py3-none-any.whl is installed."
-                            "The hccl_parser-{version}-py3-none-any.whl package is usually located "
-                            "in the /usr/local/Ascend/tools Directory", err)
-            raise ImportError(err) from err
-
-        logger.info("Parse hccl info successfully.")
-        logger.info("Start analyse hccl info.")
-        hccl_parse = HcclParser(hccl_path, self._dev_id, self._rank_id, self._output_path)
-        hccl_parse.parse()
-        logger.info("Analyse hccl info successfully.")
+    def _host_info_analyse(self):
+        """
+        Read data from the csv file, and write it into timeline file, so the timeline can be show on tracing tool.
+        """
+        logger.info("Profiling HostInfo start.")
+        host_dir = os.path.join(self._output_path, 'host_info')
+        host_dir = validate_and_normalize_path(host_dir)
+        if not os.path.exists(host_dir):
+            logger.error("Host info directory: %s not exist.", host_dir)
+            return
+        csv_file_name = 'host_info_' + str(self._rank_id) + '.csv'
+        json_file_name = 'timeline_' + str(self._rank_id) + '.json'
+        memory_file_name = 'host_memory_' + str(self._rank_id) + '.csv'
+        dataset_file_name = 'dataset_' + str(self._rank_id) + '.csv'
+        host_info_file = os.path.join(self._output_path, 'host_info', csv_file_name)
+        timeline_file = os.path.join(self._output_path, 'host_info', json_file_name)
+        memory_file = os.path.join(self._output_path, 'host_info', memory_file_name)
+        dataset_execution_file = os.path.join(self._output_path, 'host_info', dataset_file_name)
+        _parse_host_info(host_info_file, timeline_file, memory_file)
+        _calculate_dataset_execution_time(host_info_file, dataset_execution_file)
+        logger.info("Profile HostInfo finished.")
+
+
+def _offline_parse(offline_path):
+    """Parse data in abnormal scenario, only support for host_info at present."""
+    logger.info("Profiling HostInfo offline start.")
+    host_dir = os.path.join(offline_path, 'profiler', 'host_info')
+    host_dir = validate_and_normalize_path(host_dir)
+    if not os.path.exists(host_dir):
+        logger.error("Host info directory: %s not exist.", host_dir)
+        return
+    files = os.listdir(host_dir)
+    for file in files:
+        if not file.startswith("host_info_") or not file.endswith(".csv"):
+            continue
+        rank_id = file.split('_')[-1].split('.')[0]
+        if not rank_id.isdigit():
+            logger.info("Cannot get rank_id from file: %s, skip it", file)
+            return
+        host_info_file = os.path.join(host_dir, file)
+        timeline_file = os.path.join(host_dir, f'timeline_{rank_id}.json')
+        memory_file = os.path.join(host_dir, f'host_memory_{rank_id}.csv')
+        dataset_execution_file = os.path.join(host_dir, f'dataset_{rank_id}.csv')
+        _parse_host_info(host_info_file, timeline_file, memory_file)
+        _calculate_dataset_execution_time(host_info_file, dataset_execution_file)
+    logger.info("Profile HostInfo offline finished.")