mindspore 1.10.0__cp37-cp37m-win_amd64.whl → 2.0.0rc1__cp37-cp37m-win_amd64.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (966)
  1. mindspore/.commit_id +1 -1
  2. mindspore/ConcurrencyCheck.dll +0 -0
  3. mindspore/CppBuildInsights.dll +0 -0
  4. mindspore/CppCoreCheck.dll +0 -0
  5. mindspore/EnumIndex.dll +0 -0
  6. mindspore/EspXEngine.dll +0 -0
  7. mindspore/HResultCheck.dll +0 -0
  8. mindspore/KernelTraceControl.dll +0 -0
  9. mindspore/LocalESPC.dll +0 -0
  10. mindspore/Microsoft.Diagnostics.Tracing.EventSource.dll +0 -0
  11. mindspore/Microsoft.VisualStudio.RemoteControl.dll +0 -0
  12. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  13. mindspore/Microsoft.VisualStudio.Utilities.Internal.dll +0 -0
  14. mindspore/Newtonsoft.Json.dll +0 -0
  15. mindspore/System.Runtime.CompilerServices.Unsafe.dll +0 -0
  16. mindspore/VariantClear.dll +0 -0
  17. mindspore/__init__.py +9 -4
  18. mindspore/_c_dataengine.cp37-win_amd64.pyd +0 -0
  19. mindspore/_c_expression.cp37-win_amd64.pyd +0 -0
  20. mindspore/_c_mindrecord.cp37-win_amd64.pyd +0 -0
  21. mindspore/_check_jit_forbidden_api.py +102 -0
  22. mindspore/_checkparam.py +1066 -1001
  23. mindspore/_extends/builtin_operations.py +32 -4
  24. mindspore/_extends/graph_kernel/model/graph_split.py +66 -222
  25. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +12 -9
  26. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +119 -26
  27. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -50
  28. mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -6
  29. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -25
  30. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
  31. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -27
  32. mindspore/_extends/parse/__init__.py +5 -3
  33. mindspore/_extends/parse/namespace.py +17 -2
  34. mindspore/_extends/parse/parser.py +193 -34
  35. mindspore/_extends/parse/resources.py +7 -8
  36. mindspore/_extends/parse/standard_method.py +1780 -435
  37. mindspore/_extends/parse/trope.py +3 -1
  38. mindspore/amp.py +53 -58
  39. mindspore/atlprov.dll +0 -0
  40. mindspore/boost/adasum.py +3 -2
  41. mindspore/boost/boost.py +2 -2
  42. mindspore/boost/boost_cell_wrapper.py +46 -26
  43. mindspore/boost/dim_reduce.py +6 -5
  44. mindspore/boost/grad_accumulation.py +2 -1
  45. mindspore/boost/group_loss_scale_manager.py +1 -1
  46. mindspore/c1.dll +0 -0
  47. mindspore/c1xx.dll +0 -0
  48. mindspore/c2.dll +0 -0
  49. mindspore/cfgpersist.dll +0 -0
  50. mindspore/clang_rt.asan_dbg_dynamic-x86_64.dll +0 -0
  51. mindspore/clang_rt.asan_dynamic-x86_64.dll +0 -0
  52. mindspore/common/__init__.py +11 -10
  53. mindspore/common/_decorator.py +2 -0
  54. mindspore/common/_register_for_adapter.py +55 -0
  55. mindspore/common/_stub_tensor.py +201 -0
  56. mindspore/common/_utils.py +57 -0
  57. mindspore/common/api.py +582 -297
  58. mindspore/common/dtype.py +66 -18
  59. mindspore/common/dump.py +2 -2
  60. mindspore/common/initializer.py +38 -1
  61. mindspore/common/jit_config.py +25 -13
  62. mindspore/common/mutable.py +53 -24
  63. mindspore/common/parameter.py +60 -37
  64. mindspore/common/seed.py +8 -24
  65. mindspore/common/sparse_tensor.py +927 -0
  66. mindspore/common/tensor.py +1627 -3900
  67. mindspore/communication/__init__.py +10 -5
  68. mindspore/communication/_comm_helper.py +78 -214
  69. mindspore/communication/_hccl_management.py +2 -1
  70. mindspore/communication/management.py +136 -47
  71. mindspore/config/op_info.config +501 -1008
  72. mindspore/context.py +291 -56
  73. mindspore/d3dcompiler_47.dll +0 -0
  74. mindspore/dataset/__init__.py +12 -8
  75. mindspore/dataset/audio/__init__.py +9 -9
  76. mindspore/dataset/audio/transforms.py +1090 -228
  77. mindspore/dataset/audio/utils.py +87 -39
  78. mindspore/dataset/audio/validators.py +223 -1
  79. mindspore/dataset/callback/ds_callback.py +17 -15
  80. mindspore/dataset/core/config.py +246 -17
  81. mindspore/dataset/core/py_util_helpers.py +4 -3
  82. mindspore/dataset/core/validator_helpers.py +10 -10
  83. mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
  84. mindspore/dataset/debug/debug_hook.py +65 -0
  85. mindspore/dataset/debug/pre_defined_hook.py +67 -0
  86. mindspore/dataset/engine/__init__.py +7 -3
  87. mindspore/dataset/engine/cache_client.py +9 -9
  88. mindspore/dataset/engine/datasets.py +648 -477
  89. mindspore/dataset/engine/datasets_audio.py +165 -167
  90. mindspore/dataset/engine/datasets_standard_format.py +93 -67
  91. mindspore/dataset/engine/datasets_text.py +492 -342
  92. mindspore/dataset/engine/datasets_user_defined.py +85 -50
  93. mindspore/dataset/engine/datasets_vision.py +1224 -699
  94. mindspore/dataset/engine/graphdata.py +134 -69
  95. mindspore/dataset/engine/iterators.py +50 -9
  96. mindspore/dataset/engine/offload.py +52 -31
  97. mindspore/dataset/engine/samplers.py +27 -24
  98. mindspore/dataset/engine/serializer_deserializer.py +14 -15
  99. mindspore/dataset/engine/validators.py +213 -52
  100. mindspore/dataset/text/__init__.py +10 -8
  101. mindspore/dataset/text/transforms.py +152 -57
  102. mindspore/dataset/text/utils.py +98 -49
  103. mindspore/dataset/text/validators.py +25 -0
  104. mindspore/dataset/transforms/__init__.py +4 -2
  105. mindspore/dataset/transforms/c_transforms.py +11 -13
  106. mindspore/dataset/transforms/py_transforms.py +2 -2
  107. mindspore/dataset/transforms/py_transforms_util.py +10 -0
  108. mindspore/dataset/transforms/transforms.py +13 -15
  109. mindspore/dataset/transforms/validators.py +7 -7
  110. mindspore/dataset/utils/__init__.py +2 -1
  111. mindspore/dataset/utils/browse_dataset.py +13 -13
  112. mindspore/dataset/utils/line_reader.py +121 -0
  113. mindspore/dataset/vision/__init__.py +8 -7
  114. mindspore/dataset/vision/c_transforms.py +125 -126
  115. mindspore/dataset/vision/py_transforms.py +37 -37
  116. mindspore/dataset/vision/py_transforms_util.py +23 -20
  117. mindspore/dataset/vision/transforms.py +316 -315
  118. mindspore/dataset/vision/utils.py +313 -17
  119. mindspore/dataset/vision/validators.py +6 -6
  120. mindspore/default_config.py +0 -1
  121. mindspore/dpcmi.dll +0 -0
  122. mindspore/{compression → experimental}/__init__.py +6 -5
  123. mindspore/experimental/map_parameter.py +275 -0
  124. mindspore/include/OWNERS +0 -1
  125. mindspore/include/api/callback/callback.h +9 -13
  126. mindspore/include/api/callback/ckpt_saver.h +2 -2
  127. mindspore/include/api/callback/loss_monitor.h +2 -2
  128. mindspore/include/api/callback/lr_scheduler.h +5 -5
  129. mindspore/include/api/callback/time_monitor.h +2 -2
  130. mindspore/include/api/callback/train_accuracy.h +4 -6
  131. mindspore/include/api/cfg.h +19 -6
  132. mindspore/include/api/context.h +70 -9
  133. mindspore/include/api/delegate.h +8 -1
  134. mindspore/include/api/dual_abi_helper.h +8 -24
  135. mindspore/include/api/metrics/accuracy.h +2 -2
  136. mindspore/include/api/metrics/metrics.h +4 -3
  137. mindspore/include/api/model.h +9 -4
  138. mindspore/include/api/model_group.h +68 -0
  139. mindspore/include/api/model_parallel_runner.h +17 -17
  140. mindspore/include/api/net.h +12 -11
  141. mindspore/include/api/serialization.h +20 -4
  142. mindspore/include/api/status.h +7 -1
  143. mindspore/include/api/types.h +25 -21
  144. mindspore/include/api/visible.h +4 -0
  145. mindspore/include/c_api/model_c.h +5 -0
  146. mindspore/include/c_api/status_c.h +1 -1
  147. mindspore/include/dataset/config.h +1 -1
  148. mindspore/include/dataset/constants.h +14 -0
  149. mindspore/include/dataset/text.h +59 -0
  150. mindspore/include/dataset/vision.h +56 -117
  151. mindspore/include/dataset/vision_lite.h +102 -0
  152. mindspore/jpeg62.dll +0 -0
  153. mindspore/log.py +28 -28
  154. mindspore/mindrecord/common/exceptions.py +2 -4
  155. mindspore/mindrecord/filereader.py +19 -1
  156. mindspore/mindrecord/filewriter.py +250 -88
  157. mindspore/mindrecord/mindpage.py +13 -13
  158. mindspore/mindrecord/shardheader.py +15 -15
  159. mindspore/mindrecord/shardreader.py +9 -0
  160. mindspore/mindrecord/shardwriter.py +29 -29
  161. mindspore/mindrecord/tools/cifar100_to_mr.py +9 -9
  162. mindspore/mindrecord/tools/cifar10_to_mr.py +9 -9
  163. mindspore/mindrecord/tools/csv_to_mr.py +4 -4
  164. mindspore/mindrecord/tools/imagenet_to_mr.py +70 -65
  165. mindspore/mindrecord/tools/mnist_to_mr.py +41 -41
  166. mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
  167. mindspore/{libmindspore_backend.dll → mindspore_backend.dll} +0 -0
  168. mindspore/mindspore_common.dll +0 -0
  169. mindspore/mindspore_core.dll +0 -0
  170. mindspore/mindspore_glog.dll +0 -0
  171. mindspore/mindspore_shared_lib.dll +0 -0
  172. mindspore/msobj140.dll +0 -0
  173. mindspore/mspdb140.dll +0 -0
  174. mindspore/mspdbcore.dll +0 -0
  175. mindspore/mspdbst.dll +0 -0
  176. mindspore/mspft140.dll +0 -0
  177. mindspore/msvcdis140.dll +0 -0
  178. mindspore/msvcp140_1.dll +0 -0
  179. mindspore/msvcp140_2.dll +0 -0
  180. mindspore/msvcp140_atomic_wait.dll +0 -0
  181. mindspore/msvcp140_codecvt_ids.dll +0 -0
  182. mindspore/nn/__init__.py +1 -5
  183. mindspore/nn/cell.py +297 -234
  184. mindspore/nn/dynamic_lr.py +1 -1
  185. mindspore/nn/grad/cell_grad.py +17 -42
  186. mindspore/nn/layer/__init__.py +7 -4
  187. mindspore/nn/layer/activation.py +131 -88
  188. mindspore/nn/layer/basic.py +313 -613
  189. mindspore/nn/layer/channel_shuffle.py +103 -0
  190. mindspore/nn/layer/combined.py +1 -1
  191. mindspore/nn/layer/container.py +52 -6
  192. mindspore/nn/layer/conv.py +112 -43
  193. mindspore/nn/layer/dense.py +10 -9
  194. mindspore/nn/layer/embedding.py +36 -34
  195. mindspore/nn/layer/image.py +123 -27
  196. mindspore/nn/layer/math.py +108 -107
  197. mindspore/nn/layer/normalization.py +212 -366
  198. mindspore/nn/layer/padding.py +370 -42
  199. mindspore/nn/layer/pooling.py +1443 -219
  200. mindspore/nn/layer/rnn_cells.py +11 -16
  201. mindspore/nn/layer/rnns.py +38 -39
  202. mindspore/nn/layer/thor_layer.py +24 -25
  203. mindspore/nn/layer/timedistributed.py +5 -5
  204. mindspore/nn/layer/transformer.py +701 -0
  205. mindspore/nn/learning_rate_schedule.py +8 -8
  206. mindspore/nn/loss/__init__.py +9 -6
  207. mindspore/nn/loss/loss.py +678 -142
  208. mindspore/nn/metrics.py +53 -0
  209. mindspore/nn/optim/_dist_optimizer_registry.py +2 -2
  210. mindspore/nn/optim/ada_grad.py +8 -8
  211. mindspore/nn/optim/adadelta.py +2 -3
  212. mindspore/nn/optim/adafactor.py +18 -14
  213. mindspore/nn/optim/adam.py +429 -87
  214. mindspore/nn/optim/adamax.py +5 -6
  215. mindspore/nn/optim/adasum.py +10 -8
  216. mindspore/nn/optim/asgd.py +7 -7
  217. mindspore/nn/optim/ftrl.py +81 -11
  218. mindspore/nn/optim/lamb.py +7 -8
  219. mindspore/nn/optim/lars.py +4 -4
  220. mindspore/nn/optim/lazyadam.py +82 -7
  221. mindspore/nn/optim/momentum.py +8 -7
  222. mindspore/nn/optim/optimizer.py +19 -10
  223. mindspore/nn/optim/proximal_ada_grad.py +6 -5
  224. mindspore/nn/optim/rmsprop.py +3 -3
  225. mindspore/nn/optim/rprop.py +20 -16
  226. mindspore/nn/optim/sgd.py +21 -15
  227. mindspore/nn/optim/thor.py +23 -21
  228. mindspore/nn/probability/__init__.py +0 -2
  229. mindspore/nn/probability/bijector/bijector.py +7 -6
  230. mindspore/nn/probability/bijector/invert.py +4 -2
  231. mindspore/nn/probability/bijector/softplus.py +2 -2
  232. mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
  233. mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
  234. mindspore/nn/probability/distribution/__init__.py +6 -0
  235. mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -2
  236. mindspore/nn/probability/distribution/_utils/utils.py +11 -17
  237. mindspore/nn/probability/distribution/bernoulli.py +6 -6
  238. mindspore/nn/probability/distribution/beta.py +1 -1
  239. mindspore/nn/probability/distribution/categorical.py +9 -9
  240. mindspore/nn/probability/distribution/cauchy.py +8 -8
  241. mindspore/nn/probability/distribution/distribution.py +12 -6
  242. mindspore/nn/probability/distribution/exponential.py +5 -5
  243. mindspore/nn/probability/distribution/gamma.py +3 -3
  244. mindspore/nn/probability/distribution/geometric.py +6 -5
  245. mindspore/nn/probability/distribution/gumbel.py +5 -5
  246. mindspore/nn/probability/distribution/half_normal.py +133 -0
  247. mindspore/nn/probability/distribution/laplace.py +128 -0
  248. mindspore/nn/probability/distribution/log_normal.py +0 -1
  249. mindspore/nn/probability/distribution/logistic.py +4 -5
  250. mindspore/nn/probability/distribution/normal.py +11 -15
  251. mindspore/nn/probability/distribution/poisson.py +6 -2
  252. mindspore/nn/probability/distribution/student_t.py +150 -0
  253. mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
  254. mindspore/nn/probability/distribution/uniform.py +5 -5
  255. mindspore/nn/reinforcement/_tensors_queue.py +3 -3
  256. mindspore/nn/reinforcement/tensor_array.py +2 -2
  257. mindspore/nn/sparse/sparse.py +8 -1
  258. mindspore/nn/wrap/cell_wrapper.py +55 -27
  259. mindspore/nn/wrap/grad_reducer.py +20 -11
  260. mindspore/nn/wrap/loss_scale.py +47 -30
  261. mindspore/numpy/array_creations.py +33 -22
  262. mindspore/numpy/array_ops.py +46 -42
  263. mindspore/numpy/logic_ops.py +6 -27
  264. mindspore/numpy/math_ops.py +26 -19
  265. mindspore/numpy/utils.py +1 -8
  266. mindspore/numpy/utils_const.py +112 -62
  267. mindspore/opencv_core452.dll +0 -0
  268. mindspore/opencv_imgcodecs452.dll +0 -0
  269. mindspore/opencv_imgproc452.dll +0 -0
  270. mindspore/ops/__init__.py +6 -3
  271. mindspore/ops/_constants.py +0 -6
  272. mindspore/ops/_grad/__init__.py +2 -1
  273. mindspore/ops/_grad/grad_array_ops.py +209 -152
  274. mindspore/ops/_grad/grad_base.py +55 -17
  275. mindspore/ops/_grad/grad_clip_ops.py +11 -3
  276. mindspore/ops/_grad/grad_comm_ops.py +58 -47
  277. mindspore/ops/_grad/grad_implementations.py +21 -61
  278. mindspore/ops/_grad/grad_inner_ops.py +48 -6
  279. mindspore/ops/_grad/grad_math_ops.py +306 -161
  280. mindspore/ops/_grad/grad_nn_ops.py +192 -181
  281. mindspore/ops/_grad/grad_other_ops.py +1 -1
  282. mindspore/ops/_grad/grad_quant_ops.py +5 -5
  283. mindspore/ops/_grad/grad_sequence_ops.py +296 -0
  284. mindspore/ops/_grad/grad_sparse.py +15 -9
  285. mindspore/ops/_grad_experimental/__init__.py +1 -0
  286. mindspore/ops/_grad_experimental/grad_array_ops.py +441 -55
  287. mindspore/ops/_grad_experimental/grad_image_ops.py +25 -7
  288. mindspore/ops/_grad_experimental/grad_inner_ops.py +3 -44
  289. mindspore/ops/_grad_experimental/grad_linalg_ops.py +16 -21
  290. mindspore/ops/_grad_experimental/grad_math_ops.py +979 -49
  291. mindspore/ops/_grad_experimental/grad_nn_ops.py +78 -8
  292. mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
  293. mindspore/ops/_grad_experimental/grad_sparse_ops.py +197 -13
  294. mindspore/ops/_op_impl/__init__.py +3 -3
  295. mindspore/ops/_op_impl/_custom_op/__init__.py +0 -1
  296. mindspore/ops/_op_impl/_custom_op/_basic.py +0 -1
  297. mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
  298. mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +4 -2
  299. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
  300. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
  301. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +5 -5
  302. mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
  303. mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
  304. mindspore/ops/_op_impl/_custom_op/correction_mul.py +3 -3
  305. mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
  306. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +4 -8
  307. mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
  308. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
  309. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
  310. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
  311. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
  312. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
  313. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
  314. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
  315. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
  316. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
  317. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
  318. mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
  319. mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
  320. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  321. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
  322. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
  323. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
  324. mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
  325. mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +0 -1
  326. mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -1
  327. mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
  328. mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
  329. mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
  330. mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
  331. mindspore/ops/_op_impl/aicpu/__init__.py +238 -3
  332. mindspore/ops/_op_impl/aicpu/abs.py +36 -0
  333. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
  334. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
  335. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
  336. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
  337. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
  338. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
  339. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
  340. mindspore/ops/_op_impl/aicpu/add.py +43 -0
  341. mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
  342. mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
  343. mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
  344. mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
  345. mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
  346. mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
  347. mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
  348. mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
  349. mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
  350. mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
  351. mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
  352. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +43 -0
  353. mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
  354. mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/cauchy.py} +17 -10
  355. mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
  356. mindspore/ops/_op_impl/aicpu/cholesky.py +1 -1
  357. mindspore/ops/_op_impl/{cpu/bias_add.py → aicpu/choleskygrad.py} +9 -7
  358. mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
  359. mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
  360. mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
  361. mindspore/ops/_op_impl/aicpu/conj.py +11 -0
  362. mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
  363. mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
  364. mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
  365. mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +2 -2
  366. mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
  367. mindspore/ops/_op_impl/aicpu/diag.py +36 -0
  368. mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
  369. mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
  370. mindspore/ops/_op_impl/{cpu/bias_add_grad.py → aicpu/digamma.py} +9 -7
  371. mindspore/ops/_op_impl/aicpu/eig.py +35 -0
  372. mindspore/ops/_op_impl/aicpu/fft_with_size.py +41 -0
  373. mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
  374. mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
  375. mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
  376. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
  377. mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
  378. mindspore/ops/_op_impl/aicpu/glu.py +33 -0
  379. mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
  380. mindspore/ops/_op_impl/aicpu/greater.py +41 -0
  381. mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
  382. mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
  383. mindspore/ops/_op_impl/{tbe/scatter_add_ds.py → aicpu/inplace_index_add.py} +17 -21
  384. mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
  385. mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
  386. mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
  387. mindspore/ops/_op_impl/aicpu/less.py +41 -0
  388. mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
  389. mindspore/ops/_op_impl/aicpu/lgamma.py +32 -0
  390. mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
  391. mindspore/ops/_op_impl/aicpu/logit.py +33 -0
  392. mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
  393. mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
  394. mindspore/ops/_op_impl/aicpu/masked_scatter.py +39 -0
  395. mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
  396. mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
  397. mindspore/ops/_op_impl/aicpu/matrix_power.py +32 -0
  398. mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
  399. mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
  400. mindspore/ops/_op_impl/aicpu/mirror_pad.py +2 -0
  401. mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
  402. mindspore/ops/_op_impl/aicpu/mul.py +3 -1
  403. mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
  404. mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
  405. mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
  406. mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
  407. mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
  408. mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
  409. mindspore/ops/_op_impl/aicpu/polar.py +32 -0
  410. mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
  411. mindspore/ops/_op_impl/aicpu/qr.py +36 -0
  412. mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
  413. mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
  414. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
  415. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
  416. mindspore/ops/_op_impl/aicpu/random_shuffle.py +3 -0
  417. mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
  418. mindspore/ops/_op_impl/aicpu/range.py +36 -0
  419. mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
  420. mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
  421. mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
  422. mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
  423. mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
  424. mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
  425. mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
  426. mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
  427. mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
  428. mindspore/ops/_op_impl/aicpu/search_sorted.py +12 -6
  429. mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
  430. mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
  431. mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
  432. mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
  433. mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
  434. mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
  435. mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
  436. mindspore/ops/_op_impl/aicpu/sort.py +39 -0
  437. mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
  438. mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
  439. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
  440. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
  441. mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
  442. mindspore/ops/_op_impl/{tbe/slice_ds.py → aicpu/sparse_segment_sum.py} +16 -24
  443. mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
  444. mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
  445. mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
  446. mindspore/ops/_op_impl/aicpu/squared_difference.py +2 -0
  447. mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +93 -0
  448. mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +66 -0
  449. mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
  450. mindspore/ops/_op_impl/{tbe/gather_v2.py → aicpu/tile.py} +24 -24
  451. mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
  452. mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
  453. mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
  454. mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
  455. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
  456. mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
  457. mindspore/ops/_op_impl/cpu/__init__.py +1 -2
  458. mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
  459. mindspore/ops/_op_impl/cpu/maximum_grad.py +2 -0
  460. mindspore/{compression/common/__init__.py → ops/_op_impl/cpu/pyexecute.py} +13 -8
  461. mindspore/ops/_op_impl/cpu/reduce_sum.py +8 -0
  462. mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
  463. mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
  464. mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
  465. mindspore/ops/_op_impl/tbe/__init__.py +27 -608
  466. mindspore/ops/_op_impl/tbe/addcdiv_ds.py +42 -0
  467. mindspore/ops/_op_impl/tbe/addcmul_ds.py +44 -0
  468. mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
  469. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  470. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
  471. mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -1
  472. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  473. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
  474. mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +41 -0
  475. mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +1 -0
  476. mindspore/ops/_op_impl/tbe/bias_add_grad.py +2 -0
  477. mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
  478. mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +40 -0
  479. mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
  480. mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
  481. mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
  482. mindspore/ops/_op_impl/tbe/cast.py +0 -2
  483. mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
  484. mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -2
  485. mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -2
  486. mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
  487. mindspore/ops/_op_impl/tbe/deformable_offsets.py +1 -0
  488. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +1 -1
  489. mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
  490. mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
  491. mindspore/ops/_op_impl/tbe/greater.py +2 -0
  492. mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
  493. mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -1
  494. mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
  495. mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
  496. mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -6
  497. mindspore/ops/_op_impl/tbe/{greater_ds.py → reduce_all_ds.py} +13 -16
  498. mindspore/ops/_op_impl/tbe/reduce_any_ds.py +39 -0
  499. mindspore/ops/_op_impl/tbe/roi_align_ds.py +44 -0
  500. mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +44 -0
  501. mindspore/ops/_op_impl/tbe/scatter_add.py +2 -0
  502. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +2 -2
  503. mindspore/ops/_op_impl/tbe/slice.py +26 -15
  504. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  505. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
  506. mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +1 -0
  507. mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
  508. mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +1 -1
  509. mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +2 -0
  510. mindspore/ops/_primitive_cache.py +3 -2
  511. mindspore/ops/_register_for_op.py +11 -0
  512. mindspore/ops/_utils/__init__.py +1 -1
  513. mindspore/ops/_utils/utils.py +20 -41
  514. mindspore/ops/_vmap/__init__.py +2 -2
  515. mindspore/ops/_vmap/vmap_array_ops.py +170 -78
  516. mindspore/ops/_vmap/vmap_base.py +24 -10
  517. mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
  518. mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
  519. mindspore/ops/_vmap/vmap_grad_nn_ops.py +41 -9
  520. mindspore/ops/_vmap/vmap_image_ops.py +52 -0
  521. mindspore/ops/_vmap/vmap_math_ops.py +77 -6
  522. mindspore/ops/_vmap/vmap_nn_ops.py +78 -29
  523. mindspore/ops/_vmap/vmap_other_ops.py +3 -1
  524. mindspore/ops/_vmap/vmap_random_ops.py +55 -3
  525. mindspore/ops/_vmap/vmap_sparse_ops.py +1 -0
  526. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  527. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  528. mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +18 -19
  529. mindspore/ops/bprop_mindir/Argmax_bprop.mindir +13 -12
  530. mindspore/ops/bprop_mindir/Argmin_bprop.mindir +14 -13
  531. mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +17 -18
  532. mindspore/ops/bprop_mindir/Assign_bprop.mindir +16 -16
  533. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
  534. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
  535. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  536. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +13 -12
  537. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  538. mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +28 -0
  539. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  540. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
  541. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +306 -0
  542. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +12 -8
  543. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  544. mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
  545. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
  546. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
  547. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
  548. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
  549. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
  550. mindspore/ops/bprop_mindir/DType_bprop.mindir +12 -12
  551. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
  552. mindspore/ops/bprop_mindir/Depend_bprop.mindir +12 -13
  553. mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +23 -0
  554. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
  555. mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +15 -0
  556. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  557. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  558. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -24
  559. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -14
  560. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
  561. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  562. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  563. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  564. mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +12 -12
  565. mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
  566. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  567. mindspore/ops/bprop_mindir/Equal_bprop.mindir +18 -19
  568. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +58 -0
  569. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
  570. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +54 -0
  571. mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +18 -15
  572. mindspore/ops/bprop_mindir/GatherD_bprop.mindir +26 -0
  573. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +57 -0
  574. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  575. mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +17 -18
  576. mindspore/ops/bprop_mindir/Greater_bprop.mindir +18 -19
  577. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
  578. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
  579. mindspore/ops/bprop_mindir/IOU_bprop.mindir +18 -19
  580. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  581. mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +13 -12
  582. mindspore/ops/bprop_mindir/IsInf_bprop.mindir +13 -10
  583. mindspore/ops/bprop_mindir/IsNan_bprop.mindir +14 -11
  584. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
  585. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
  586. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
  587. mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
  588. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  589. mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +18 -19
  590. mindspore/ops/bprop_mindir/Less_bprop.mindir +17 -18
  591. mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +22 -19
  592. mindspore/ops/bprop_mindir/Load_bprop.mindir +12 -13
  593. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
  594. mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +17 -18
  595. mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +14 -13
  596. mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +21 -0
  597. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
  598. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
  599. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
  600. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
  601. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  602. mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
  603. mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
  604. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
  605. mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
  606. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  607. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  608. mindspore/ops/bprop_mindir/NonZero_bprop.mindir +14 -0
  609. mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +18 -19
  610. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +25 -23
  611. mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +13 -13
  612. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  613. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  614. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  615. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
  616. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
  617. mindspore/ops/bprop_mindir/Range_bprop.mindir +21 -19
  618. mindspore/ops/bprop_mindir/Rank_bprop.mindir +11 -11
  619. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
  620. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  621. mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +18 -17
  622. mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +18 -17
  623. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +19 -23
  624. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +60 -0
  625. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
  626. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +89 -0
  627. mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +52 -0
  628. mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +22 -0
  629. mindspore/ops/bprop_mindir/Round_bprop.mindir +14 -13
  630. mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
  631. mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
  632. mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +22 -0
  633. mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +24 -0
  634. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +22 -0
  635. mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
  636. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
  637. mindspore/ops/bprop_mindir/Select_bprop.mindir +30 -34
  638. mindspore/ops/bprop_mindir/Shape_bprop.mindir +12 -12
  639. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
  640. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  641. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
  642. mindspore/ops/bprop_mindir/Sign_bprop.mindir +13 -12
  643. mindspore/ops/bprop_mindir/Slice_bprop.mindir +26 -0
  644. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
  645. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  646. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
  647. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
  648. mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
  649. mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +28 -0
  650. mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +23 -0
  651. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  652. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  653. mindspore/ops/bprop_mindir/Split_bprop.mindir +22 -0
  654. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +54 -0
  655. mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +95 -0
  656. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +98 -0
  657. mindspore/ops/bprop_mindir/Switch_bprop.mindir +28 -32
  658. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  659. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
  660. mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +22 -0
  661. mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +29 -0
  662. mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +14 -0
  663. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  664. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  665. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +23 -0
  666. mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +18 -15
  667. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +11 -13
  668. mindspore/ops/bprop_mindir/Unique_bprop.mindir +16 -0
  669. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +22 -0
  670. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
  671. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
  672. mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +13 -12
  673. mindspore/ops/bprop_mindir/__init__.py +1 -4
  674. mindspore/ops/bprop_mindir/generate_mindir.py +32 -20
  675. mindspore/ops/composite/__init__.py +12 -13
  676. mindspore/ops/composite/base.py +261 -254
  677. mindspore/ops/composite/env_ops.py +41 -0
  678. mindspore/ops/composite/math_ops.py +197 -156
  679. mindspore/ops/composite/multitype_ops/_compile_utils.py +428 -176
  680. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +188 -87
  681. mindspore/ops/composite/multitype_ops/add_impl.py +23 -1
  682. mindspore/ops/composite/multitype_ops/div_impl.py +3 -3
  683. mindspore/ops/composite/multitype_ops/equal_impl.py +1 -0
  684. mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -1
  685. mindspore/ops/composite/multitype_ops/getitem_impl.py +52 -5
  686. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
  687. mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
  688. mindspore/ops/composite/multitype_ops/in_impl.py +15 -3
  689. mindspore/ops/composite/multitype_ops/less_equal_impl.py +33 -2
  690. mindspore/ops/composite/multitype_ops/less_impl.py +33 -0
  691. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -2
  692. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  693. mindspore/ops/composite/multitype_ops/mod_impl.py +1 -1
  694. mindspore/ops/composite/multitype_ops/mul_impl.py +21 -7
  695. mindspore/ops/composite/multitype_ops/not_in_impl.py +15 -3
  696. mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
  697. mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
  698. mindspore/ops/composite/multitype_ops/setitem_impl.py +62 -70
  699. mindspore/ops/composite/multitype_ops/sub_impl.py +3 -3
  700. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +41 -4
  701. mindspore/ops/function/__init__.py +323 -8
  702. mindspore/ops/function/array_func.py +3511 -780
  703. mindspore/ops/function/clip_func.py +329 -0
  704. mindspore/ops/function/debug_func.py +6 -6
  705. mindspore/ops/function/grad/__init__.py +5 -1
  706. mindspore/ops/function/grad/grad_func.py +736 -65
  707. mindspore/ops/function/image_func.py +270 -0
  708. mindspore/ops/function/linalg_func.py +268 -8
  709. mindspore/ops/function/math_func.py +8032 -3164
  710. mindspore/ops/function/nn_func.py +5619 -1855
  711. mindspore/ops/function/other_func.py +115 -0
  712. mindspore/ops/function/parameter_func.py +11 -10
  713. mindspore/ops/function/random_func.py +939 -77
  714. mindspore/ops/function/sparse_func.py +249 -84
  715. mindspore/ops/function/sparse_unary_func.py +2303 -0
  716. mindspore/ops/function/spectral_func.py +146 -0
  717. mindspore/ops/function/vmap_func.py +114 -0
  718. mindspore/ops/functional.py +182 -254
  719. mindspore/ops/op_info_register.py +79 -34
  720. mindspore/ops/operations/__init__.py +210 -118
  721. mindspore/ops/operations/_csr_ops.py +7 -7
  722. mindspore/ops/operations/_embedding_cache_ops.py +25 -15
  723. mindspore/ops/operations/_grad_ops.py +447 -322
  724. mindspore/ops/operations/_inner_ops.py +547 -176
  725. mindspore/ops/operations/_map_tensor_ops.py +112 -0
  726. mindspore/ops/operations/_ms_kernel.py +29 -27
  727. mindspore/ops/operations/_ocr_ops.py +11 -11
  728. mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
  729. mindspore/ops/operations/_quant_ops.py +186 -101
  730. mindspore/ops/operations/_rl_inner_ops.py +122 -61
  731. mindspore/ops/operations/_scalar_ops.py +466 -0
  732. mindspore/ops/operations/_sequence_ops.py +1047 -0
  733. mindspore/ops/operations/_tensor_array.py +10 -11
  734. mindspore/ops/operations/_thor_ops.py +4 -4
  735. mindspore/ops/operations/array_ops.py +1428 -1226
  736. mindspore/ops/operations/comm_ops.py +180 -117
  737. mindspore/ops/operations/control_ops.py +4 -2
  738. mindspore/ops/operations/custom_ops.py +185 -98
  739. mindspore/ops/operations/debug_ops.py +92 -54
  740. mindspore/ops/operations/image_ops.py +406 -211
  741. mindspore/ops/operations/inner_ops.py +42 -53
  742. mindspore/ops/operations/linalg_ops.py +32 -29
  743. mindspore/ops/operations/math_ops.py +2076 -897
  744. mindspore/ops/operations/nn_ops.py +1282 -1252
  745. mindspore/ops/operations/other_ops.py +124 -278
  746. mindspore/ops/operations/random_ops.py +345 -178
  747. mindspore/ops/operations/rl_ops.py +8 -9
  748. mindspore/ops/operations/sparse_ops.py +502 -157
  749. mindspore/ops/operations/spectral_ops.py +107 -0
  750. mindspore/ops/primitive.py +192 -15
  751. mindspore/ops/vm_impl_registry.py +23 -2
  752. mindspore/parallel/__init__.py +6 -1
  753. mindspore/parallel/_auto_parallel_context.py +199 -92
  754. mindspore/parallel/_cell_wrapper.py +4 -2
  755. mindspore/parallel/_cost_model_context.py +3 -0
  756. mindspore/parallel/_dp_allreduce_fusion.py +2 -1
  757. mindspore/parallel/_offload_context.py +185 -0
  758. mindspore/parallel/_parallel_serialization.py +167 -28
  759. mindspore/parallel/_ps_context.py +9 -5
  760. mindspore/parallel/_recovery_context.py +1 -1
  761. mindspore/parallel/_tensor.py +9 -1
  762. mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
  763. mindspore/{nn/transformer → parallel/_transformer}/layers.py +59 -37
  764. mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
  765. mindspore/{nn/transformer → parallel/_transformer}/moe.py +160 -35
  766. mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
  767. mindspore/{nn/transformer → parallel/_transformer}/transformer.py +235 -196
  768. mindspore/parallel/_utils.py +47 -7
  769. mindspore/parallel/algo_parameter_config.py +5 -1
  770. mindspore/parallel/checkpoint_transform.py +329 -0
  771. mindspore/parallel/shard.py +229 -0
  772. mindspore/perf_msvcbuildinsights.dll +0 -0
  773. mindspore/pgodb140.dll +0 -0
  774. mindspore/pgort140.dll +0 -0
  775. mindspore/profiler/__init__.py +2 -1
  776. mindspore/profiler/common/util.py +4 -3
  777. mindspore/profiler/common/validator/validate_path.py +2 -2
  778. mindspore/profiler/envprofiling.py +249 -0
  779. mindspore/profiler/parser/aicpu_data_parser.py +38 -39
  780. mindspore/profiler/parser/ascend_timeline_generator.py +497 -0
  781. mindspore/profiler/parser/base_timeline_generator.py +471 -0
  782. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +684 -0
  783. mindspore/profiler/parser/framework_parser.py +42 -16
  784. mindspore/profiler/parser/hccl_parser.py +158 -158
  785. mindspore/profiler/parser/hwts_log_parser.py +7 -6
  786. mindspore/profiler/parser/integrator.py +18 -1579
  787. mindspore/profiler/parser/minddata_analyzer.py +8 -8
  788. mindspore/profiler/parser/msadvisor_analyzer.py +14 -27
  789. mindspore/profiler/parser/msadvisor_parser.py +2 -4
  790. mindspore/profiler/parser/optime_parser.py +17 -18
  791. mindspore/profiler/parser/profiler_info.py +108 -0
  792. mindspore/profiler/parser/step_trace_parser.py +1 -1
  793. mindspore/profiler/profiling.py +396 -194
  794. mindspore/rewrite/__init__.py +6 -2
  795. mindspore/rewrite/api/node.py +51 -110
  796. mindspore/rewrite/api/node_type.py +10 -6
  797. mindspore/rewrite/api/pattern_engine.py +51 -7
  798. mindspore/rewrite/api/scoped_value.py +64 -53
  799. mindspore/rewrite/api/symbol_tree.py +108 -61
  800. mindspore/rewrite/api/tree_node_helper.py +2 -3
  801. mindspore/{compression/quant/__init__.py → rewrite/ast_creator_register.py} +20 -11
  802. mindspore/rewrite/ast_helpers/__init__.py +6 -3
  803. mindspore/rewrite/ast_helpers/ast_creator.py +115 -0
  804. mindspore/rewrite/ast_helpers/ast_finder.py +99 -1
  805. mindspore/rewrite/ast_helpers/ast_modifier.py +17 -4
  806. mindspore/rewrite/ast_helpers/ast_replacer.py +1 -1
  807. mindspore/rewrite/ast_transformers/__init__.py +0 -1
  808. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +46 -5
  809. mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +6 -3
  810. mindspore/rewrite/common/__init__.py +2 -0
  811. mindspore/rewrite/common/event.py +1 -1
  812. mindspore/rewrite/common/observable.py +1 -1
  813. mindspore/rewrite/common/observer.py +1 -1
  814. mindspore/rewrite/common/rewrite_elog.py +35 -0
  815. mindspore/rewrite/namer.py +2 -2
  816. mindspore/rewrite/namespace.py +14 -4
  817. mindspore/rewrite/node.py +161 -13
  818. mindspore/rewrite/parser.py +0 -1
  819. mindspore/rewrite/parser_register.py +0 -1
  820. mindspore/rewrite/parsers/arguments_parser.py +3 -2
  821. mindspore/rewrite/parsers/assign_parser.py +267 -67
  822. mindspore/rewrite/parsers/attribute_parser.py +56 -0
  823. mindspore/rewrite/parsers/class_def_parser.py +191 -108
  824. mindspore/rewrite/parsers/constant_parser.py +101 -0
  825. mindspore/rewrite/parsers/container_parser.py +88 -0
  826. mindspore/rewrite/parsers/for_parser.py +28 -15
  827. mindspore/rewrite/parsers/function_def_parser.py +21 -5
  828. mindspore/rewrite/parsers/if_parser.py +11 -28
  829. mindspore/rewrite/parsers/module_parser.py +9 -6
  830. mindspore/rewrite/parsers/return_parser.py +3 -2
  831. mindspore/rewrite/sparsify/__init__.py +0 -0
  832. mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
  833. mindspore/rewrite/sparsify/sparsify.py +109 -0
  834. mindspore/rewrite/sparsify/utils.py +173 -0
  835. mindspore/rewrite/symbol_tree.py +322 -109
  836. mindspore/rewrite/symbol_tree_builder.py +45 -8
  837. mindspore/rewrite/symbol_tree_dumper.py +0 -1
  838. mindspore/rewrite/topological_manager.py +1 -2
  839. mindspore/run_check/_check_version.py +209 -112
  840. mindspore/run_check/run_check.py +2 -1
  841. mindspore/tbbmalloc.dll +0 -0
  842. mindspore/tinyxml2.dll +0 -0
  843. mindspore/train/__init__.py +6 -4
  844. mindspore/train/_utils.py +28 -5
  845. mindspore/train/amp.py +321 -50
  846. mindspore/train/callback/__init__.py +3 -1
  847. mindspore/train/callback/_backup_and_restore.py +120 -0
  848. mindspore/train/callback/_callback.py +8 -8
  849. mindspore/train/callback/_checkpoint.py +12 -9
  850. mindspore/train/callback/_early_stop.py +13 -7
  851. mindspore/train/callback/_history.py +8 -8
  852. mindspore/train/callback/_lambda_callback.py +6 -6
  853. mindspore/train/callback/_landscape.py +36 -38
  854. mindspore/train/callback/_loss_monitor.py +12 -6
  855. mindspore/train/callback/_lr_scheduler_callback.py +2 -4
  856. mindspore/train/callback/_on_request_exit.py +212 -0
  857. mindspore/train/callback/_reduce_lr_on_plateau.py +13 -7
  858. mindspore/train/callback/_summary_collector.py +27 -19
  859. mindspore/train/callback/_time_monitor.py +13 -7
  860. mindspore/train/checkpoint_pb2.py +68 -8
  861. mindspore/train/data_sink.py +122 -33
  862. mindspore/train/dataset_helper.py +28 -87
  863. mindspore/train/loss_scale_manager.py +4 -7
  864. mindspore/{nn → train}/metrics/__init__.py +20 -20
  865. mindspore/{nn → train}/metrics/accuracy.py +12 -10
  866. mindspore/{nn → train}/metrics/auc.py +4 -4
  867. mindspore/{nn → train}/metrics/bleu_score.py +4 -4
  868. mindspore/{nn → train}/metrics/confusion_matrix.py +10 -8
  869. mindspore/{nn → train}/metrics/cosine_similarity.py +4 -4
  870. mindspore/{nn → train}/metrics/dice.py +6 -5
  871. mindspore/{nn → train}/metrics/error.py +7 -5
  872. mindspore/{nn → train}/metrics/fbeta.py +9 -7
  873. mindspore/{nn → train}/metrics/hausdorff_distance.py +8 -6
  874. mindspore/{nn → train}/metrics/loss.py +4 -3
  875. mindspore/{nn → train}/metrics/mean_surface_distance.py +6 -5
  876. mindspore/{nn → train}/metrics/metric.py +6 -5
  877. mindspore/{nn → train}/metrics/occlusion_sensitivity.py +4 -3
  878. mindspore/{nn → train}/metrics/perplexity.py +5 -4
  879. mindspore/{nn → train}/metrics/precision.py +5 -4
  880. mindspore/{nn → train}/metrics/recall.py +5 -4
  881. mindspore/{nn → train}/metrics/roc.py +7 -6
  882. mindspore/{nn → train}/metrics/root_mean_square_surface_distance.py +6 -5
  883. mindspore/{nn → train}/metrics/topk.py +7 -5
  884. mindspore/train/mind_ir_pb2.py +339 -32
  885. mindspore/train/model.py +113 -84
  886. mindspore/train/serialization.py +547 -167
  887. mindspore/train/summary/_summary_adapter.py +1 -1
  888. mindspore/train/summary/summary_record.py +43 -12
  889. mindspore/train/train_thor/convert_utils.py +7 -1
  890. mindspore/train/train_thor/dataset_helper.py +3 -3
  891. mindspore/train/train_thor/model_thor.py +0 -4
  892. mindspore/turbojpeg.dll +0 -0
  893. mindspore/vcmeta.dll +0 -0
  894. mindspore/vcruntime140.dll +0 -0
  895. mindspore/vcruntime140_1.dll +0 -0
  896. mindspore/version.py +1 -1
  897. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +4 -3
  898. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +901 -660
  899. mindspore/compression/common/constant.py +0 -124
  900. mindspore/compression/export/__init__.py +0 -19
  901. mindspore/compression/export/quant_export.py +0 -514
  902. mindspore/compression/quant/qat.py +0 -636
  903. mindspore/compression/quant/quant_utils.py +0 -462
  904. mindspore/compression/quant/quantizer.py +0 -68
  905. mindspore/libatomic-1.dll +0 -0
  906. mindspore/libgcc_s_seh-1.dll +0 -0
  907. mindspore/libgfortran-4.dll +0 -0
  908. mindspore/libgomp-1.dll +0 -0
  909. mindspore/libjpeg-62.dll +0 -0
  910. mindspore/libmindspore.dll +0 -0
  911. mindspore/libmindspore_common.dll +0 -0
  912. mindspore/libmindspore_core.dll +0 -0
  913. mindspore/libmindspore_glog.dll +0 -0
  914. mindspore/libnnacl.dll +0 -0
  915. mindspore/libopencv_core452.dll +0 -0
  916. mindspore/libopencv_imgcodecs452.dll +0 -0
  917. mindspore/libopencv_imgproc452.dll +0 -0
  918. mindspore/libquadmath-0.dll +0 -0
  919. mindspore/libsqlite3.dll +0 -0
  920. mindspore/libssp-0.dll +0 -0
  921. mindspore/libstdc++-6.dll +0 -0
  922. mindspore/libtinyxml2.dll +0 -0
  923. mindspore/libturbojpeg.dll +0 -0
  924. mindspore/libwinpthread-1.dll +0 -0
  925. mindspore/nn/layer/quant.py +0 -1868
  926. mindspore/nn/layer/rnn_utils.py +0 -90
  927. mindspore/nn/probability/dpn/__init__.py +0 -22
  928. mindspore/nn/probability/dpn/vae/__init__.py +0 -25
  929. mindspore/nn/probability/dpn/vae/cvae.py +0 -138
  930. mindspore/nn/probability/dpn/vae/vae.py +0 -122
  931. mindspore/nn/probability/infer/__init__.py +0 -22
  932. mindspore/nn/probability/infer/variational/elbo.py +0 -70
  933. mindspore/nn/probability/infer/variational/svi.py +0 -84
  934. mindspore/nn/probability/toolbox/__init__.py +0 -22
  935. mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
  936. mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -363
  937. mindspore/nn/probability/transforms/__init__.py +0 -22
  938. mindspore/nn/probability/transforms/transform_bnn.py +0 -262
  939. mindspore/nn/probability/zhusuan/__init__.py +0 -18
  940. mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
  941. mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
  942. mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
  943. mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
  944. mindspore/ops/_op_impl/tbe/bias_add_grad_ds.py +0 -52
  945. mindspore/ops/_op_impl/tbe/scatter_nd_add_ds.py +0 -43
  946. mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -20
  947. mindspore/ops/bprop_mindir/Identity_bprop.mindir +0 -9
  948. mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -20
  949. mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -16
  950. mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -17
  951. mindspore/ops/bprop_mindir/stop_gradient_bprop.mindir +0 -12
  952. mindspore/ops/composite/array_ops.py +0 -210
  953. mindspore/ops/composite/clip_ops.py +0 -238
  954. mindspore/ops/composite/random_ops.py +0 -426
  955. mindspore/ops/composite/vmap_ops.py +0 -38
  956. mindspore/ops/operations/sponge_ops.py +0 -3531
  957. mindspore/ops/operations/sponge_update_ops.py +0 -2546
  958. mindspore/parallel/nn/__init__.py +0 -42
  959. mindspore/parallel/nn/loss.py +0 -22
  960. mindspore/parallel/nn/moe.py +0 -21
  961. mindspore/parallel/nn/op_parallel_config.py +0 -22
  962. mindspore/parallel/nn/transformer.py +0 -31
  963. mindspore/run_check/_check_deps_version.py +0 -84
  964. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
  965. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
  966. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -16,22 +16,13 @@
 import csv
 import json
 import os
-import stat
 from decimal import Decimal
 from enum import Enum
 import sys
-
 from mindspore import log as logger
-from mindspore import context
-from mindspore.context import get_auto_parallel_context
-from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException, \
-    ProfilerFileNotFoundException, ProfilerRawFileException, ProfilerParamValueErrorException
+from mindspore.profiler.common.exceptions.exceptions import ProfilerRawFileException
 from mindspore.profiler.common.util import query_latest_trace_time_file, to_int, to_millisecond
 from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
-from mindspore.profiler.parser.container import TimelineContainer
-from mindspore.profiler.parser.op_intermediate_parser import OPIntermediateParser
-
-SIZE_LIMIT_DEFAULT = 20 * 1024 * 1024  # 20MB


 class Integrator:
@@ -46,9 +37,8 @@ class Integrator:
     _file_name_aicore_detail_time = 'output_op_compute_time_{}.txt'
     _file_name_aicpu_time = 'output_data_preprocess_aicpu_{}.txt'
     _file_name_framework = 'framework_raw_{}.csv'
-    _header_aicore_type = ['op_type', 'execution_time', 'execution_frequency',
-                           'percent']
-    _header_aicore_detail = ['full_op_name', 'execution_time']
+    _header_aicore_type = ['op_type', 'total_time', 'execution_frequency', 'percent']
+    _header_aicore_detail = ['full_op_name', 'execution_time', 'execution_frequency']
     _header_aicpu = ['serial_number', 'op_type', 'total_time', 'dispatch_time',
                      'execution_time', 'run_start', 'run_end']

@@ -62,13 +52,13 @@ class Integrator:
     _aicore_trace_data = []

     def __init__(self, profiling_dir, device_id):
+        csv.field_size_limit(sys.maxsize)
         self._profiling_dir = profiling_dir
         self._device_id = device_id
         self._op_time_cache = {}
         self._total_time = Decimal('0.0')
         self._column = ""
         self._result = []
-        csv.field_size_limit(sys.maxsize)

     @staticmethod
     def _is_match_condition(exp_key, exp_value, actual_value):
@@ -150,16 +140,17 @@ class Integrator:
             op_name_type_cache[row[3]] = row[5]

         op_type_time_cache = {}
-        for full_op_name, op_time in self._op_time_cache.items():
+        total_time = 0
+        for full_op_name, op_info in self._op_time_cache.items():
+            total_time += op_info[0] * op_info[1]
             op_type = op_name_type_cache.get(full_op_name)
             op_type_time = op_type_time_cache.get(op_type)
             if not op_type_time:
-                op_type_time = [op_time, 1]
+                op_type_time = [op_info[0] * op_info[1], op_info[1]]
                 op_type_time_cache[op_type] = op_type_time
             else:
-                op_type_time[0] += op_time
-                op_type_time[1] += 1
-
+                op_type_time[0] += op_info[0] * op_info[1]
+                op_type_time[1] += op_info[1]
         op_type_file_name = 'aicore_intermediate_' + self._device_id + '_type.csv'
         op_type_file_path = os.path.join(self._profiling_dir, op_type_file_name)
         with open(op_type_file_path, 'w') as type_file:
@@ -167,10 +158,11 @@ class Integrator:
             csv_writer.writerow(self._header_aicore_type)

             for op_type, op_type_time_info in op_type_time_cache.items():
-                type_info = [
-                    op_type, op_type_time_info[0], op_type_time_info[1],
-                    round((op_type_time_info[0] / self._total_time) * 100, 2)
-                ]
+                if total_time != 0:
+                    type_info = [
+                        op_type, op_type_time_info[0], op_type_time_info[1],
+                        round((op_type_time_info[0] / total_time) * 100, 2)
+                    ]
                 csv_writer.writerow(type_info)

     def _parse_aicore_detail_time(self):
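Note on the two hunks above: the per-op cache changes from a single execution time to an [average_execution_time, execution_frequency] pair, so per-type totals and percentages are now derived from time multiplied by frequency. A minimal, self-contained sketch of that aggregation (the data and names below are illustrative, not the MindSpore API):

# Hypothetical cache: full op name -> [avg execution time, execution frequency].
op_time_cache = {"Conv2D-op1": [2.0, 3], "ReLU-op2": [0.5, 4]}
op_type_of = {"Conv2D-op1": "Conv2D", "ReLU-op2": "ReLU"}

# Grand total over all ops: sum of avg_time * frequency.
total_time = sum(avg * freq for avg, freq in op_time_cache.values())  # 8.0
type_totals = {}
for full_name, (avg, freq) in op_time_cache.items():
    acc = type_totals.setdefault(op_type_of[full_name], [0.0, 0])
    acc[0] += avg * freq   # accumulated time for this op type
    acc[1] += freq         # accumulated execution count
for op_type, (t, freq) in type_totals.items():
    if total_time != 0:    # same guard as the hunk above
        print(op_type, t, freq, round(t / total_time * 100, 2))
# Conv2D 6.0 3 75.0
# ReLU 2.0 4 25.0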
@@ -210,8 +202,8 @@ class Integrator:
                 if op_infos[0] == 'total':
                     self._total_time = Decimal(op_infos[2])
                     continue
-                self._op_time_cache[op_infos[0]] = Decimal(op_infos[1])
-                csv_writer.writerow([op_infos[0], op_infos[1]])
+                self._op_time_cache[op_infos[0]] = [Decimal(op_infos[1]), int(op_infos[3])]
+                csv_writer.writerow([op_infos[0], op_infos[1], op_infos[3]])

     def _parse_aicpu_time(self):
         """Parse the parsed AICPU operator time file."""
@@ -303,7 +295,7 @@ class Integrator:
         """Load data according to the parsed AICORE operator types file."""
         file_path = query_latest_trace_time_file(self._profiling_dir, int(self._device_id))
         if not file_path:
-            logger.warning("Failed to find parsed trace time file. Dynamic Shape networks can ignore this warning.")
+            logger.warning("Failed to find parsed trace time file.")
             return
         file_path = validate_and_normalize_path(file_path)
         with open(file_path, 'r') as handle:
@@ -519,1556 +511,3 @@ class DeviceTarget(Enum):
     CPU = 'cpu'
     GPU = 'gpu'
     ASCEND = 'ascend'
-
-
-class BaseTimelineGenerator:
-    """
-    Analyse timeline data from file.
-    """
-    # AI Core Op pid is device_id
-    _AI_CPU_PID = 9000
-    _COMMUNICATION_OP_PID = 10000
-    _HOST_CPU_PID = 11000
-    _OP_OVERLAP_PID = 12000
-
-    _OP_GPU_ACTIVITY_PID = 13000
-
-    _RECEIVE_ALONE = 7997
-    _ALLREDUCE_ALONE = 7998
-    _MERGED_COMPUTATION_TID = 7999
-    _PURE_COMMUNICATION_TID = 8000
-    _MERGED_COMMUNICATION_TID = 8001
-    _FREE_TIME_TID = 8002
-    _STEPS_TID = 100000
-    _SCOPE_NAME_TID = 100001
-    _GPU_OP_TID = 100002
-    _HOST_CPU_OP_TID = 100003
-    _SINGLE_TID = 0
-
-    _STEPS_SORT_INDEX = -4
-
-    _output_timeline_data_file_path = 'output_timeline_data_{}.txt'
-    _timeline_meta = []
-    _format_meta_data_list = []
-    _thread_processed_list = []
-
-    _map_tid_name_to_int = {
-        "Steps": (-4, _STEPS_TID),
-        "Scope Name": (-3, _SCOPE_NAME_TID),
-        "GpuOps": (-2, _GPU_OP_TID),
-        "HostCpuOps": (-1, _HOST_CPU_OP_TID)
-    }
-    _timeline_summary = {
-        'total_time': 0,
-        'num_of_streams': 0,
-        'num_of_ops': 0,
-        'op_exe_times': 0,
-        'max_scope_name_num': 0,
-    }
-    _op_name_idx, _tid_idx, _start_time_idx, _duration_idx = 0, 1, 2, 3
-    _max_scope_name_num = 0
-    _host_cpu_op_label = 'Host CPU OP'
-    _gpu_op_label = "GPU Op"
-    _ascend_op_label = "Ascend Op"
-    _aicore_op_label = "AICORE OP"
-    _aicpu_op_label = "AICPU OP"
-
-    _device_id = 0
-    _profiling_dir = ""
-    _timeline_summary_filename = ""
-    _display_filename = ""
-    _op_name_list = []
-    _device_target = DeviceTarget.ASCEND.value
-    _model = context.GRAPH_MODE
-
-    __col_names = ['op_name', 'stream_id', 'start_time', 'duration']
-
-    def __init__(self, device_target, model):
-        self._tid_dict = {
-            "computation_op": (self._MERGED_COMPUTATION_TID, self._OP_OVERLAP_PID),
-            "communication_not_overlapped": (self._PURE_COMMUNICATION_TID, self._OP_OVERLAP_PID),
-            "communication": (self._MERGED_COMMUNICATION_TID, self._OP_OVERLAP_PID),
-            "free_time": (self._FREE_TIME_TID, self._OP_OVERLAP_PID)
-        }
-        self._device_target = str(device_target).lower()
-        self._model = model
-        self._step_start_op_name = ""
-        self._step_end_op_name = ""
-
-    @staticmethod
-    def get_parallel_context():
-        """Get parallel context."""
-        try:
-            parallel_mode = get_auto_parallel_context("parallel_mode")
-            stage_num = get_auto_parallel_context("pipeline_stages")
-        except RuntimeError:
-            logger.warning("[profiler] the feature of cluster bottleneck analyse "
-                           "is not supported in offline parse mode.")
-            parallel_mode = "data_parallel"
-            stage_num = 1
-        if stage_num > 1:
-            parallel_mode = "pipeline-parallel"
-        elif parallel_mode != "data_parallel":
-            parallel_mode = "model-parallel"
-        else:
-            parallel_mode = "data-parallel"
-        return parallel_mode, stage_num
-
-    @staticmethod
-    def _update_num_of_streams(timeline, stream_count_dict):
-        """Update number of streams."""
-        stream_id = timeline[1]
-        if stream_id in ["Steps", "Scope Name"]:
-            return
-        if stream_id not in stream_count_dict.keys():
-            stream_count_dict[stream_id] = 1
-        else:
-            stream_count_dict[stream_id] += 1
-
-    def get_thread_label_name(self):
-        """Get process and thread config."""
-        device_process_label = self._get_device_process_label()
-        return [
-            {"name": "process_labels", "ph": "M", "pid": self._device_id, "args": {"labels": device_process_label}},
-            {"name": "process_labels", "ph": "M", "pid": self._AI_CPU_PID, "args": {"labels": self._aicpu_op_label}},
-            {"name": "process_labels", "ph": "M", "pid": self._COMMUNICATION_OP_PID,
-             "args": {"labels": "Communication Op"}},
-            {"name": "process_labels", "ph": "M", "pid": self._HOST_CPU_PID,
-             "args": {"labels": self._host_cpu_op_label}},
-            {"name": "process_labels", "ph": "M", "pid": self._OP_OVERLAP_PID,
-             "args": {"labels": "Op Overlap Analyse"}},
-            {"name": "process_labels", "ph": "M", "pid": self._OP_GPU_ACTIVITY_PID,
-             "args": {"labels": "Activity Op"}},
-
-            {"name": "process_sort_index", "ph": "M", "pid": self._device_id, "args": {"sort_index": 0}},
-            {"name": "process_sort_index", "ph": "M", "pid": self._AI_CPU_PID, "args": {"sort_index": 10}},
-            {"name": "process_sort_index", "ph": "M", "pid": self._COMMUNICATION_OP_PID, "args": {"sort_index": 20}},
-            {"name": "process_sort_index", "ph": "M", "pid": self._HOST_CPU_PID, "args": {"sort_index": 30}},
-            {"name": "process_sort_index", "ph": "M", "pid": self._OP_OVERLAP_PID, "args": {"sort_index": 40}},
-
-            {"name": "thread_name", "ph": "M", "pid": self._HOST_CPU_PID, "tid": self._HOST_CPU_OP_TID,
-             "args": {"name": "Host CPU Op"}},
-            {"name": "thread_name", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._MERGED_COMPUTATION_TID,
-             "args": {"name": "Merged Computation Op"}},
-            {"name": "thread_name", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._PURE_COMMUNICATION_TID,
-             "args": {"name": "Pure Communication Op"}},
-            {"name": "thread_name", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._MERGED_COMMUNICATION_TID,
-             "args": {"name": "Merged Communication Op"}},
-            {"name": "thread_name", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._FREE_TIME_TID,
-             "args": {"name": "Free Time"}},
-            {"name": "thread_name", "ph": "M", "pid": self._device_id, "tid": self._STEPS_TID,
-             "args": {"name": "Steps"}},
-            {"name": "thread_name", "ph": "M", "pid": self._device_id, "tid": self._SINGLE_TID,
-             "args": {"name": "Ops"}},
-
-            {"name": "thread_sort_index", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._MERGED_COMPUTATION_TID,
-             "args": {"sort_index": self._MERGED_COMPUTATION_TID}},
-            {"name": "thread_sort_index", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._PURE_COMMUNICATION_TID,
-             "args": {"sort_index": self._PURE_COMMUNICATION_TID}},
-            {"name": "thread_sort_index", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._MERGED_COMMUNICATION_TID,
-             "args": {"sort_index": self._MERGED_COMMUNICATION_TID}},
-            {"name": "thread_sort_index", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._FREE_TIME_TID,
-             "args": {"sort_index": self._FREE_TIME_TID}},
-            {"name": "thread_sort_index", "ph": "M", "pid": self._device_id, "tid": self._STEPS_TID,
-             "args": {"sort_index": self._STEPS_SORT_INDEX}},
-        ]
-
-    def write_timeline(self, size_limit=SIZE_LIMIT_DEFAULT):
-        """Write the timeline to file, based on the parsed profiling files."""
-        # Write timeline to file.
-        logger.info('Writing timeline file...')
-        timeline_meta = self.write_timeline_to_json_by_limitation(size_limit)
-        logger.info('Finished file writing!')
-        return timeline_meta
-
-    def write_timeline_to_json_by_limitation(self, size_limit):
-        """Write the timeline to json, up to the given size limit."""
-        display_file_path = os.path.join(
-            self._profiling_dir,
-            self._display_filename
-        )
-        display_file_path = validate_and_normalize_path(display_file_path)
-
-        try:
-            with open(display_file_path, 'w') as json_file:
-                json_file.write('[')
-                for _, item in enumerate(self._timeline_meta):
-                    json.dump(item, json_file)
-                    if "scope_level" in item.keys():
-                        self._max_scope_name_num = max(
-                            self._max_scope_name_num, item["scope_level"] + 1)
-                    file_size = os.path.getsize(display_file_path)
-                    json_file.write(',')
-                    if file_size > size_limit:
-                        break
-                label_name_json = json.dumps(self.get_thread_label_name())
-                label_name_json = label_name_json.lstrip('[')
-                json_file.write(label_name_json)
-                os.chmod(display_file_path, stat.S_IREAD | stat.S_IWRITE)
-                return self._timeline_meta
-        except (IOError, OSError) as err:
-            logger.critical('Error occurred when writing timeline display file: %s', err)
-            raise ProfilerIOException() from err
-
-    def write_timeline_summary(self):
-        """Write the timeline summary to json."""
-        timeline_summary_file_path = os.path.join(
-            self._profiling_dir,
-            self._timeline_summary_filename
-        )
-
-        timeline_summary_file_path = validate_and_normalize_path(timeline_summary_file_path)
-
-        try:
-            with open(timeline_summary_file_path, 'w') as json_file:
-                json.dump(self._timeline_summary, json_file)
-            os.chmod(timeline_summary_file_path, stat.S_IREAD | stat.S_IWRITE)
-        except (IOError, OSError) as err:
-            logger.critical('Error occurred when writing timeline summary file: %s', err)
-            raise ProfilerIOException() from err
-
-    def _get_device_process_label(self):
-        """Get device process label."""
-        device_process_label = self._aicore_op_label
-        if self._device_target == DeviceTarget.ASCEND.value:
-            if self._model == context.GRAPH_MODE:
-                device_process_label = self._aicore_op_label
-            elif self._model == context.PYNATIVE_MODE:
-                device_process_label = self._ascend_op_label
-        elif self._device_target == DeviceTarget.GPU.value:
-            device_process_label = self._gpu_op_label
-        elif self._device_target == DeviceTarget.CPU.value:
-            device_process_label = self._host_cpu_op_label
-        return device_process_label
-
-    def _get_merged_time_list(self, time_list, get_interval_time=False, display_name="computation_op", factor=1):
-        """
-        Get merged time segment list.
-
-        Each item in time_list contains a start_time and an end_time, and overlapping
-        segments are merged. For example, the list [[1,5], [2,6], [7,8]] is merged
-        into [[1,6], [7,8]].
-        """
-        time_merged_segment_list = []
-        tid = self._tid_dict.get(display_name, (0, 0))[0]
-        pid = self._tid_dict.get(display_name, (0, 0))[1]
-        for time_item in time_list:
-            time_segment = list(map(float, time_item[self._start_time_idx:self._duration_idx + 1]))
-            time_segment[1] = time_segment[0] + time_segment[1] / factor
-            if not time_merged_segment_list or \
-                    time_segment[0] > time_merged_segment_list[-1]:
-                time_merged_segment_list.extend(time_segment)
-            else:
-                time_merged_segment_list[-1] = max(
-                    time_merged_segment_list[-1],
-                    time_segment[1]
-                )
-
-        # merged_display_list data used for ui page.
-        merged_display_list = [
-            [display_name, tid, time_merged_segment_list[i * 2],
-             (time_merged_segment_list[i * 2 + 1] - time_merged_segment_list[i * 2]) * factor, pid] for i in \
-            range(len(time_merged_segment_list) // 2)
-        ]
-
-        if get_interval_time:
-            time_merged_segment_list = time_merged_segment_list[1:-1]
-
-        # merged_res_list data used to compute overlap with other time_list.
-        merged_res_list = []
-        for i in range(len(time_merged_segment_list) // 2):
-            merged_res_list.append([display_name, tid, time_merged_segment_list[i * 2],
-                                    time_merged_segment_list[i * 2 + 1], pid])
-
-        # interval_display_list is interval time used for ui page.
-        interval_display_list = [
-            [display_name, tid, time_merged_segment_list[i * 2],
-             (time_merged_segment_list[i * 2 + 1] - time_merged_segment_list[i * 2]) * factor, pid]
-            for i in range(len(time_merged_segment_list) // 2)
-        ]
-
-        return merged_res_list, interval_display_list, merged_display_list
-
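The merge performed above can be shown standalone on plain [start, end] pairs (a sketch with illustrative data; the method itself operates on timeline rows and a unit factor):

# Interval merging: overlapping or touching-by-overlap segments collapse into one.
def merge_segments(segments):
    merged = []
    for start, end in sorted(segments):
        if not merged or start > merged[-1][1]:
            merged.append([start, end])      # disjoint: open a new segment
        else:
            merged[-1][1] = max(merged[-1][1], end)  # overlap: extend the last one
    return merged

print(merge_segments([[1, 5], [2, 6], [7, 8]]))  # [[1, 6], [7, 8]]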
792
- def _update_format_meta_data(self, timeline_dict):
793
- """Update format meta data which control the display arrange and map the thread name."""
794
- thread_name_meta_data = {
795
- "name": "thread_name",
796
- "pid": int(self._device_id),
797
- "tid": 100000,
798
- "ts": 0,
799
- "ph": "M",
800
- "cat": "__metadata",
801
- "args": {
802
- "name": "Steps"
803
- }
804
- }
805
- tid_name = timeline_dict['tid']
806
- sort_index = 0
807
-
808
- if tid_name in self._map_tid_name_to_int.keys():
809
- sort_index, tid = self._map_tid_name_to_int[tid_name]
810
- elif tid_name.startswith("Stream"):
811
- tid = int(tid_name.split("#")[-1])
812
- sort_index = tid
813
- else:
814
- return
815
-
816
- if self._host_cpu_op_label == tid_name[:len(self._host_cpu_op_label)]:
817
- thread_name_meta_data['pid'] = self._HOST_CPU_PID
818
-
819
- thread_name_meta_data["tid"] = tid
820
- thread_name_meta_data["args"]["name"] = tid_name
821
- thread_sort_meta_data = thread_name_meta_data.copy()
822
- thread_sort_meta_data['name'] = "thread_sort_index"
823
- thread_sort_meta_data["args"] = {"sort_index": sort_index}
824
- timeline_dict["tid"] = tid
825
-
826
- if tid_name in self._thread_processed_list:
827
- return
828
-
829
- self._thread_processed_list.append(tid_name)
830
- self._format_meta_data_list.append(thread_name_meta_data)
831
- self._format_meta_data_list.append(thread_sort_meta_data)
832
-
833
- def _get_max_scope_name_num(self, timeline_list):
834
- """Get the max number of scope level from all operator."""
835
- max_scope_name_num = 0
836
- for time_item in timeline_list:
837
- cur_scope_name_num = len(time_item[self._op_name_idx].split('/')) - 1
838
- max_scope_name_num = max(cur_scope_name_num, max_scope_name_num)
839
-
840
- return max_scope_name_num
841
-
842
-    def _get_scope_name_time_list(self, timeline_list, subgraph, factor_start_time_to_duration=1):
-        """Produce the timeline of hierarchical scope names."""
-        # The key of scope_name_start_duration_dict is a scope name; the value is a dict that stores
-        # the start and end index of time_item in timeline_list.
-        scope_name_start_duration_dict = {}
-        scope_name_time_list = []
-        op_full_name_idx, scope_name_idx, invalid_idx = 0, 0, -1
-        tid = "Scope Name"
-        for idx, time_item in enumerate(timeline_list):
-            scope_name_list = time_item[op_full_name_idx].split('/')[:-1]
-            # Skip the Default/InitDataSetQueue operator.
-            if time_item[op_full_name_idx].startswith("Default/InitDataSetQueue"):
-                scope_name_list = []
-            # Process scope names of the given subgraph (Default/Gradients/recompute_Default) only.
-            if scope_name_list and scope_name_list[0] != subgraph:
-                scope_name_list = []
-            # Add the scope level, used to distinguish the same name at different scope levels.
-            scope_name_list = [f"{scope_level}-{scope_name}"
-                               for scope_level, scope_name in enumerate(scope_name_list)]
-
-            # Update the start and end index of time_item according to the current scope_name.
-            for scope_name in scope_name_list:
-                init_start_end_idx_dict = {'start_item_idx': idx, 'end_item_idx': idx}
-                if scope_name not in scope_name_start_duration_dict:
-                    scope_name_start_duration_dict[scope_name] = init_start_end_idx_dict
-                if scope_name_start_duration_dict[scope_name]['start_item_idx'] == invalid_idx:
-                    scope_name_start_duration_dict[scope_name] = init_start_end_idx_dict
-                else:
-                    scope_name_start_duration_dict[scope_name]['end_item_idx'] = idx
-            # If a key (scope name) in scope_name_start_duration_dict does not appear in scope_name_list,
-            # that scope has ended and it is appended to scope_name_time_list.
-            for key, val in scope_name_start_duration_dict.items():
-                if val['start_item_idx'] == invalid_idx:
-                    continue
-                if (key not in scope_name_list) \
-                        or idx == (len(timeline_list) - 1) \
-                        or time_item[op_full_name_idx] == self._step_end_op_name:
-                    start_time = timeline_list[val['start_item_idx']][self._start_time_idx]
-                    duration = (float(timeline_list[val['end_item_idx']][self._start_time_idx]) - float(start_time)) * \
-                        factor_start_time_to_duration + \
-                        float(timeline_list[val['end_item_idx']][self._duration_idx])
-                    scope_name_time_item = [key, tid, start_time, duration]
-                    scope_name_time_list.append(scope_name_time_item)
-                    scope_name_start_duration_dict[key]['start_item_idx'] = invalid_idx
-
-        # x[scope_name_idx] is a scope name like "0-Default".
-        # If two elements in scope_name_time_list have the same start time,
-        # the earlier element in the list is displayed on the higher line in the UI page.
-        scope_name_time_list.sort(
-            key=lambda x: (float(x[self._start_time_idx]), int(x[scope_name_idx].split('-')[0]))
-        )
-
-        return scope_name_time_list
-
-    def _set_step_start_and_end_op_name(self, timeline_list):
-        """Set the start and end operator full name of each step."""
-        if not timeline_list:
-            return
-        start_op_idx = 0
-        if timeline_list[0][self._op_name_idx].startswith("Default/InitDataSetQueue"):
-            start_op_idx = 1
-        self._step_start_op_name = timeline_list[start_op_idx][self._op_name_idx]
-        self._step_end_op_name = self._step_start_op_name
-        if len(timeline_list) > (start_op_idx + 1):
-            for time_item in timeline_list[start_op_idx + 1:]:
-                if time_item[self._op_name_idx] != self._step_start_op_name:
-                    self._step_end_op_name = time_item[self._op_name_idx]
-                else:
-                    break
-
-    def _get_step_time_list(self, timeline_list, factor_start_time_to_duration=1):
-        """Produce the time of each step."""
-        # Record the time of each step.
-        step_time_list = []
-        step_num = 1
-        tid = "Steps"
-        cur_step_start_time, cur_step_duration_time = 0, 0
-        for time_item in timeline_list:
-            if time_item[self._op_name_idx] == self._step_start_op_name:
-                cur_step_start_time = time_item[self._start_time_idx]
-            if time_item[self._op_name_idx] == self._step_end_op_name:
-                cur_step_duration_time = (float(time_item[self._start_time_idx]) - float(cur_step_start_time)) * \
-                    float(factor_start_time_to_duration) + float(time_item[self._duration_idx])
-                step_time_item = [str(step_num), tid, float(cur_step_start_time), cur_step_duration_time]
-                step_time_list.append(step_time_item)
-                step_num += 1
-
-        return step_time_list
-
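A worked example (made-up numbers) of the step-duration formula used above, duration = (end_op_start - step_start) * factor + end_op_duration:

step_start = 100.0       # start time of the step-start op
end_op_start = 250.0     # start time of the step-end op
end_op_duration = 5.0    # duration of the step-end op
factor = 1               # start times and durations share a unit here
step_duration = (end_op_start - step_start) * factor + end_op_duration
print(step_duration)     # 155.0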
-    def _write_cluster_metrices(self, metrices, is_pipeline_parallel, device_target, dev_id):
-        """Write the cluster metrics."""
-        # Note that the feature of cluster bottleneck analyse is not supported in offline parse mode,
-        # because the parallel context is not set.
-        parallel_mode, stage_num = BaseTimelineGenerator.get_parallel_context()
-
-        unit = 1 if device_target == "Ascend" else 1e3
-        time_decimal_digits = 4
-        cluster_analyse_file_path = os.path.join(
-            self._profiling_dir,
-            self._cluster_analyse_filename.format(parallel_mode, stage_num, self._rank_size, dev_id)
-        )
-        cluster_analyse_file_path = validate_and_normalize_path(cluster_analyse_file_path)
-
-        try:
-            with open(cluster_analyse_file_path, 'w') as file_handle:
-                csv_writer = csv.writer(file_handle)
-                if is_pipeline_parallel:
-                    header = ['computation_time', 'communication_alone_time', 'stage_time',
-                              'receive_alone_time', 'collective_communication_alone_time']
-                    zip_metrices = zip(metrices[0], metrices[1], metrices[2], metrices[3], metrices[4])
-                else:
-                    header = ['computation_time', 'communication_alone_time']
-                    zip_metrices = zip(metrices[0], metrices[1])
-                csv_writer.writerow(header)
-                for row_data in zip_metrices:
-                    row_data = [round(val / unit, time_decimal_digits) for val in row_data]
-                    csv_writer.writerow(row_data)
-            os.chmod(cluster_analyse_file_path, stat.S_IREAD | stat.S_IWRITE)
-        except (IOError, OSError) as err:
-            logger.warning(f'Failed to save {cluster_analyse_file_path}. {err}')
-            raise ProfilerIOException from err
-
-    def _register_op_name(self, timeline_list):
-        """Register op names to the op name list."""
-        for timeline in timeline_list:
-            if timeline and timeline[self._op_name_idx] not in self._op_name_list:
-                self._op_name_list.append(timeline[self._op_name_idx])
-
-class GpuTimelineGenerator(BaseTimelineGenerator):
-    """Generate GPU timeline data from file."""
-    _display_filename = 'gpu_timeline_display_{}.json'
-    _timeline_summary_filename = 'gpu_timeline_summary_{}.json'
-    _output_op_execute_time_file_path = "gpu_op_execute_timestamp_{}.txt"
-    _output_activity_execute_time_file_path = "activity_execute_timestamp_{}.txt"
-    _output_gpu_activity_info_file_path = "gpu_activity_data_{}.csv"
-    _step_trace_original_filename = 'step_trace_profiling_{}.txt'
-    _cluster_analyse_filename = 'gpu_cluster_analyse_{}_{}_{}_{}.csv'
-    _activity_keys_list = []
-
-    def __init__(self, profiling_dir, device_id, rank_size, model):
-        super().__init__(DeviceTarget.GPU.value, model)
-        self._device_id = device_id
-        self._rank_size = rank_size
-        self._profiling_dir = profiling_dir
-        self._timeline_meta = []
-        self._display_filename = self._display_filename.format(device_id)
-        self._timeline_summary_filename = self._timeline_summary_filename.format(device_id)
-        self._tid_dict = {
-            "receive_op_not_overlapped": (self._RECEIVE_ALONE, self._OP_OVERLAP_PID),
-            "exclude_receive_op": (self._ALLREDUCE_ALONE, self._OP_OVERLAP_PID),
-            "computation_op": (self._MERGED_COMPUTATION_TID, self._OP_OVERLAP_PID),
-            "communication_not_overlapped": (self._PURE_COMMUNICATION_TID, self._OP_OVERLAP_PID),
-            "communication": (self._MERGED_COMMUNICATION_TID, self._OP_OVERLAP_PID),
-            "free_time": (self._FREE_TIME_TID, self._OP_OVERLAP_PID)
-        }
-
-    def init_timeline(self, reduce_op_type):
-        """Init timeline metadata, adding all collected info."""
-        timeline_list = self._load_timeline_data(reduce_op_type)
-
-        # Init a dict for counting the num of streams.
-        stream_count_dict = {}
-        for timeline in timeline_list:
-            self._parse_timeline_data(timeline, 0)
-            # Updating the collection of streams.
-            if len(timeline) == 4:
-                self._update_num_of_streams(timeline, stream_count_dict)
-
-        # Add format thread meta data.
-        self._format_meta_data_list.extend(self._timeline_meta)
-        self._timeline_meta = self._format_meta_data_list
-
-        # Update timeline summary info.
-        self._timeline_summary['num_of_streams'] += len(stream_count_dict)
-
-    def check_op_name(self, op_name):
-        """
-        Check whether the operator name exists.
-
-        Args:
-            op_name (str): The operator name or operator name prefix.
-
-        Returns:
-            bool, `True` if the operator name does exist, else `False`.
-        """
-        if not op_name:
-            raise ProfilerParamValueErrorException('The op_name should exist.')
-        for op_time_info in self._timeline_meta:
-            full_op_name = op_time_info['name']
-            if full_op_name and full_op_name.startswith(op_name):
-                return True
-        return False
-
-    def is_gpu_kernel_async_launch(self):
-        """Recognize whether the GPU kernels were launched asynchronously."""
-        step_trace_profiling_path = self._get_and_validate_path(
-            self._step_trace_original_filename
-        )
-        try:
-            with open(step_trace_profiling_path, 'r') as f_obj:
-                line = next(f_obj)
-                first_string = line.strip().split()[0]
-                # The data format of the async kernel launch is "Default/op1,160123 op-name";
-                # otherwise, the data format is "Default/op1 160123,12 ".
-                return bool(len(first_string.split(',')) == 2)
-        except (IOError, OSError) as err:
-            logger.critical(f'Error occurred when read {step_trace_profiling_path}: {err}')
-            raise ProfilerIOException() from err
-        except StopIteration:
-            logger.warning('No step trace data exists.')
-            return False
-
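A sketch of the format probe above, using the two sample lines quoted in the comments (the helper name is hypothetical): the first whitespace-delimited token contains a comma only in the async-launch format.

def looks_async(first_line):
    first_string = first_line.strip().split()[0]
    return len(first_string.split(',')) == 2

print(looks_async("Default/op1,160123 op-name"))  # True  (async launch format)
print(looks_async("Default/op1 160123,12"))       # False (synchronous format)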
-    def _get_and_validate_path(self, file_name):
-        """Generate op or activity file path from file name, and validate this path."""
-        file_path = os.path.join(
-            self._profiling_dir,
-            file_name.format(self._device_id)
-        )
-        file_path = validate_and_normalize_path(file_path)
-        if not os.path.exists(file_path):
-            logger.critical(f"Failed to find parsed timeline file {file_path}.")
-            raise ProfilerFileNotFoundException('parsed timeline file')
-
-        return file_path
-
-    def _parse_timeline_data(self, timeline, min_cycle_counter):
-        """Parse timeline data."""
-        # factor to convert the time unit of start_time(ts) from 1ns to 1us for timeline display
-        factor = 1000
-        op_meta = TimelineContainer(timeline)
-        timeline_dict = {}
-        timeline_dict['name'] = op_meta.op_name.split('/')[-1]
-        timeline_dict['ph'] = 'X'
-        timeline_dict['tid'] = op_meta.stream_id
-        timeline_dict['ts'] = (op_meta.start_time - min_cycle_counter) / factor
-        dur = op_meta.duration
-        timeline_dict['dur'] = dur
-        if op_meta.pid is None:
-            timeline_dict['pid'] = int(self._device_id)
-        else:
-            timeline_dict['pid'] = op_meta.pid
-        if op_meta.stream_id == "Scope Name":
-            # remove the level of scope name which has a format like "0-conv2-Conv2d".
-            timeline_dict['name'] = "-".join(op_meta.op_name.split('-')[1:])
-            timeline_dict['scope_level'] = int(op_meta.op_name.split('-')[0])
-        elif op_meta.stream_id[:len(self._host_cpu_op_label)] == self._host_cpu_op_label:
-            timeline_dict['pid'] = self._HOST_CPU_PID
-
-        if len(timeline) > 4:
-            # len(timeline) > 4 refers to activity data, else op data.
-            # Add args for activity data.
-            args_dict = {}
-            for ix, value in enumerate(timeline[4:]):
-                args_dict[self._activity_keys_list[ix]] = value
-            timeline_dict['args'] = args_dict
-            timeline_dict['tid'] = f"Stream #{timeline_dict['tid']}"
-        elif op_meta.stream_id not in ["Scope Name", "Steps"]:
-            # Update total time of operator execution.
-            self._timeline_summary['total_time'] += dur / factor
-            self._timeline_summary['op_exe_times'] += 1
-
-        self._update_format_meta_data(timeline_dict)
-        self._timeline_meta.append(timeline_dict)
-
-    def _load_timeline_data(self, reduce_op_type):
-        """Load timeline data from file."""
-        op_file_path = self._get_and_validate_path(
-            self._output_op_execute_time_file_path)
-        activity_file_path = self._get_and_validate_path(
-            self._output_activity_execute_time_file_path)
-        activity_args_file_path = self._get_and_validate_path(
-            self._output_gpu_activity_info_file_path)
-
-        timeline_list, communication_info = self._load_op_data(op_file_path, reduce_op_type)
-        communication_info.sort(key=lambda x: float(x[2]))
-        # Add host cpu op timeline.
-        cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir, self._model)
-        cpu_timeline_list = cpu_timeline_generator.load_cpu_op_data()
-        if cpu_timeline_list:
-            self._clock_synchronize_to_gpu(cpu_timeline_list)
-            timeline_list.extend(cpu_timeline_list)
-        timeline_list.sort(key=lambda x: float(x[2]))
-        self._max_scope_name_num = self._get_max_scope_name_num(timeline_list)
-        self._timeline_summary['max_scope_name_num'] = self._max_scope_name_num
-
-        # Generate step time.
-        factor_start_time_unit_to_duration = 1e-3
-        self._set_step_start_and_end_op_name(timeline_list)
-        # Fit the gpu kernel async launch solution.
-        if self.is_gpu_kernel_async_launch():
-            step_time_list = self._get_step_time_list_from_step_trace()
-        else:
-            step_time_list = self._get_step_time_list(timeline_list, factor_start_time_unit_to_duration)
-
-        # Add Scope Name.
-        default_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "Default",
-                                                                      factor_start_time_unit_to_duration)
-        gradient_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "Gradients",
-                                                                       factor_start_time_unit_to_duration)
-        recompute_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "recompute_Default",
-                                                                        factor_start_time_unit_to_duration)
-
-        activity_timeline_list, cuda_compute_ops_timeline_list = self._load_activity_data(
-            activity_file_path, activity_args_file_path)
-
-        # Add AllReduce info to the timeline temp list and sort by start time.
-        if communication_info:
-            logger.debug('AllReduce info found. Start adding info to timeline...')
-            cluster_related_timeline = self._get_cluster_timeline(
-                timeline_list, cuda_compute_ops_timeline_list, communication_info, step_time_list)
-            timeline_list.extend(cluster_related_timeline)
-            timeline_list.extend(communication_info)
-            timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
-
-        timeline_list.extend(default_scope_name_time_list)
-        timeline_list.extend(gradient_scope_name_time_list)
-        timeline_list.extend(recompute_scope_name_time_list)
-        timeline_list.extend(step_time_list)
-
-        timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
-
-        # Add cuda activity timeline.
-        timeline_list.extend(activity_timeline_list)
-        timeline_list.sort(key=lambda x: float(x[2]))
-
-        return timeline_list
-
-    def _clock_synchronize_to_gpu(self, timeline_list):
-        """Synchronize the timestamps from the host clock to the GPU clock."""
-        start_time_file_path = os.path.join(self._profiling_dir, f"start_time_{self._device_id}.txt")
-
-        try:
-            with open(start_time_file_path) as f_obj:
-                lines = f_obj.readlines()
-                # lines[0] stores the host monotonic time of start training.
-                host_monotonic_start_time = int(lines[0].strip().split(':')[-1])
-                # lines[1] stores the gpu time of start training.
-                gpu_start_time = int(lines[1].strip().split(':')[-1])
-        except (IOError, OSError) as err:
-            logger.critical(f'Error occurred when read {start_time_file_path}: {err}')
-            raise ProfilerIOException() from err
-
-        time_diff = gpu_start_time - host_monotonic_start_time
-        for idx, time_item in enumerate(timeline_list):
-            timeline_list[idx][self._start_time_idx] = int(time_item[self._start_time_idx]) + time_diff
-
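A tiny numeric example (made-up values) of the shift applied above: every host timestamp is moved onto the GPU clock by a constant offset measured at training start.

host_monotonic_start_time = 1_000_000   # host clock at training start
gpu_start_time = 4_000_000              # GPU clock at training start
time_diff = gpu_start_time - host_monotonic_start_time  # 3_000_000
host_ts = 1_000_500                     # some CPU-op timestamp on the host clock
print(host_ts + time_diff)              # 4000500: the same event on the GPU timebase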
-    def _load_op_data(self, op_file_path, reduce_op_type):
-        """Load operator data from file."""
-        op_timeline_list = []
-        communication_info = []
-        try:
-            with open(op_file_path, 'r') as f_obj:
-                for line in f_obj:
-                    self._timeline_summary['num_of_ops'] += 1
-                    op_list = line.strip('\n').strip().split(';')
-                    time_arr = op_list[-1]
-                    time_arr = time_arr.split(" ")
-                    for time in time_arr:
-                        time = time.split(",")
-                        line_list = op_list[:2] + time
-                        communication_op_name = line_list[0].strip().split('/')[-1]
-                        if communication_op_name not in reduce_op_type:
-                            op_timeline_list.append(line_list)
-                        else:
-                            communication_info.append(line_list)
-        except (IOError, OSError) as err:
-            logger.critical('Error occurred when load operator timeline data intermediate file: %s', err)
-            raise ProfilerIOException() from err
-
-        return op_timeline_list, communication_info
-
-    def _load_activity_data(self, activity_file_path, activity_args_file_path):
-        """Load activity data from file."""
-        activity_timeline_list = []
-        cuda_compute_ops_timeline_list = []
-        args_dict = {}
-        try:
-            with open(activity_args_file_path, 'r') as args_file:
-                csv_reader = csv.reader(args_file)
-                keys_list = next(csv_reader)
-                # keys_list: [name, type, op_full_name, stream_id, block_dim, grid_dim, ...]
-                self._activity_keys_list = keys_list[1:3] + keys_list[4:6]
-                for info in csv_reader:
-                    args_dict[info[0]] = info[1:3] + info[4:6]
-            with open(activity_file_path, 'r') as f_obj:
-                for line in f_obj:
-                    line_list = line.strip('\n').split(';')
-                    # concat activity args info.
-                    line_list += args_dict[line_list[0]]
-                    if not line_list[0].startswith('nccl'):
-                        cuda_compute_ops_timeline_list.append(line_list)
-                    activity_timeline_list.append(line_list)
-        except (IOError, OSError) as err:
-            logger.critical('Error occurred when load activity timeline data intermediate file: %s', err)
-            raise ProfilerIOException() from err
-
-        return activity_timeline_list, cuda_compute_ops_timeline_list
-
-    def _get_step_time_list_from_step_trace(self):
-        """Produce the time of each step based on the step_trace_profiling file."""
-        # Record the time of each step.
-        step_time_list = []
-        step_start_op_name = []
-        step_end_op_name = []
-        step_num = 1
-        tid = "Steps"
-        step_trace_profiling_path = self._get_and_validate_path(
-            self._step_trace_original_filename
-        )
-
-        try:
-            with open(step_trace_profiling_path, 'r') as f_obj:
-                for line in f_obj:
-                    line = line.strip().split()
-                    step_start_op_name.append(line[0].split(',')[0])
-                    step_end_op_name.append(line[3].split(',')[0])
-                    cur_step_start_time = float(line[0].split(',')[1])
-                    cur_step_end_time = float(line[3].split(',')[1])
-                    # convert duration time unit from ns to us.
-                    cur_step_duration_time = (cur_step_end_time - cur_step_start_time) / 1e3
-                    step_time_item = [str(step_num), tid, cur_step_start_time, cur_step_duration_time]
-                    step_time_list.append(step_time_item)
-                    step_num += 1
-        except (IOError, OSError) as err:
-            logger.critical(f'Error occurred when read {step_trace_profiling_path}: {err}')
-            raise ProfilerIOException() from err
-
-        return step_time_list
-
-    def _get_cluster_timeline(self, timeline, activity_info, comm_info, step_info):
-        """
-        Analyse the cluster communication and computation data, and write the result to file.
-
-        To analyse the cluster performance bottleneck based on the timeline, define the time of a
-        training step as "t_total" and propose five metrics as follows:
-        1) The time that "receive" operators are not overlapped by others (t1)
-        2) The time that is consumed inside the stage (t_total - t1)
-        3) The time that "communication" operators are not overlapped by others (t2)
-        4) The time that is consumed by computation (t_total - t2)
-        5) The time that "collective communication" operators are not overlapped by others (t3)
-        In pipeline parallel mode, we can locate the slow stage based on t_total - t1. Inside each
-        stage, we can locate the slow card based on t_total - t2. The value of t1 indicates the degree
-        to which communication between stages slows down the training. The value of t3 indicates the
-        degree to which communication inside each stage slows down the training.
-        """
-        step_num = len(step_info)
-        is_pipeline_parallel = False
-        comm_merged_timeline, _, comm_display_timeline = self._get_merged_time_list(
-            comm_info,
-            display_name="communication",
-            factor=1e-3
-        )
-        compute_op_timeline = timeline + activity_info
-        compute_op_timeline.sort(key=lambda x: float(x[self._start_time_idx]))
-        compute_op_timeline_interval, _, compute_op_display_timeline = self._get_merged_time_list(
-            compute_op_timeline,
-            get_interval_time=True,
-            factor=1e-3
-        )
-        # Consider if the overlap will be 0 or not.
-        comm_not_overlapped_timeline = self._get_intersection_time(
-            compute_op_timeline_interval,
-            comm_merged_timeline
-        )
-
-        # Process receive part.
-        all_timeline = timeline + comm_info
-        all_timeline.sort(key=lambda x: float(x[self._start_time_idx]))
-        receive_op_timeline = self._produce_two_separated_timeline(
-            all_timeline,
-            "Receive-op"
-        )[0]
-        if receive_op_timeline:
-            is_pipeline_parallel = True
-        receive_op_merged_timeline = self._get_merged_time_list(receive_op_timeline,
-                                                                factor=1e-3)[0]
-
-        receive_op_not_overlapped_timeline = self._get_intersection_time(
-            compute_op_timeline_interval,
-            receive_op_merged_timeline,
-            display_name="receive_op_not_overlapped"
-        )
-
-        # Process collective communication part.
-        collective_comm_timeline = self._produce_two_separated_timeline(
-            comm_info,
-            "Receive-op"
-        )[-1]
-        collective_comm_merged_timeline = self._get_merged_time_list(collective_comm_timeline,
-                                                                     factor=1e-3)[0]
-        collective_comm_not_overlapped_timeline = self._get_intersection_time(
-            compute_op_timeline_interval,
-            collective_comm_merged_timeline,
-            display_name="exclude_receive_op"
-        )
-
-        # Generate free time, which excludes computation and communication time.
-        all_timeline = compute_op_timeline + comm_info
-        all_timeline.sort(key=lambda x: float(x[self._start_time_idx]))
-        free_timeline = self._get_merged_time_list(
-            all_timeline,
-            get_interval_time=True,
-            display_name="free_time",
-            factor=1e-3
-        )[1]
-
-        # Compute the five metrics mentioned above per step.
-        receive_alone_time = self._compute_time_inside_step(receive_op_not_overlapped_timeline, step_info)
-        stage_time, computation_time = [], []
-        comm_alone_time = self._compute_time_inside_step(comm_not_overlapped_timeline, step_info)
-        collective_comm_alone_time = self._compute_time_inside_step(
-            collective_comm_not_overlapped_timeline,
-            step_info
-        )
-        for step in range(step_num):
-            try:
-                if is_pipeline_parallel:
-                    stage_time.append(step_info[step][self._duration_idx] - receive_alone_time[step])
-                computation_time.append(step_info[step][self._duration_idx] - comm_alone_time[step])
-            except IndexError as e:
-                logger.error(e)
-
-        metrices_per_step_list = [computation_time, comm_alone_time, stage_time,
-                                  receive_alone_time, collective_comm_alone_time]
-        if step_num > 1:
-            for metric in metrices_per_step_list:
-                metric.append(sum(metric[1:]) / (step_num - 1))
-        self._write_cluster_metrices(metrices_per_step_list, is_pipeline_parallel, "Gpu", self._device_id)
-
-        res_timeline = []
-        res_timeline.extend(comm_not_overlapped_timeline)
-        res_timeline.extend(compute_op_display_timeline)
-        res_timeline.extend(comm_display_timeline)
-        res_timeline.extend(free_timeline)
-        return res_timeline
-
-    def _compute_time_inside_step(self, metric_timeline, step_time_list):
-        """Compute the per-step time of metric_timeline."""
-        per_step_time_list = []
-        step = 0
-        cur_step_metric_time = 0
-        factor_us_to_ns = 1e3
-        step_end_time = step_time_list[step][self._start_time_idx] + \
-            step_time_list[step][self._duration_idx] * factor_us_to_ns
-        for time_item in metric_timeline:
-            start_time = time_item[self._start_time_idx]
-            if start_time > step_end_time:
-                per_step_time_list.append(cur_step_metric_time)
-                step += 1
-                if step >= len(step_time_list):
-                    logger.warning("Found more metric data than steps while computing the "
-                                   "per-step time; the current graph may contain multiple "
-                                   "subgraphs, so the trailing data is skipped.")
-                    break
-                step_end_time = step_time_list[step][self._start_time_idx] + \
-                    step_time_list[step][self._duration_idx] * factor_us_to_ns
-                cur_step_metric_time = 0
-            cur_step_metric_time += time_item[self._duration_idx]
-        per_step_time_list.append(cur_step_metric_time)
-
-        return per_step_time_list
-
-    def _get_intersection_time(self, first_time_list, second_time_list,
-                               display_name="communication_not_overlapped"):
-        """Get intersection time of two time lists."""
-        first_list_idx, second_list_idx = 0, 0
-        first_list_len = len(first_time_list)
-        second_list_len = len(second_time_list)
-        intersection_segment_display_list = []
-        factor_ns_to_us = 1e-3
-        while first_list_idx < first_list_len and second_list_idx < second_list_len:
-            intersection_start = max(
-                first_time_list[first_list_idx][self._start_time_idx],
-                second_time_list[second_list_idx][self._start_time_idx]
-            )
-            intersection_end = min(
-                first_time_list[first_list_idx][self._duration_idx],
-                second_time_list[second_list_idx][self._duration_idx]
-            )
-            if intersection_start < intersection_end:
-                intersection_segment_display_list.append(
-                    [display_name, self._tid_dict[display_name][0],
-                     intersection_start, (intersection_end - intersection_start) * factor_ns_to_us,
-                     self._tid_dict[display_name][1]]
-                )
-            if first_time_list[first_list_idx][self._duration_idx] >= \
-                    second_time_list[second_list_idx][self._duration_idx]:
-                second_list_idx += 1
-            else:
-                first_list_idx += 1
-
-        return intersection_segment_display_list
-
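The sweep above is a classic two-pointer interval intersection; a standalone sketch on plain [start, end] pairs sorted by start time (illustrative only, without the timeline-row bookkeeping):

def intersect(a, b):
    i, j, out = 0, 0, []
    while i < len(a) and j < len(b):
        start = max(a[i][0], b[j][0])
        end = min(a[i][1], b[j][1])
        if start < end:
            out.append([start, end])   # non-empty overlap between a[i] and b[j]
        # Advance whichever interval ends first; it cannot overlap anything later.
        if a[i][1] >= b[j][1]:
            j += 1
        else:
            i += 1
    return out

print(intersect([[0, 4], [6, 9]], [[2, 7]]))  # [[2, 4], [6, 7]]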
1437
- def _produce_two_separated_timeline(self, timeline, op_name):
1438
- """Produce two separated timeline based on op_name."""
1439
- timeline_include_op_name = []
1440
- timeline_exclude_op_name = []
1441
- for time_item in timeline:
1442
- if op_name in time_item[self._op_name_idx]:
1443
- timeline_include_op_name.append(time_item)
1444
- else:
1445
- timeline_exclude_op_name.append(time_item)
1446
- return timeline_include_op_name, timeline_exclude_op_name
1447
-
1448
-
1449
- class AscendTimelineGenerator(BaseTimelineGenerator):
1450
- """Generate ascend Timeline data from file."""
1451
- _display_filename = 'ascend_timeline_display_{}.json'
1452
- _timeline_summary_filename = 'ascend_timeline_summary_{}.json'
1453
- _cluster_analyse_filename = 'ascend_cluster_analyse_{}_{}_{}_{}.csv'
1454
-
1455
- def __init__(self, profiling_dir, device_id, rank_id, rank_size, model):
1456
- super().__init__(DeviceTarget.ASCEND.value, model)
1457
- self._profiling_dir = profiling_dir
1458
- self._device_id = device_id
1459
- self._rank_id = rank_id
1460
- self._rank_size = rank_size
1461
- self._display_filename = self._display_filename.format(rank_id)
1462
- self._timeline_summary_filename = self._timeline_summary_filename.format(rank_id)
1463
-
1464
- @staticmethod
1465
- def _get_all_reduce_names(communication_info):
1466
- names = []
1467
- for info in communication_info:
1468
- # all_reduce_name format: stream_stream_id_stream_op_index_opname
1469
- all_reduce_name = info[0][info[0].rindex('_') + 1:]
1470
- if all_reduce_name not in names:
1471
- names.append(all_reduce_name)
1472
- return names
1473
-
1474
-    def init_timeline(self, communication_info, framework_info, aicpu_info, min_cycle_counter, source_path):
-        """
-        Init timeline metadata, adding all collected info.
-
-        Args:
-            communication_info (list[list]): The metadata of communication operators.
-            framework_info (dict): The framework metadata.
-            aicpu_info (dict): The metadata of AI CPU operators.
-            min_cycle_counter (float): The minimum cycle counter of the timeline.
-            source_path (str): The path of the source file.
-        """
-        if min_cycle_counter == float('inf'):
-            min_cycle_counter = 0
-
-        logger.info('Initiating timeline...')
-        timeline_list = []
-        op_timeline_list = self._get_op_timeline(communication_info, source_path)
-        timeline_list.extend(op_timeline_list)
-
-        # Generate step time.
-        self._set_step_start_and_end_op_name(timeline_list)
-        step_time_list = self._get_step_time_list(timeline_list)
-
-        # Add Scope Name.
-        default_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "Default")
-        gradient_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "Gradients")
-        recompute_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "recompute_Default")
-
-        # Add AI CPU data into the timeline temp list and sort by start time.
-        aicpu_data = aicpu_info.get('info')
-        if aicpu_data:
-            timeline_list.extend(aicpu_data)
-            self._timeline_summary['op_exe_times'] += aicpu_info.get('op_exe_times', 0)
-            self._timeline_summary['num_of_streams'] += aicpu_info.get('num_of_streams', 0)
-            self._timeline_summary['num_of_ops'] += aicpu_info.get('num_of_ops', 0)
-            self._timeline_summary['total_time'] += aicpu_info.get('total_time', 0)
-
-        timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
-
-        # Add AllReduce info to the timeline temp list and sort by start time.
-        if communication_info:
-            logger.debug('AllReduce info found. Start adding info into timeline...')
-            cluster_related_timeline = self._get_cluster_timeline(
-                timeline_list, communication_info, step_time_list)
-            timeline_list.extend(cluster_related_timeline)
-            timeline_list.extend(communication_info)
-            timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
-
-        # Add step time and scope name info.
-        timeline_list.extend(step_time_list)
-        timeline_list.extend(default_scope_name_time_list)
-        timeline_list.extend(recompute_scope_name_time_list)
-        timeline_list.extend(gradient_scope_name_time_list)
-        timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
-
-        # Init a dict for counting the num of streams.
-        stream_count_dict = {}
-        for timeline in timeline_list:
-            self._parse_timeline_data(timeline, min_cycle_counter)
-            # Updating the collection of streams.
-            if len(timeline) == 4:
-                self._update_num_of_streams(timeline, stream_count_dict)
-
-        # Add format thread meta data.
-        self._format_meta_data_list.extend(self._timeline_meta)
-        self._timeline_meta = self._format_meta_data_list
-        # Get framework metadata.
-        framework_obj_list = framework_info.get('object')
-        # The length of the list is the number of operators.
-        self._timeline_summary['num_of_ops'] += len(framework_obj_list)
-        self._add_framework_info(framework_obj_list)
-        logger.info('Finished adding info into timeline...')
-
-        # Update timeline summary info.
-        self._timeline_summary['num_of_streams'] += len(stream_count_dict.keys())
-
-    def init_pynative_timeline(self):
-        """Init the timeline for PyNative mode."""
-        timeline_list = OPIntermediateParser(self._profiling_dir, self._rank_id).get_timeline_data()
-        cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir, self._model)
-        cpu_timeline_list = cpu_timeline_generator.load_cpu_op_data()
-        if cpu_timeline_list:
-            self._pynative_clock_synchronize(cpu_timeline_list)
-            timeline_list.extend(cpu_timeline_list)
-
-        self._register_op_name(timeline_list)
-        self._timeline_summary['op_exe_times'] = len(timeline_list)
-        self._max_scope_name_num = self._get_max_scope_name_num(timeline_list)
-        self._timeline_summary['max_scope_name_num'] = self._max_scope_name_num
-        self._timeline_summary['num_of_ops'] = len(self._op_name_list)
-
-        timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
-        min_cycle_counter = float(timeline_list[0][self._start_time_idx])
-
-        step_timeline = self._pynative_get_step_timeline_list(timeline_list)
-        timeline_list.extend(step_timeline)
-
-        stream_count_dict = {}
-        max_scope_name_num = 0
-        for timeline in timeline_list:
-            self._parse_timeline_data(timeline, min_cycle_counter)
-            self._update_num_of_streams(timeline, stream_count_dict)
-            cur_scope_name_num = len(timeline[self._op_name_idx].split('/')) - 1
-            max_scope_name_num = max(cur_scope_name_num, max_scope_name_num)
-
-        self._timeline_summary['max_scope_name_num'] = max_scope_name_num
-        self._timeline_summary['num_of_streams'] = len(stream_count_dict)
-
-    def _parse_timeline_data(self, timeline, min_cycle_counter):
-        """Parse timeline data."""
-        # factor to convert the time unit from 1ms to 1us for timeline display
-        factor = 1000
-        op_meta = TimelineContainer(timeline)
-        timeline_dict = {}
-        timeline_dict['name'] = op_meta.op_name.split('/')[-1]
-        timeline_dict['ph'] = 'X'
-        timeline_dict['tid'] = op_meta.stream_id
-        timeline_dict['ts'] = (op_meta.start_time - min_cycle_counter) * factor
-        dur = op_meta.duration * factor
-        timeline_dict['dur'] = dur
-        if op_meta.pid is None:
-            timeline_dict['pid'] = int(self._device_id)
-            # Update total time of operator execution.
-            if op_meta.stream_id not in ["Steps", "Scope Name"]:
-                self._timeline_summary['total_time'] += op_meta.duration
-        else:  # AllReduce and AI CPU pid
-            timeline_dict['pid'] = op_meta.pid
-
-        if op_meta.stream_id == "Scope Name":
-            # remove the level of scope name which has a format like "0-conv2-Conv2d".
-            timeline_dict['name'] = "-".join(op_meta.op_name.split('-')[1:])
-            timeline_dict['scope_level'] = int(op_meta.op_name.split('-')[0])
-        elif op_meta.stream_id[:len(self._host_cpu_op_label)] == self._host_cpu_op_label:
-            timeline_dict['pid'] = self._HOST_CPU_PID
-
-        self._update_format_meta_data(timeline_dict)
-        self._timeline_meta.append(timeline_dict)
-
-    def _get_op_timeline(self, communication_info, source_path):
-        """Get the AI Core and CPU timeline."""
-        all_reduce_names = AscendTimelineGenerator._get_all_reduce_names(communication_info)
-        timeline_list = OPIntermediateParser(self._profiling_dir, self._rank_id).get_timeline_data(all_reduce_names)
-        for timeline in timeline_list:
-            timeline[self._tid_idx] = f"Stream #{timeline[self._tid_idx]}"
-
-        cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir, self._model)
-        cpu_timeline_list = cpu_timeline_generator.get_timeline_data()
-        if cpu_timeline_list:
-            self._clock_synchronize_to_device(cpu_timeline_list, source_path)
-            timeline_list.extend(cpu_timeline_list)
-        timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
-        self._max_scope_name_num = self._get_max_scope_name_num(timeline_list)
-        self._timeline_summary['op_exe_times'] = len(timeline_list)
-        self._timeline_summary['max_scope_name_num'] = self._max_scope_name_num
-        return timeline_list
-
-    def _clock_synchronize_to_device(self, timeline_list, source_path):
-        """Synchronize the timestamp from host to device."""
-        host_start_file_path = os.path.join(source_path, f"host_start.log.{self._device_id}")
-        dev_start_file_path = os.path.join(source_path, f"dev_start.log.{self._device_id}")
-
-        try:
-            with open(host_start_file_path) as f_obj:
-                lines = f_obj.readlines()
-                # lines[2] stores host monotonic_raw time of start training.
-                host_monotonic = int(lines[2].strip().split(':')[1])
-        except (IOError, OSError) as err:
-            logger.critical('Error occurred when read host_start.log: %s', err)
-            raise ProfilerIOException() from err
-        try:
-            with open(dev_start_file_path) as f_obj:
-                lines = f_obj.readlines()
-                # lines[2] stores device cycle counter of start training.
-                dev_cntvct = int(lines[2].strip().split(':')[1])
-        except (IOError, OSError) as err:
-            logger.critical('Error occurred when read dev_start.log: %s', err)
-            raise ProfilerIOException() from err
-
-        factor_ns_to_ms = 1e-6
-        factor_ten_ns_to_ns = 10
-        factor_ms_to_ns = 1e6
-        for idx, time_item in enumerate(timeline_list):
-            host_time = int(float(time_item[self._start_time_idx]) * factor_ms_to_ns)
-            device_time = dev_cntvct * factor_ten_ns_to_ns + (host_time - host_monotonic)
-            timeline_list[idx][self._start_time_idx] = device_time * factor_ns_to_ms
-
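A numeric sketch (made-up values) of the conversion above: the device counter ticks in 10 ns units, host timestamps arrive in ms, and the result is written back in ms.

dev_cntvct = 500            # device counter at training start, in 10 ns units
host_monotonic = 2_000_000  # host monotonic_raw at training start, in ns
host_time_ms = 2.5          # a host timestamp to convert, in ms
host_time_ns = int(host_time_ms * 1e6)                      # 2_500_000 ns
device_time_ns = dev_cntvct * 10 + (host_time_ns - host_monotonic)
print(device_time_ns * 1e-6)  # 0.505: the timestamp in ms on the device timebase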
1660
-     def _add_framework_info(self, framework_obj_list):
-         """
-         Add framework info into timeline metadata.
-
-         Args:
-             framework_obj_list (list): The framework metadata.
-         """
-         logger.debug('Start adding framework info into timeline...')
-         # Get the framework info that will be written into the timeline.
-         framework_info_dict = {}
-         for framework_obj in framework_obj_list:
-             op_name = framework_obj[0]
-             op_type = framework_obj[1]
-             op_full_name = framework_obj[4]
-             op_info = framework_obj[5]
-             framework_info = {
-                 'name': op_name,
-                 'args': {
-                     'type': op_type,
-                     'fullname': op_full_name
-                 }
-             }
-             framework_info.get('args').update(op_info)
-             framework_info_dict[op_full_name] = framework_info
-
-         # Insert framework info into the timeline.
-         for timeline_item in self._timeline_meta:
-             op_full_name = timeline_item.get('name')
-             framework_item = framework_info_dict.get(op_full_name)
-             if framework_item:
-                 timeline_item['name'] = framework_item.get('name')
-                 timeline_item['args'] = framework_item.get('args')
-         logger.debug('Finished adding framework info into timeline...')
-
1694
-     def _produce_two_separated_timeline(self, timeline, op_name):
-         """Produce two separated timelines based on op_name."""
-         timeline_include_op_name = []
-         timeline_exclude_op_name = []
-         for time_item in timeline:
-             if op_name in time_item[self._op_name_idx]:
-                 timeline_include_op_name.append(time_item)
-             else:
-                 timeline_exclude_op_name.append(time_item)
-         return timeline_include_op_name, timeline_exclude_op_name
-
1705
-     def _get_cluster_timeline(self, aicore_info, comm_info, step_info):
-         """
-         Analyse the cluster communication and computation data, and write the result to file.
-
-         To analyse the cluster performance bottleneck based on the timeline, define the time of a
-         training step as "t_total" and propose five metrics as follows:
-         1) The time that "receive" operators are not overlapped by others (t1).
-         2) The time consumed inside the stage (t_total - t1).
-         3) The time that "communication" operators are not overlapped by others (t2).
-         4) The time consumed by computation (t_total - t2).
-         5) The time that "collective communication" operators are not overlapped by others (t3).
-         In pipeline parallel mode, we can locate the slow stage based on t_total - t1. Inside each
-         stage, we can locate the slow card based on t_total - t2. The value of t1 indicates the degree
-         to which communication between stages slows down the training. The value of t3 indicates the
-         degree to which communication inside each stage slows down the training.
-         """
-         is_pipeline_parallel = False
-         comm_merged_timeline, _, comm_display_timeline = self._get_merged_time_list(
-             comm_info, display_name="communication"
-         )
-         aicore_timeline_interval, _, aicore_display_timeline = self._get_merged_time_list(
-             aicore_info, get_interval_time=True
-         )
-         # The overlap between computation and communication may be empty.
-         comm_not_overlapped_timeline = self._get_intersection_time(
-             aicore_timeline_interval, comm_merged_timeline
-         )
-
-         # Process the receive part.
-         all_timeline = aicore_info + comm_info
-         all_timeline.sort(key=lambda x: float(x[self._start_time_idx]))
-         receive_op_timeline, timeline_exclude_receive_op = self._produce_two_separated_timeline(
-             all_timeline, "Receive-op"
-         )
-         if receive_op_timeline:
-             is_pipeline_parallel = True
-         receive_op_merged_timeline = self._get_merged_time_list(receive_op_timeline)[0]
-         timeline_exclude_receive_op_interval = self._get_merged_time_list(
-             timeline_exclude_receive_op, get_interval_time=True
-         )[0]
-         receive_op_not_overlapped_timeline = self._get_intersection_time(
-             timeline_exclude_receive_op_interval, receive_op_merged_timeline
-         )
-
-         # Process the collective communication part.
-         collective_comm_timeline = self._produce_two_separated_timeline(
-             comm_info, "Receive-op"
-         )[-1]
-         collective_comm_merged_timeline = self._get_merged_time_list(collective_comm_timeline)[0]
-         collective_comm_not_overlapped_timeline = self._get_intersection_time(
-             aicore_timeline_interval, collective_comm_merged_timeline
-         )
-
-         # Generate the free time that excludes computation and communication time.
-         free_timeline = self._get_merged_time_list(
-             all_timeline, get_interval_time=True, display_name="free_time"
-         )[1]
-
-         self._parse_cluster_metrices(step_info, receive_op_not_overlapped_timeline, comm_not_overlapped_timeline,
-                                      collective_comm_not_overlapped_timeline, is_pipeline_parallel)
-
-         res_timeline = []
-         res_timeline.extend(comm_not_overlapped_timeline)
-         res_timeline.extend(aicore_display_timeline)
-         res_timeline.extend(comm_display_timeline)
-         res_timeline.extend(free_timeline)
-
-         return res_timeline
-
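To make the five metrics concrete, here is a toy, purely illustrative computation for a single 100 ms step; in the real pipeline these values come from the intersection timelines computed above:

t_total = 100.0  # step duration (ms), hypothetical
t1 = 12.0        # "receive" time not overlapped by other ops
t2 = 30.0        # communication time not overlapped by computation
t3 = 18.0        # collective communication time not overlapped

stage_time = t_total - t1        # 88.0: compare across stages to find the slow stage
computation_time = t_total - t2  # 70.0: compare across cards inside a stage
print(stage_time, computation_time, t1, t3)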
1774
-     def _parse_cluster_metrices(self, step_info, receive_op_not_overlapped_timeline, comm_not_overlapped_timeline,
-                                 collective_comm_not_overlapped_timeline, is_pipeline_parallel):
-         """Write the cluster metrics."""
-         step_num = len(step_info)
-         # Compute the five metrics mentioned above per step.
-         recieve_alone_time = self._compute_time_inside_step(receive_op_not_overlapped_timeline, step_info)
-         stage_time, computation_time = [], []
-         comm_alone_time = self._compute_time_inside_step(comm_not_overlapped_timeline, step_info)
-         collective_comm_alone_time = self._compute_time_inside_step(
-             collective_comm_not_overlapped_timeline, step_info
-         )
-         for step in range(step_num):
-             try:
-                 if is_pipeline_parallel:
-                     stage_time.append(step_info[step][self._duration_idx] - recieve_alone_time[step])
-                 computation_time.append(step_info[step][self._duration_idx] - comm_alone_time[step])
-             except IndexError as err:
-                 logger.error(err)
-         metrices_per_step_list = [computation_time, comm_alone_time, stage_time,
-                                   recieve_alone_time, collective_comm_alone_time]
-         if step_num > 1:
-             for metric in metrices_per_step_list:
-                 metric.append(sum(metric[1:]) / (step_num - 1))
-         self._write_cluster_metrices(metrices_per_step_list, is_pipeline_parallel, "Ascend", self._rank_id)
-
1799
-     def _compute_time_inside_step(self, metric_timeline, step_time_list):
-         """Compute the per-step time of metric_timeline."""
-         per_step_time_list = [0 for _ in range(len(step_time_list))]
-         step = 0
-         step_end_time = step_time_list[step][self._start_time_idx] + \
-                         step_time_list[step][self._duration_idx]
-         for time_item in metric_timeline:
-             start_time = time_item[self._start_time_idx]
-             if start_time > step_end_time:
-                 step += 1
-                 if step >= len(step_time_list):
-                     logger.warning("Found more timeline data than the step count when computing "
-                                    "compute_time_inside_step; the current graph may contain multiple "
-                                    "subgraphs. Skipping the remaining data.")
-                     break
-                 step_end_time = step_time_list[step][self._start_time_idx] + \
-                                 step_time_list[step][self._duration_idx]
-             per_step_time_list[step] += time_item[self._duration_idx]
-
-         return per_step_time_list
-
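The bucketing above walks the already-sorted metric timeline once, advancing to the next step whenever an item starts after the current step's end. A minimal standalone sketch of the same logic, using hypothetical (start, duration) pairs instead of the parser's internal row format:

def time_inside_step(metric_timeline, steps):
    """steps and metric_timeline are sorted lists of (start, duration) pairs."""
    per_step = [0.0] * len(steps)
    step = 0
    step_end = steps[step][0] + steps[step][1]
    for start, duration in metric_timeline:
        if start > step_end:
            step += 1
            if step >= len(steps):
                break  # more data than steps, e.g. with multiple subgraphs
            step_end = steps[step][0] + steps[step][1]
        per_step[step] += duration
    return per_step

# Two 10 ms steps; the second item falls into the second step.
print(time_inside_step([(1, 2), (12, 3)], [(0, 10), (10, 10)]))  # [2.0, 3.0]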
1820
-     def _get_intersection_time(self, first_time_list, second_time_list,
-                                display_name="communication_not_overlapped"):
-         """Get the intersection time of two time lists."""
-         first_list_idx, second_list_idx = 0, 0
-         first_list_len = len(first_time_list)
-         second_list_len = len(second_time_list)
-         intersection_segment_display_list = []
-
-         # In merged time lists, the duration index stores the segment end time.
-         while first_list_idx < first_list_len and \
-                 second_list_idx < second_list_len:
-             intersection_start = max(
-                 first_time_list[first_list_idx][self._start_time_idx],
-                 second_time_list[second_list_idx][self._start_time_idx]
-             )
-             intersection_end = min(
-                 first_time_list[first_list_idx][self._duration_idx],
-                 second_time_list[second_list_idx][self._duration_idx]
-             )
-             if intersection_start < intersection_end:
-                 tid = self._tid_dict.get(display_name, [0, 0])
-                 intersection_segment_display_list.append(
-                     [display_name, tid[0],
-                      intersection_start, intersection_end - intersection_start, tid[1]]
-                 )
-             # Advance the list whose current segment ends first.
-             if first_time_list[first_list_idx][self._duration_idx] >= \
-                     second_time_list[second_list_idx][self._duration_idx]:
-                 second_list_idx += 1
-             else:
-                 first_list_idx += 1
-
-         return intersection_segment_display_list
-
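This is the classic two-pointer sweep over two sorted segment lists: record the overlap of the current pair when it is non-empty, then advance whichever list's segment finishes first. A compact sketch of the same idea on bare (start, end) pairs:

def intersect(a, b):
    """Intersect two sorted lists of (start, end) segments."""
    i = j = 0
    out = []
    while i < len(a) and j < len(b):
        start = max(a[i][0], b[j][0])
        end = min(a[i][1], b[j][1])
        if start < end:
            out.append((start, end))
        # Advance the list whose current segment ends first.
        if a[i][1] >= b[j][1]:
            j += 1
        else:
            i += 1
    return out

print(intersect([(0, 5), (8, 12)], [(3, 9)]))  # [(3, 5), (8, 9)]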
1852
-     def _pynative_get_step_timeline_list(self, timeline_list):
-         """Get the step timeline list for PyNative mode."""
-         step_list = []
-         # The timeline should start with a GetNext op; otherwise no step list can be built.
-         if len(timeline_list) < 2 or 'GetNext' not in timeline_list[0][self._op_name_idx] and \
-                 'GetNext' not in timeline_list[1][self._op_name_idx]:
-             return step_list
-         step = [-1, -1]
-         step_num = 0
-         tid = "Steps"
-         for timeline in timeline_list:
-             if 'GetNext' not in timeline[self._op_name_idx]:
-                 continue
-             start_time = float(timeline[self._start_time_idx])
-             if step[0] == -1:
-                 step[0] = start_time
-             else:
-                 step[1] = start_time - step[0]
-                 step_num = step_num + 1
-                 step_list.append([str(step_num), tid, float(step[0]), step[1]])
-                 step = [start_time, -1]
-         if step[0] != -1 and step[1] == -1:
-             step_num = step_num + 1
-             step_list.append([str(step_num), tid, float(step[0]),
-                               float(timeline_list[-1][self._start_time_idx]) - step[0]])
-         return step_list
-
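In PyNative mode each step is delimited by consecutive GetNext launches, and the final step is closed with the start time of the last operator in the timeline. A small illustration with hypothetical timestamps (ms):

get_next_starts = [0.0, 10.0, 21.0]  # GetNext launch times (hypothetical)
last_op_start = 30.0                 # start time of the last op in the timeline

steps = []
for n, start in enumerate(get_next_starts):
    end = get_next_starts[n + 1] if n + 1 < len(get_next_starts) else last_op_start
    steps.append([str(n + 1), "Steps", start, end - start])
print(steps)
# [['1', 'Steps', 0.0, 10.0], ['2', 'Steps', 10.0, 11.0], ['3', 'Steps', 21.0, 9.0]]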
1879
-     def _pynative_clock_synchronize(self, timeline_list):
-         """Synchronize the timestamps from device to host."""
-         start_time_file_path = os.path.join(self._profiling_dir, f"start_time_{self._rank_id}.txt")
-         try:
-             with open(start_time_file_path) as f_obj:
-                 lines = f_obj.readlines()
-                 # lines[0] stores the host monotonic time at the start of training.
-                 host_monotonic_start_time = int(lines[0].strip().split(':')[-1])
-                 # lines[1] stores the GPU time at the start of training.
-                 gpu_start_time = int(lines[1].strip().split(':')[-1])
-         except (IOError, OSError) as err:
-             logger.critical(f'Error occurred when reading {start_time_file_path}: {err}')
-             raise ProfilerIOException() from err
-         time_diff = gpu_start_time * 1000 - host_monotonic_start_time
-         for idx, time_item in enumerate(timeline_list):
-             timeline_list[idx][self._start_time_idx] = int(time_item[self._start_time_idx]) + time_diff
-             timeline_list[idx][self._start_time_idx] = timeline_list[idx][self._start_time_idx] / 1000000
-             timeline_list[idx][self._duration_idx] = timeline_list[idx][self._duration_idx] / 1000
-
1898
-     def _set_step_start_and_end_op_name(self, timeline_list):
-         """Set the start and end operator full names of each step."""
-         if not timeline_list or len(timeline_list) < 2:
-             return
-
-         start_op_idx = 0
-         self._step_end_op_name = timeline_list[-1][self._op_name_idx]
-         for i, timeline in enumerate(timeline_list):
-             if timeline[self._op_name_idx] == self._step_end_op_name:
-                 start_op_idx = i + 1
-                 break
-
-         if start_op_idx >= len(timeline_list):
-             start_op_idx = 0
-         self._step_start_op_name = timeline_list[start_op_idx][self._op_name_idx]
-
-
1915
- class CpuTimelineGenerator(GpuTimelineGenerator):
-     """Generate CPU timeline data from file."""
-     _output_op_execute_time_file_path = "cpu_op_execute_timestamp_{}.txt"
-     _display_filename = 'cpu_timeline_display_{}.json'
-     _timeline_summary_filename = 'cpu_timeline_summary_{}.json'
-
-     def __init__(self, profiling_dir, model):
-         super().__init__(profiling_dir, 0, 0, model)
-         self._device_target = DeviceTarget.CPU.value
-
1925
-     def get_timeline_data(self):
-         """Get timeline data from file."""
-         timeline_list = self.load_cpu_op_data()
-         factor_ns_to_ms = 1e6
-         factor_us_to_ms = 1e3
-         for time_item in timeline_list:
-             time_item[self._start_time_idx] = float(time_item[self._start_time_idx]) / factor_ns_to_ms
-             time_item[self._duration_idx] = float(time_item[self._duration_idx]) / factor_us_to_ms
-
-         return timeline_list
-
1936
-     def init_timeline(self):
-         """Init timeline metadata, adding all collected info."""
-         timeline_list = self._load_timeline_data()
-
-         # Init a dict for counting the number of streams.
-         stream_count_dict = {}
-         for timeline in timeline_list:
-             self._parse_timeline_data(timeline, 0)
-             # Update the collection of streams.
-             if len(timeline) == 4:
-                 self._update_num_of_streams(timeline, stream_count_dict)
-
-         # Add format thread metadata.
-         self._format_meta_data_list.extend(self._timeline_meta)
-         self._timeline_meta = self._format_meta_data_list
-
-         # Update timeline summary info.
-         self._timeline_summary['num_of_streams'] += len(stream_count_dict.keys())
-
1955
-     def load_cpu_op_data(self):
-         """Load CPU operator data from file."""
-         op_file_path = self._get_and_validate_path(
-             self._output_op_execute_time_file_path)
-         timeline_list = []
-         if not os.path.exists(op_file_path):
-             logger.info("No cpu operator info.")
-             return timeline_list
-         timeline_list = self._load_op_data(op_file_path)
-         factor_ms_to_us = 1e-3
-         for time_item in timeline_list:
-             time_item[self._duration_idx] = float(time_item[self._duration_idx]) / factor_ms_to_us
-
-         return timeline_list
-
1970
-     def _get_and_validate_path(self, file_name):
-         """Generate the op or activity file path from the file name, and validate this path."""
-         file_path = os.path.join(
-             self._profiling_dir,
-             file_name.format(self._device_id)
-         )
-         file_path = validate_and_normalize_path(file_path)
-
-         return file_path
-
1980
-     def _load_op_data(self, op_file_path):
-         """Load operator data from file."""
-         op_timeline_list = []
-         try:
-             with open(op_file_path, 'r') as f_obj:
-                 for line in f_obj:
-                     self._timeline_summary['num_of_ops'] += 1
-                     op_list = line.strip('\n').strip().split(';')
-                     time_arr = op_list[-1]
-                     time_arr = time_arr.split(" ")
-                     for time in time_arr:
-                         time = time.split(",")
-                         if len(time) == 3:
-                             # The time value is [start_timestamp, duration, tid];
-                             # line_list[1] becomes "HostCpuOps" + str(tid).
-                             line_list = op_list[:1] + [op_list[1] + str(time[-1])] + time[:-1]
-                         else:
-                             # The time value is [start_timestamp, duration].
-                             line_list = op_list[:2] + time
-                         op_timeline_list.append(line_list)
-         except (IOError, OSError) as err:
-             logger.critical('Error occurred when loading the operator timeline data intermediate file: %s', err)
-             raise ProfilerIOException() from err
-
-         return op_timeline_list
-
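The parser above expects semicolon-separated fields whose last field packs one or more space-separated "start,duration[,tid]" groups. A hypothetical input line (the exact field layout is an assumption for illustration) and how it unrolls:

# Hypothetical line in the intermediate file format sketched above.
line = "Default/network/Conv2D-op1;HostCpuOps;1622439,120,3 1622800,95,3"
op_list = line.strip().split(';')
for group in op_list[-1].split(' '):
    time = group.split(',')
    if len(time) == 3:
        row = op_list[:1] + [op_list[1] + time[-1]] + time[:-1]
    else:
        row = op_list[:2] + time
    print(row)
# ['Default/network/Conv2D-op1', 'HostCpuOps3', '1622439', '120']
# ['Default/network/Conv2D-op1', 'HostCpuOps3', '1622800', '95']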
2006
-     def _load_timeline_data(self):
-         """Load timeline data from file."""
-         timeline_list = self.load_cpu_op_data()
-
-         timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
-         self._max_scope_name_num = self._get_max_scope_name_num(timeline_list)
-         self._timeline_summary['max_scope_name_num'] = self._max_scope_name_num
-
-         # Generate step time.
-         factor_start_time_uint_to_duration = 1e-3
-         self._set_step_start_and_end_op_name(timeline_list)
-
-         step_time_list = self._get_step_time_list(timeline_list, factor_start_time_uint_to_duration)
-
-         # Add merged compute time and free time.
-         merge_compute_timeline = self._get_merged_time_list(
-             timeline_list, False, "computation_op", factor_start_time_uint_to_duration)[2]
-         free_time_timeline = self._get_merged_time_list(
-             timeline_list, True, "free_time", factor_start_time_uint_to_duration)[1]
-
-         # Add scope names.
-         default_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "Default",
-                                                                       factor_start_time_uint_to_duration)
-         gradient_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "Gradients",
-                                                                        factor_start_time_uint_to_duration)
-         recompute_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "recompute_Default",
-                                                                         factor_start_time_uint_to_duration)
-         timeline_list.extend(default_scope_name_time_list)
-         timeline_list.extend(gradient_scope_name_time_list)
-         timeline_list.extend(recompute_scope_name_time_list)
-         timeline_list.extend(step_time_list)
-
-         timeline_list.sort(key=lambda x: (float(x[self._start_time_idx]), x[self._tid_idx]))
-         timeline_list.sort(key=lambda x: float(x[2]))
-         timeline_list.extend(merge_compute_timeline)
-         timeline_list.extend(free_time_timeline)
-
-         return timeline_list
-
2045
-     def _parse_timeline_data(self, timeline, min_cycle_counter):
-         """Parse timeline data."""
-         # Factor to convert the time unit of start_time(ts) from 1ns to 1us for timeline display.
-         factor = 1000
-         op_meta = TimelineContainer(timeline)
-         timeline_dict = {}
-         timeline_dict['name'] = op_meta.op_name.split('/')[-1]
-         timeline_dict['ph'] = 'X'
-         timeline_dict['tid'] = op_meta.stream_id
-         timeline_dict['ts'] = (op_meta.start_time - min_cycle_counter) / factor
-         dur = op_meta.duration
-         timeline_dict['dur'] = dur
-         timeline_dict['pid'] = int(self._device_id)
-         if op_meta.stream_id == "Scope Name":
-             # Remove the scope level from names formatted like "0-conv2-Conv2d".
-             timeline_dict['name'] = "-".join(op_meta.op_name.split('-')[1:])
-             timeline_dict['scope_level'] = int(op_meta.op_name.split('-')[0])
-         elif self._host_cpu_op_label == op_meta.stream_id[:len(self._host_cpu_op_label)]:
-             timeline_dict['pid'] = self._HOST_CPU_PID
-
-         if len(timeline) == 5:
-             # len(timeline) == 5 refers to analysis data.
-             timeline_dict["pid"] = op_meta.pid
-         elif op_meta.stream_id not in ["Scope Name", "Steps"]:
-             # Update the total time of operator execution.
-             self._timeline_summary['total_time'] += dur / factor
-             self._timeline_summary['op_exe_times'] += 1
-
-         self._update_format_meta_data(timeline_dict)
-         self._timeline_meta.append(timeline_dict)