mindspore 2.0.0a0__cp38-cp38-win_amd64.whl → 2.0.0rc1__cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (655)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +4 -2
  3. mindspore/_c_dataengine.cp38-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp38-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp38-win_amd64.pyd +0 -0
  6. mindspore/_check_jit_forbidden_api.py +102 -0
  7. mindspore/_checkparam.py +1066 -1001
  8. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +4 -3
  9. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -48
  10. mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -4
  11. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -4
  12. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
  13. mindspore/_extends/parse/__init__.py +5 -3
  14. mindspore/_extends/parse/namespace.py +16 -1
  15. mindspore/_extends/parse/parser.py +107 -22
  16. mindspore/_extends/parse/resources.py +0 -7
  17. mindspore/_extends/parse/standard_method.py +885 -413
  18. mindspore/amp.py +52 -57
  19. mindspore/boost/boost.py +2 -2
  20. mindspore/boost/boost_cell_wrapper.py +38 -20
  21. mindspore/boost/dim_reduce.py +3 -3
  22. mindspore/boost/group_loss_scale_manager.py +1 -1
  23. mindspore/common/__init__.py +4 -6
  24. mindspore/common/_decorator.py +2 -0
  25. mindspore/common/_register_for_adapter.py +55 -0
  26. mindspore/common/_stub_tensor.py +201 -0
  27. mindspore/common/_utils.py +41 -7
  28. mindspore/common/api.py +215 -141
  29. mindspore/common/dtype.py +8 -1
  30. mindspore/common/dump.py +2 -2
  31. mindspore/common/initializer.py +4 -2
  32. mindspore/common/jit_config.py +17 -13
  33. mindspore/common/mutable.py +33 -13
  34. mindspore/common/parameter.py +23 -21
  35. mindspore/common/seed.py +8 -24
  36. mindspore/common/sparse_tensor.py +62 -41
  37. mindspore/common/tensor.py +852 -1154
  38. mindspore/communication/__init__.py +2 -2
  39. mindspore/communication/_comm_helper.py +11 -4
  40. mindspore/communication/management.py +22 -21
  41. mindspore/config/op_info.config +501 -1008
  42. mindspore/context.py +201 -23
  43. mindspore/dataset/__init__.py +6 -6
  44. mindspore/dataset/audio/__init__.py +7 -7
  45. mindspore/dataset/audio/transforms.py +670 -30
  46. mindspore/dataset/audio/utils.py +47 -4
  47. mindspore/dataset/audio/validators.py +223 -1
  48. mindspore/dataset/callback/ds_callback.py +2 -2
  49. mindspore/dataset/core/config.py +210 -14
  50. mindspore/dataset/core/validator_helpers.py +2 -2
  51. mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
  52. mindspore/dataset/debug/debug_hook.py +65 -0
  53. mindspore/dataset/debug/pre_defined_hook.py +67 -0
  54. mindspore/dataset/engine/__init__.py +7 -3
  55. mindspore/dataset/engine/cache_client.py +1 -1
  56. mindspore/dataset/engine/datasets.py +322 -66
  57. mindspore/dataset/engine/datasets_audio.py +80 -76
  58. mindspore/dataset/engine/datasets_standard_format.py +51 -38
  59. mindspore/dataset/engine/datasets_text.py +232 -118
  60. mindspore/dataset/engine/datasets_user_defined.py +41 -17
  61. mindspore/dataset/engine/datasets_vision.py +746 -225
  62. mindspore/dataset/engine/graphdata.py +75 -10
  63. mindspore/dataset/engine/iterators.py +45 -5
  64. mindspore/dataset/engine/offload.py +48 -28
  65. mindspore/dataset/engine/validators.py +117 -8
  66. mindspore/dataset/text/__init__.py +6 -5
  67. mindspore/dataset/text/transforms.py +86 -3
  68. mindspore/dataset/text/utils.py +6 -4
  69. mindspore/dataset/text/validators.py +25 -0
  70. mindspore/dataset/transforms/__init__.py +3 -2
  71. mindspore/dataset/transforms/c_transforms.py +1 -1
  72. mindspore/dataset/transforms/transforms.py +2 -2
  73. mindspore/dataset/utils/__init__.py +2 -1
  74. mindspore/dataset/utils/line_reader.py +121 -0
  75. mindspore/dataset/vision/__init__.py +2 -3
  76. mindspore/dataset/vision/c_transforms.py +9 -9
  77. mindspore/dataset/vision/py_transforms.py +5 -5
  78. mindspore/dataset/vision/py_transforms_util.py +2 -0
  79. mindspore/dataset/vision/transforms.py +160 -161
  80. mindspore/dataset/vision/utils.py +3 -3
  81. mindspore/experimental/map_parameter.py +38 -26
  82. mindspore/include/OWNERS +0 -1
  83. mindspore/include/api/callback/callback.h +9 -13
  84. mindspore/include/api/callback/ckpt_saver.h +2 -2
  85. mindspore/include/api/callback/loss_monitor.h +2 -2
  86. mindspore/include/api/callback/lr_scheduler.h +5 -5
  87. mindspore/include/api/callback/time_monitor.h +2 -2
  88. mindspore/include/api/callback/train_accuracy.h +4 -6
  89. mindspore/include/api/cfg.h +19 -6
  90. mindspore/include/api/context.h +44 -9
  91. mindspore/include/api/delegate.h +1 -1
  92. mindspore/include/api/metrics/accuracy.h +2 -2
  93. mindspore/include/api/metrics/metrics.h +4 -3
  94. mindspore/include/api/model.h +9 -4
  95. mindspore/include/api/model_parallel_runner.h +2 -2
  96. mindspore/include/api/net.h +12 -11
  97. mindspore/include/api/serialization.h +19 -3
  98. mindspore/include/api/types.h +3 -3
  99. mindspore/include/dataset/constants.h +7 -0
  100. mindspore/include/dataset/text.h +59 -0
  101. mindspore/jpeg62.dll +0 -0
  102. mindspore/log.py +1 -1
  103. mindspore/mindrecord/filereader.py +18 -0
  104. mindspore/mindrecord/filewriter.py +197 -34
  105. mindspore/mindrecord/shardreader.py +9 -0
  106. mindspore/mindrecord/shardwriter.py +1 -1
  107. mindspore/mindrecord/tools/cifar100_to_mr.py +3 -3
  108. mindspore/mindrecord/tools/cifar10_to_mr.py +3 -3
  109. mindspore/mindrecord/tools/csv_to_mr.py +3 -3
  110. mindspore/mindrecord/tools/imagenet_to_mr.py +16 -11
  111. mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
  112. mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
  113. mindspore/mindspore_backend.dll +0 -0
  114. mindspore/mindspore_common.dll +0 -0
  115. mindspore/mindspore_core.dll +0 -0
  116. mindspore/mindspore_glog.dll +0 -0
  117. mindspore/mindspore_shared_lib.dll +0 -0
  118. mindspore/nn/__init__.py +0 -4
  119. mindspore/nn/cell.py +204 -132
  120. mindspore/nn/dynamic_lr.py +1 -1
  121. mindspore/nn/grad/cell_grad.py +7 -6
  122. mindspore/nn/layer/__init__.py +5 -4
  123. mindspore/nn/layer/activation.py +40 -89
  124. mindspore/nn/layer/basic.py +255 -624
  125. mindspore/nn/layer/channel_shuffle.py +7 -6
  126. mindspore/nn/layer/combined.py +1 -1
  127. mindspore/nn/layer/container.py +41 -4
  128. mindspore/nn/layer/conv.py +64 -28
  129. mindspore/nn/layer/dense.py +9 -8
  130. mindspore/nn/layer/embedding.py +27 -25
  131. mindspore/nn/layer/image.py +53 -46
  132. mindspore/nn/layer/math.py +97 -105
  133. mindspore/nn/layer/normalization.py +117 -86
  134. mindspore/nn/layer/padding.py +185 -95
  135. mindspore/nn/layer/pooling.py +817 -414
  136. mindspore/nn/layer/rnn_cells.py +10 -15
  137. mindspore/nn/layer/rnns.py +37 -38
  138. mindspore/nn/layer/thor_layer.py +11 -12
  139. mindspore/nn/layer/timedistributed.py +5 -5
  140. mindspore/nn/layer/transformer.py +701 -0
  141. mindspore/nn/learning_rate_schedule.py +8 -8
  142. mindspore/nn/loss/__init__.py +5 -4
  143. mindspore/nn/loss/loss.py +334 -199
  144. mindspore/nn/optim/ada_grad.py +6 -6
  145. mindspore/nn/optim/adadelta.py +2 -3
  146. mindspore/nn/optim/adafactor.py +4 -5
  147. mindspore/nn/optim/adam.py +126 -62
  148. mindspore/nn/optim/adamax.py +3 -4
  149. mindspore/nn/optim/adasum.py +6 -6
  150. mindspore/nn/optim/asgd.py +2 -2
  151. mindspore/nn/optim/ftrl.py +67 -38
  152. mindspore/nn/optim/lamb.py +4 -5
  153. mindspore/nn/optim/lars.py +2 -2
  154. mindspore/nn/optim/lazyadam.py +43 -4
  155. mindspore/nn/optim/momentum.py +6 -5
  156. mindspore/nn/optim/optimizer.py +3 -1
  157. mindspore/nn/optim/proximal_ada_grad.py +2 -2
  158. mindspore/nn/optim/rmsprop.py +1 -1
  159. mindspore/nn/optim/rprop.py +8 -9
  160. mindspore/nn/optim/sgd.py +19 -13
  161. mindspore/nn/optim/thor.py +10 -15
  162. mindspore/nn/probability/__init__.py +0 -2
  163. mindspore/nn/probability/bijector/bijector.py +4 -4
  164. mindspore/nn/probability/bijector/invert.py +1 -1
  165. mindspore/nn/probability/bijector/softplus.py +2 -2
  166. mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
  167. mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
  168. mindspore/nn/probability/distribution/_utils/utils.py +9 -15
  169. mindspore/nn/probability/distribution/bernoulli.py +3 -3
  170. mindspore/nn/probability/distribution/beta.py +1 -1
  171. mindspore/nn/probability/distribution/categorical.py +5 -7
  172. mindspore/nn/probability/distribution/cauchy.py +3 -3
  173. mindspore/nn/probability/distribution/distribution.py +2 -2
  174. mindspore/nn/probability/distribution/exponential.py +2 -2
  175. mindspore/nn/probability/distribution/gamma.py +3 -3
  176. mindspore/nn/probability/distribution/geometric.py +1 -1
  177. mindspore/nn/probability/distribution/gumbel.py +3 -3
  178. mindspore/nn/probability/distribution/half_normal.py +15 -11
  179. mindspore/nn/probability/distribution/laplace.py +16 -13
  180. mindspore/nn/probability/distribution/logistic.py +2 -2
  181. mindspore/nn/probability/distribution/normal.py +1 -1
  182. mindspore/nn/probability/distribution/poisson.py +1 -1
  183. mindspore/nn/probability/distribution/student_t.py +20 -15
  184. mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
  185. mindspore/nn/probability/distribution/uniform.py +2 -2
  186. mindspore/nn/reinforcement/_tensors_queue.py +3 -3
  187. mindspore/nn/reinforcement/tensor_array.py +2 -2
  188. mindspore/nn/sparse/sparse.py +2 -2
  189. mindspore/nn/wrap/cell_wrapper.py +27 -10
  190. mindspore/nn/wrap/grad_reducer.py +2 -2
  191. mindspore/nn/wrap/loss_scale.py +40 -24
  192. mindspore/numpy/array_creations.py +33 -22
  193. mindspore/numpy/array_ops.py +35 -30
  194. mindspore/numpy/logic_ops.py +6 -27
  195. mindspore/numpy/math_ops.py +22 -19
  196. mindspore/numpy/utils.py +1 -1
  197. mindspore/numpy/utils_const.py +108 -58
  198. mindspore/opencv_core452.dll +0 -0
  199. mindspore/opencv_imgcodecs452.dll +0 -0
  200. mindspore/opencv_imgproc452.dll +0 -0
  201. mindspore/ops/_constants.py +0 -6
  202. mindspore/ops/_grad/__init__.py +2 -1
  203. mindspore/ops/_grad/grad_array_ops.py +86 -117
  204. mindspore/ops/_grad/grad_base.py +23 -1
  205. mindspore/ops/_grad/grad_clip_ops.py +2 -3
  206. mindspore/ops/_grad/grad_comm_ops.py +34 -24
  207. mindspore/ops/_grad/grad_implementations.py +9 -45
  208. mindspore/ops/_grad/grad_inner_ops.py +47 -4
  209. mindspore/ops/_grad/grad_math_ops.py +142 -117
  210. mindspore/ops/_grad/grad_nn_ops.py +71 -165
  211. mindspore/ops/_grad/grad_sequence_ops.py +296 -0
  212. mindspore/ops/_grad/grad_sparse.py +7 -6
  213. mindspore/ops/_grad_experimental/__init__.py +1 -0
  214. mindspore/ops/_grad_experimental/grad_array_ops.py +150 -15
  215. mindspore/ops/_grad_experimental/grad_image_ops.py +16 -7
  216. mindspore/ops/_grad_experimental/grad_inner_ops.py +1 -22
  217. mindspore/ops/_grad_experimental/grad_linalg_ops.py +4 -11
  218. mindspore/ops/_grad_experimental/grad_math_ops.py +210 -89
  219. mindspore/ops/_grad_experimental/grad_nn_ops.py +26 -22
  220. mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
  221. mindspore/ops/_grad_experimental/grad_sparse_ops.py +49 -8
  222. mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
  223. mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +2 -2
  224. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
  225. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
  226. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +4 -4
  227. mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
  228. mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
  229. mindspore/ops/_op_impl/_custom_op/correction_mul.py +2 -2
  230. mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
  231. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -5
  232. mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
  233. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
  234. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
  235. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
  236. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
  237. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
  238. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
  239. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
  240. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
  241. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
  242. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
  243. mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
  244. mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
  245. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  246. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
  247. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
  248. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
  249. mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
  250. mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -4
  251. mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
  252. mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
  253. mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
  254. mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
  255. mindspore/ops/_op_impl/aicpu/__init__.py +236 -4
  256. mindspore/ops/_op_impl/aicpu/abs.py +36 -0
  257. mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_v1.py → adaptive_avg_pool_2d.py} +6 -5
  258. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
  259. mindspore/ops/_op_impl/aicpu/add.py +43 -0
  260. mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
  261. mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
  262. mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
  263. mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -43
  264. mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
  265. mindspore/{compression/common/__init__.py → ops/_op_impl/aicpu/bessel_i0.py} +15 -8
  266. mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
  267. mindspore/ops/_op_impl/aicpu/conj.py +11 -0
  268. mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +0 -3
  269. mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
  270. mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +43 -0
  271. mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_grad_v1.py → digamma.py} +7 -9
  272. mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
  273. mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
  274. mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
  275. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
  276. mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
  277. mindspore/ops/_op_impl/aicpu/greater.py +41 -0
  278. mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
  279. mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
  280. mindspore/ops/_op_impl/aicpu/less.py +41 -0
  281. mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/lgamma.py} +16 -10
  282. mindspore/ops/_op_impl/aicpu/mirror_pad.py +0 -4
  283. mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
  284. mindspore/ops/_op_impl/aicpu/mul.py +3 -1
  285. mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
  286. mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
  287. mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
  288. mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
  289. mindspore/ops/_op_impl/aicpu/polar.py +32 -0
  290. mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
  291. mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
  292. mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
  293. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
  294. mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
  295. mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
  296. mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
  297. mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
  298. mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
  299. mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
  300. mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
  301. mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
  302. mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
  303. mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
  304. mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
  305. mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
  306. mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
  307. mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
  308. mindspore/ops/_op_impl/aicpu/sparse_slice.py +4 -0
  309. mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +6 -0
  310. mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
  311. mindspore/ops/_op_impl/aicpu/trans_data.py +1 -0
  312. mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
  313. mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
  314. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
  315. mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
  316. mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
  317. mindspore/ops/_op_impl/cpu/sparse_slice.py +4 -0
  318. mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +6 -0
  319. mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
  320. mindspore/ops/_op_impl/tbe/__init__.py +27 -611
  321. mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
  322. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  323. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
  324. mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +1 -0
  325. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  326. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
  327. mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
  328. mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
  329. mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
  330. mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
  331. mindspore/ops/_op_impl/tbe/cast.py +0 -2
  332. mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
  333. mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
  334. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +2 -2
  335. mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
  336. mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
  337. mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
  338. mindspore/ops/_op_impl/tbe/matmul_ds.py +2 -0
  339. mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
  340. mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
  341. mindspore/ops/_op_impl/tbe/scatter_mul.py +2 -0
  342. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -2
  343. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  344. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
  345. mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
  346. mindspore/ops/_register_for_op.py +1 -0
  347. mindspore/ops/_utils/__init__.py +1 -2
  348. mindspore/ops/_utils/utils.py +19 -40
  349. mindspore/ops/_vmap/vmap_array_ops.py +116 -38
  350. mindspore/ops/_vmap/vmap_base.py +16 -9
  351. mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
  352. mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
  353. mindspore/ops/_vmap/vmap_grad_nn_ops.py +7 -5
  354. mindspore/ops/_vmap/vmap_image_ops.py +12 -5
  355. mindspore/ops/_vmap/vmap_math_ops.py +46 -5
  356. mindspore/ops/_vmap/vmap_nn_ops.py +15 -21
  357. mindspore/ops/_vmap/vmap_random_ops.py +1 -1
  358. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  359. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  360. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
  361. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
  362. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  363. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  364. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  365. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
  366. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +220 -106
  367. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  368. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
  369. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
  370. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
  371. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
  372. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
  373. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
  374. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
  375. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  376. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  377. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -23
  378. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -17
  379. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
  380. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  381. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  382. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  383. mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
  384. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  385. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +39 -41
  386. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
  387. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +41 -43
  388. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +51 -57
  389. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  390. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
  391. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
  392. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  393. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
  394. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
  395. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
  396. mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
  397. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  398. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
  399. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
  400. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
  401. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
  402. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
  403. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  404. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
  405. mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
  406. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  407. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  408. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +24 -25
  409. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  410. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  411. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  412. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
  413. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
  414. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
  415. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  416. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +18 -19
  417. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +53 -53
  418. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
  419. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +77 -85
  420. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
  421. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
  422. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  423. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
  424. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
  425. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  426. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
  427. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
  428. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  429. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +37 -39
  430. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +70 -72
  431. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  432. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
  433. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  434. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  435. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +17 -17
  436. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
  437. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
  438. mindspore/ops/bprop_mindir/generate_mindir.py +2 -0
  439. mindspore/ops/composite/__init__.py +7 -8
  440. mindspore/ops/composite/base.py +101 -47
  441. mindspore/ops/composite/math_ops.py +188 -158
  442. mindspore/ops/composite/multitype_ops/_compile_utils.py +415 -170
  443. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +142 -87
  444. mindspore/ops/composite/multitype_ops/add_impl.py +6 -1
  445. mindspore/ops/composite/multitype_ops/div_impl.py +2 -3
  446. mindspore/ops/composite/multitype_ops/getitem_impl.py +31 -3
  447. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
  448. mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
  449. mindspore/ops/composite/multitype_ops/in_impl.py +9 -0
  450. mindspore/ops/composite/multitype_ops/less_equal_impl.py +31 -0
  451. mindspore/ops/composite/multitype_ops/less_impl.py +31 -0
  452. mindspore/ops/composite/multitype_ops/mul_impl.py +21 -5
  453. mindspore/ops/composite/multitype_ops/not_in_impl.py +9 -0
  454. mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
  455. mindspore/ops/composite/multitype_ops/setitem_impl.py +21 -3
  456. mindspore/ops/composite/multitype_ops/sub_impl.py +1 -1
  457. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +35 -4
  458. mindspore/ops/function/__init__.py +152 -8
  459. mindspore/ops/function/array_func.py +2555 -674
  460. mindspore/ops/function/clip_func.py +209 -13
  461. mindspore/ops/function/debug_func.py +2 -2
  462. mindspore/ops/function/grad/__init__.py +2 -1
  463. mindspore/ops/function/grad/grad_func.py +147 -62
  464. mindspore/ops/function/image_func.py +54 -38
  465. mindspore/ops/function/linalg_func.py +167 -16
  466. mindspore/ops/function/math_func.py +4849 -1492
  467. mindspore/ops/function/nn_func.py +2573 -988
  468. mindspore/ops/function/other_func.py +115 -0
  469. mindspore/ops/function/parameter_func.py +3 -3
  470. mindspore/ops/function/random_func.py +790 -73
  471. mindspore/ops/function/sparse_func.py +98 -78
  472. mindspore/ops/function/sparse_unary_func.py +54 -53
  473. mindspore/ops/function/spectral_func.py +27 -24
  474. mindspore/ops/function/vmap_func.py +22 -2
  475. mindspore/ops/functional.py +97 -37
  476. mindspore/ops/op_info_register.py +70 -28
  477. mindspore/ops/operations/__init__.py +47 -14
  478. mindspore/ops/operations/_csr_ops.py +7 -7
  479. mindspore/ops/operations/_embedding_cache_ops.py +5 -5
  480. mindspore/ops/operations/_grad_ops.py +276 -187
  481. mindspore/ops/operations/_inner_ops.py +319 -113
  482. mindspore/ops/operations/_ms_kernel.py +10 -8
  483. mindspore/ops/operations/_ocr_ops.py +9 -9
  484. mindspore/ops/operations/_opaque_predicate_registry.py +4 -0
  485. mindspore/ops/operations/_quant_ops.py +137 -102
  486. mindspore/ops/operations/_rl_inner_ops.py +121 -60
  487. mindspore/ops/operations/_scalar_ops.py +466 -0
  488. mindspore/ops/operations/_sequence_ops.py +1004 -2
  489. mindspore/ops/operations/_tensor_array.py +10 -11
  490. mindspore/ops/operations/_thor_ops.py +1 -1
  491. mindspore/ops/operations/array_ops.py +801 -466
  492. mindspore/ops/operations/comm_ops.py +51 -49
  493. mindspore/ops/operations/control_ops.py +2 -2
  494. mindspore/ops/operations/custom_ops.py +123 -44
  495. mindspore/ops/operations/debug_ops.py +24 -24
  496. mindspore/ops/operations/image_ops.py +240 -153
  497. mindspore/ops/operations/inner_ops.py +34 -50
  498. mindspore/ops/operations/linalg_ops.py +31 -9
  499. mindspore/ops/operations/math_ops.py +988 -757
  500. mindspore/ops/operations/nn_ops.py +965 -819
  501. mindspore/ops/operations/other_ops.py +51 -40
  502. mindspore/ops/operations/random_ops.py +204 -122
  503. mindspore/ops/operations/rl_ops.py +8 -9
  504. mindspore/ops/operations/sparse_ops.py +254 -93
  505. mindspore/ops/operations/spectral_ops.py +35 -3
  506. mindspore/ops/primitive.py +111 -9
  507. mindspore/parallel/_auto_parallel_context.py +189 -83
  508. mindspore/parallel/_offload_context.py +185 -0
  509. mindspore/parallel/_parallel_serialization.py +99 -7
  510. mindspore/parallel/_ps_context.py +9 -5
  511. mindspore/parallel/_recovery_context.py +1 -1
  512. mindspore/parallel/_tensor.py +7 -1
  513. mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
  514. mindspore/{nn/transformer → parallel/_transformer}/layers.py +6 -37
  515. mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
  516. mindspore/{nn/transformer → parallel/_transformer}/moe.py +20 -16
  517. mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
  518. mindspore/{nn/transformer → parallel/_transformer}/transformer.py +48 -111
  519. mindspore/parallel/_utils.py +1 -2
  520. mindspore/parallel/algo_parameter_config.py +1 -1
  521. mindspore/parallel/checkpoint_transform.py +37 -34
  522. mindspore/parallel/shard.py +17 -18
  523. mindspore/profiler/common/validator/validate_path.py +2 -2
  524. mindspore/profiler/envprofiling.py +69 -47
  525. mindspore/profiler/parser/ascend_timeline_generator.py +49 -42
  526. mindspore/profiler/parser/base_timeline_generator.py +49 -56
  527. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +98 -78
  528. mindspore/profiler/parser/hwts_log_parser.py +1 -1
  529. mindspore/profiler/parser/integrator.py +15 -14
  530. mindspore/profiler/parser/minddata_analyzer.py +2 -2
  531. mindspore/profiler/parser/msadvisor_analyzer.py +12 -25
  532. mindspore/profiler/parser/msadvisor_parser.py +2 -4
  533. mindspore/profiler/parser/optime_parser.py +17 -18
  534. mindspore/profiler/parser/profiler_info.py +2 -1
  535. mindspore/profiler/profiling.py +218 -186
  536. mindspore/rewrite/__init__.py +3 -1
  537. mindspore/rewrite/api/node.py +1 -114
  538. mindspore/rewrite/api/node_type.py +3 -0
  539. mindspore/rewrite/api/pattern_engine.py +31 -1
  540. mindspore/rewrite/api/scoped_value.py +4 -4
  541. mindspore/rewrite/api/symbol_tree.py +3 -78
  542. mindspore/rewrite/api/tree_node_helper.py +1 -1
  543. mindspore/rewrite/ast_creator_register.py +1 -0
  544. mindspore/rewrite/ast_helpers/__init__.py +2 -2
  545. mindspore/rewrite/ast_helpers/ast_creator.py +1 -2
  546. mindspore/rewrite/ast_helpers/ast_finder.py +65 -0
  547. mindspore/rewrite/ast_helpers/ast_modifier.py +11 -3
  548. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +18 -2
  549. mindspore/rewrite/namespace.py +0 -2
  550. mindspore/rewrite/node.py +157 -11
  551. mindspore/rewrite/parsers/assign_parser.py +231 -53
  552. mindspore/rewrite/parsers/class_def_parser.py +187 -109
  553. mindspore/rewrite/parsers/for_parser.py +24 -14
  554. mindspore/rewrite/parsers/function_def_parser.py +21 -4
  555. mindspore/rewrite/parsers/if_parser.py +6 -2
  556. mindspore/rewrite/sparsify/__init__.py +0 -0
  557. mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
  558. mindspore/rewrite/sparsify/sparsify.py +109 -0
  559. mindspore/rewrite/sparsify/utils.py +173 -0
  560. mindspore/rewrite/symbol_tree.py +256 -133
  561. mindspore/rewrite/symbol_tree_builder.py +38 -1
  562. mindspore/run_check/_check_version.py +69 -63
  563. mindspore/run_check/run_check.py +2 -1
  564. mindspore/tinyxml2.dll +0 -0
  565. mindspore/train/__init__.py +1 -1
  566. mindspore/train/_utils.py +28 -5
  567. mindspore/train/amp.py +273 -102
  568. mindspore/train/callback/_backup_and_restore.py +5 -5
  569. mindspore/train/callback/_callback.py +2 -2
  570. mindspore/train/callback/_checkpoint.py +3 -3
  571. mindspore/train/callback/_early_stop.py +3 -3
  572. mindspore/train/callback/_lambda_callback.py +2 -2
  573. mindspore/train/callback/_landscape.py +29 -31
  574. mindspore/train/callback/_loss_monitor.py +3 -3
  575. mindspore/train/callback/_on_request_exit.py +3 -3
  576. mindspore/train/callback/_reduce_lr_on_plateau.py +4 -4
  577. mindspore/train/callback/_summary_collector.py +23 -16
  578. mindspore/train/callback/_time_monitor.py +3 -3
  579. mindspore/train/checkpoint_pb2.py +68 -8
  580. mindspore/train/data_sink.py +15 -3
  581. mindspore/train/dataset_helper.py +10 -15
  582. mindspore/train/loss_scale_manager.py +8 -11
  583. mindspore/train/metrics/__init__.py +1 -1
  584. mindspore/train/metrics/bleu_score.py +1 -1
  585. mindspore/train/metrics/confusion_matrix.py +1 -1
  586. mindspore/train/metrics/cosine_similarity.py +1 -1
  587. mindspore/train/metrics/dice.py +2 -2
  588. mindspore/train/metrics/fbeta.py +1 -1
  589. mindspore/train/metrics/hausdorff_distance.py +4 -3
  590. mindspore/train/metrics/mean_surface_distance.py +2 -2
  591. mindspore/train/metrics/occlusion_sensitivity.py +1 -1
  592. mindspore/train/metrics/perplexity.py +1 -1
  593. mindspore/train/metrics/precision.py +1 -1
  594. mindspore/train/metrics/recall.py +1 -1
  595. mindspore/train/metrics/roc.py +2 -2
  596. mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
  597. mindspore/train/mind_ir_pb2.py +116 -37
  598. mindspore/train/model.py +45 -28
  599. mindspore/train/serialization.py +295 -188
  600. mindspore/train/summary/_summary_adapter.py +1 -1
  601. mindspore/train/summary/summary_record.py +43 -13
  602. mindspore/train/train_thor/convert_utils.py +2 -2
  603. mindspore/train/train_thor/dataset_helper.py +3 -3
  604. mindspore/turbojpeg.dll +0 -0
  605. mindspore/version.py +1 -1
  606. {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +3 -2
  607. {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +610 -541
  608. mindspore/compression/__init__.py +0 -19
  609. mindspore/compression/common/constant.py +0 -124
  610. mindspore/compression/export/__init__.py +0 -19
  611. mindspore/compression/export/quant_export.py +0 -515
  612. mindspore/compression/quant/__init__.py +0 -28
  613. mindspore/compression/quant/qat.py +0 -634
  614. mindspore/compression/quant/quant_utils.py +0 -462
  615. mindspore/compression/quant/quantizer.py +0 -68
  616. mindspore/nn/layer/quant.py +0 -1868
  617. mindspore/nn/layer/rnn_utils.py +0 -90
  618. mindspore/nn/probability/dpn/__init__.py +0 -22
  619. mindspore/nn/probability/dpn/vae/__init__.py +0 -25
  620. mindspore/nn/probability/dpn/vae/cvae.py +0 -140
  621. mindspore/nn/probability/dpn/vae/vae.py +0 -124
  622. mindspore/nn/probability/infer/__init__.py +0 -22
  623. mindspore/nn/probability/infer/variational/elbo.py +0 -70
  624. mindspore/nn/probability/infer/variational/svi.py +0 -84
  625. mindspore/nn/probability/toolbox/__init__.py +0 -22
  626. mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
  627. mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -364
  628. mindspore/nn/probability/transforms/__init__.py +0 -22
  629. mindspore/nn/probability/transforms/transform_bnn.py +0 -262
  630. mindspore/nn/probability/zhusuan/__init__.py +0 -18
  631. mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
  632. mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
  633. mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
  634. mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
  635. mindspore/ops/_op_impl/aicpu/parallel_concat.py +0 -42
  636. mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
  637. mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -19
  638. mindspore/ops/bprop_mindir/Cast_bprop.mindir +0 -19
  639. mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -19
  640. mindspore/ops/bprop_mindir/MatMul_bprop.mindir +0 -0
  641. mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -17
  642. mindspore/ops/bprop_mindir/Transpose_bprop.mindir +0 -0
  643. mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -15
  644. mindspore/ops/composite/array_ops.py +0 -241
  645. mindspore/ops/composite/clip_ops.py +0 -134
  646. mindspore/ops/composite/random_ops.py +0 -426
  647. mindspore/ops/composite/vmap_ops.py +0 -38
  648. mindspore/parallel/nn/__init__.py +0 -42
  649. mindspore/parallel/nn/loss.py +0 -22
  650. mindspore/parallel/nn/moe.py +0 -21
  651. mindspore/parallel/nn/op_parallel_config.py +0 -22
  652. mindspore/parallel/nn/transformer.py +0 -31
  653. {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
  654. {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
  655. {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -30,18 +30,17 @@ import mindspore.common.dtype as mstype
 from mindspore.ops import operations as P
 from mindspore.ops import functional as F
 from mindspore.nn.cell import Cell
-from mindspore._checkparam import Validator
+from mindspore import _checkparam as Validator
 from mindspore import log as logger
-from mindspore.parallel._utils import _get_parallel_mode, _is_sharding_propagation
+from mindspore.parallel._utils import _get_parallel_mode
 from mindspore.context import ParallelMode
 from mindspore.log import _LogActionOnce
-from mindspore.nn.transformer.layers import _LayerNorm, _Linear, _check_input_shape, \
+from mindspore.parallel._transformer.layers import _LayerNorm, _Linear, \
     _args_type_validator_check, _valid_type_checks, _valid_value_checks, \
-    _check_shape_equal, _check_past_none_input_none, _check_input_dtype, _check_input_shape_value, \
-    _check_shape_equal_without_batch
-from mindspore.nn.transformer.op_parallel_config import default_dpmp_config, _PipeLineConfig, OpParallelConfig, \
+    _check_past_none_input_none, _check_input_dtype
+from mindspore.parallel._transformer.op_parallel_config import default_dpmp_config, _PipeLineConfig, OpParallelConfig, \
     _Config, _check_config, MoEParallelConfig
-from mindspore.nn.transformer.moe import default_moe_config, MoE, _check_moe_config
+from mindspore.parallel._transformer.moe import default_moe_config, MoE, _check_moe_config
 
 __all__ = [
     "AttentionMask",
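The hunk above captures two changes that recur throughout this release: the Validator helpers are now imported as the mindspore._checkparam module itself, and the internal mindspore.nn.transformer utilities have moved to the private mindspore.parallel._transformer package. A minimal migration sketch, assuming MindSpore 2.0.0rc1 is installed (the import targets are taken from this diff):

# 2.0.0a0 style, no longer used here:
#   from mindspore._checkparam import Validator
#   from mindspore.nn.transformer.layers import _LayerNorm, _Linear
# 2.0.0rc1 style, as in the hunk above:
from mindspore import _checkparam as Validator
from mindspore.parallel._transformer.layers import _LayerNorm, _Linear

Validator.check_positive_int(4)  # the module-level check_* helpers keep their names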
@@ -399,13 +398,13 @@ class FeedForward(Cell):
 (2, 20, 15)
 >>> # Example 2 using custom hidden activation
 >>> class MyActivationNoShard(nn.Cell):
->>>     def __init__(self):
->>>         super(MyActivationNoShard, self).__init__()
->>>         self.add = ops.Add()
->>>     def construct(self, x):
->>>         return self.add(x, 0.1)
+...     def __init__(self):
+...         super(MyActivationNoShard, self).__init__()
+...         self.add = ops.Add()
+...     def construct(self, x):
+...         return self.add(x, 0.1)
 >>> model = FeedForward(hidden_size=15, ffn_hidden_size=30, dropout_rate=0.1,
->>>                     hidden_act=MyActivationNoShard)
+...                     hidden_act=MyActivationNoShard)
 >>> tensor = Tensor(np.ones((2, 20, 15)), mstype.float32)
 >>> output = model(tensor)
 >>> print(output.shape)
@@ -415,16 +414,16 @@ class FeedForward(Cell):
 >>> # a class function named activation_shard. It accepts the argument parallel_config (OpParallelConfig,
 >>> # MoEParallelConfig) and set the shard for the primitives used in the construct.
 >>> class MyActivationWithShard(nn.Cell):
->>>     def __init__(self):
->>>         super(MyActivationWithShard, self).__init__()
->>>         self.add = ops.Add()
->>>     def construct(self, x):
->>>         return self.add(x, 0.1)
->>>     def activation_shard(self, parallel_config):
->>>         self.add.shard(((parallel_config.data_parallel, parallel_config.model_parallel), ()))
+...     def __init__(self):
+...         super(MyActivationWithShard, self).__init__()
+...         self.add = ops.Add()
+...     def construct(self, x):
+...         return self.add(x, 0.1)
+...     def activation_shard(self, parallel_config):
+...         self.add.shard(((parallel_config.data_parallel, parallel_config.model_parallel), ()))
 >>>
 >>> model = FeedForward(hidden_size=15, ffn_hidden_size=30, dropout_rate=0.1,
->>>                     hidden_act=MyActivationWithShard)
+...                     hidden_act=MyActivationWithShard)
 >>> tensor = Tensor(np.ones((2, 20, 15)), mstype.float32)
 >>> output = model(tensor)
 >>> print(output.shape)
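The docstring edits in the two example hunks above are a doctest-format fix rather than a behavioral change: inside an interactive example, only the first line of a statement uses the `>>>` prompt and continuation lines use `...`; with `>>>` on every line, doctest treats each line as a separate statement and the multi-line class definition cannot be parsed. A small self-contained illustration of the convention (the Scale class is hypothetical, not part of MindSpore):

>>> class Scale:
...     def __init__(self, factor):
...         self.factor = factor
...     def apply(self, x):
...         return x * self.factor
>>> Scale(3).apply(2)
6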
@@ -451,7 +450,7 @@ class FeedForward(Cell):
 if hidden_act is None or not (isinstance(hidden_act, str) or issubclass(hidden_act, nn.Cell)):
     raise TypeError(f"For FeedForward cell, the hidden_act should str type or nn.Cell type, "
                     f"but got {hidden_act}.")
-if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,) and _is_sharding_propagation():
+if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
     _check_config(parallel_config)
     mp = parallel_config.model_parallel
     if expert_num > 1:
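The `and _is_sharding_propagation()` term dropped here is removed from the same guard in MultiHeadAttention, the encoder/decoder layers, and Transformer further down, so this branch is now taken whenever the parallel mode is AUTO_PARALLEL rather than only when sharding propagation is enabled. A toy before/after of the predicate, using plain stand-in values instead of the real mindspore.context queries (illustration only):

# Stand-ins for _get_parallel_mode() and _is_sharding_propagation().
parallel_mode = "auto_parallel"
sharding_propagation = False

old_guard = parallel_mode == "auto_parallel" and sharding_propagation  # 2.0.0a0
new_guard = parallel_mode == "auto_parallel"                           # 2.0.0rc1
print(old_guard, new_guard)  # False True: the branch is reached in rc1 for this setup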
@@ -497,9 +496,9 @@ class FeedForward(Cell):
 else:
     self.projection.shard(strategy_matmul=((dp, mp), (mp, 1)))
 self.projection.bias.parallel_optimizer = False
-self.dropout = nn.Dropout(1 - dropout_rate)
-self.dropout_3d = nn.Dropout(1 - dropout_rate)
-self.dropout_4d = nn.Dropout(1 - dropout_rate)
+self.dropout = nn.Dropout(p=dropout_rate)
+self.dropout_3d = nn.Dropout(p=dropout_rate)
+self.dropout_4d = nn.Dropout(p=dropout_rate)
 self.cast = P.Cast()
 else:
     _check_config(parallel_config)
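The Dropout changes in this hunk and the ones below follow the 2.0 argument change: the constructor now takes p, the probability of dropping an element, where the old positional argument was the keep probability. A minimal sketch of the migration, assuming MindSpore 2.0.0rc1:

import mindspore.nn as nn

# 2.0.0a0 style: the argument was the keep probability, so a 10% drop rate meant 0.9.
#   dropout = nn.Dropout(1 - 0.1)
# 2.0.0rc1 style used throughout this diff: p is the drop probability itself.
dropout = nn.Dropout(p=0.1)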
@@ -557,16 +556,18 @@ class FeedForward(Cell):
 self.projection.shard(strategy_matmul=((dp, mp), (mp, 1)),
                       strategy_bias=((dp, 1), (1,)))
 self.projection.bias.parallel_optimizer = False
-self.dropout = nn.Dropout(1 - dropout_rate)
+self.dropout = nn.Dropout(p=dropout_rate)
 self.dropout.dropout.shard(((dp, 1),))
-self.dropout_3d = nn.Dropout(1 - dropout_rate)
+self.dropout_3d = nn.Dropout(p=dropout_rate)
 self.dropout_3d.dropout.shard(((dp, 1, 1),))
-self.dropout_4d = nn.Dropout(1 - dropout_rate)
+self.dropout_4d = nn.Dropout(p=dropout_rate)
 self.dropout_4d.dropout.shard(((dp, ep, 1, 1),))
 self.cast = P.Cast()
+# for grouped pairwise exchange alltoall method in pass
+self.mapping.matmul.add_prim_attr("gpea_label", True)
+self.projection.matmul.add_prim_attr("gpea_label", True)
 
 def construct(self, x):
-    _check_input_shape(F.shape(x), "x", self.cls_name, [2, 3])
     _check_input_dtype(F.dtype(x), "x", [mstype.float32, mstype.float16], self.cls_name)
     x = self.cast(x, mstype.float16)
     # returned shape is [bs, seq_length, ffn_hidden_size] or [bs * seq_length, ffn_hidden_size]
@@ -639,9 +640,7 @@ class AttentionMask(Cell):
 self.multiply = P.Mul().shard(((parallel_config.data_parallel, 1, 1), (1, 1, 1)))
 
 def construct(self, input_mask):
-    _check_input_shape(F.shape(input_mask), "input_mask", self.cls_name, 2)
     _check_input_dtype(F.dtype(input_mask), "input_mask", [mstype.float32, mstype.float16], self.cls_name)
-    _check_input_shape_value(F.shape(input_mask), 1, "input_mask", self.cls_name, self.seq_length)
     input_mask = P.Cast()(self.not_equal(input_mask, 0), mstype.float16)
     input_shape = P.Shape()(input_mask)
     shape_right = (input_shape[0], 1, input_shape[1])
@@ -736,7 +735,6 @@ class VocabEmbedding(Cell):
 f"model parallel for the embedding lookup.")
 
 def construct(self, input_ids):
-    _check_input_shape(F.shape(input_ids), "input_ids", self.cls_name, 2)
     _check_input_dtype(F.dtype(input_ids), "input_ids", [mstype.int32], self.cls_name)
     output = self.gather(self.embedding_table, input_ids, 0)
     return output, self.embedding_table.value()
@@ -904,7 +902,7 @@ class MultiHeadAttention(Cell):
 ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL)
 if batch_size:
     Validator.check_positive_int(batch_size)
-if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,) and _is_sharding_propagation():
+if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
     _check_config(parallel_config)
     self.src_seq_length = src_seq_length
     self.tgt_seq_length = tgt_seq_length
@@ -955,8 +953,8 @@ class MultiHeadAttention(Cell):
 # Normalize factor for attention, sqrt(dk) as widely used
 self.scale_factor = Tensor(math.sqrt(math.sqrt(self.size_per_head)))
 self.use_past = use_past
-self.dropout = nn.Dropout(1 - hidden_dropout_rate)
-self.prob_dropout = nn.Dropout(1 - attention_dropout_rate)
+self.dropout = nn.Dropout(p=hidden_dropout_rate)
+self.prob_dropout = nn.Dropout(p=attention_dropout_rate)
 self.softmax = nn.Softmax().to_float(softmax_compute_type)
 self.softmax_3d = nn.Softmax().to_float(softmax_compute_type)
 self.expand_dims = P.ExpandDims()
@@ -1056,9 +1054,9 @@ class MultiHeadAttention(Cell):
 # Normalize factor for attention, sqrt(dk) as widely used
 self.scale_factor = Tensor(math.sqrt(math.sqrt(self.size_per_head)))
 self.use_past = use_past
-self.dropout = nn.Dropout(1 - hidden_dropout_rate)
+self.dropout = nn.Dropout(p=hidden_dropout_rate)
 self.dropout.dropout.shard(((parallel_config.data_parallel, 1),))
-self.prob_dropout = nn.Dropout(1 - attention_dropout_rate)
+self.prob_dropout = nn.Dropout(p=attention_dropout_rate)
 self.prob_dropout.dropout.shard(
     ((parallel_config.data_parallel, parallel_config.model_parallel, 1, 1),))
 self.softmax = nn.Softmax().to_float(softmax_compute_type)
@@ -1204,46 +1202,22 @@ class MultiHeadAttention(Cell):
 
 def _get_batch_size_from_query(self, query):
     r"""Get the batch size from query tensor"""
-    batch_size = None
     # For the incremental prediction, the seq length for the input is 1.
-    if len(F.shape(query)) == 2 and self.is_first_iteration:
-        batch_size = F.shape(query)[0] // self.src_seq_length
-    else:
-        batch_size = F.shape(query)[0]
-    return batch_size
+    if len(F.shape(query)) == 2 and ((self.use_past and self.is_first_iteration) or (not self.use_past)):
+        return F.shape(query)[0] // self.src_seq_length
+    return F.shape(query)[0]
 
 def _get_seq_length_under_incremental(self, length):
     r"""Return the length of the tensor.
     For the incremental prediction, the seq length for the input is 1.
     """
-    if self.is_first_iteration:
-        return length
-    return 1
+    if self.use_past and not self.is_first_iteration:
+        return 1
+    return length
 
 def _check_inputs(self, query_tensor, key_tensor, value_tensor, attention_mask, key_past=None,
                   value_past=None, batch_valid_length=None):
     r"""Check inputs"""
-    if not self.use_past or (self.use_past and self.is_first_iteration):
-        _check_shape_equal_without_batch(F.shape(query_tensor), "query_tensor", self.cls_name,
-                                         [self.src_seq_length, self.hidden_size])
-        _check_shape_equal_without_batch(F.shape(key_tensor), "key_tensor", self.cls_name,
-                                         [self.tgt_seq_length, self.hidden_size])
-        _check_shape_equal_without_batch(F.shape(value_tensor), "value_tensor", self.cls_name,
-                                         [self.tgt_seq_length, self.hidden_size])
-        if attention_mask is not None:
-            _check_shape_equal(F.shape(attention_mask), "attention_mask", self.cls_name,
-                               [F.shape(attention_mask)[0], self.src_seq_length, self.tgt_seq_length])
-    else:
-        _check_shape_equal(F.shape(query_tensor), "query_tensor", self.cls_name,
-                           [[self.batch_size, 1, self.hidden_size], [self.batch_size, self.hidden_size]])
-        _check_shape_equal(F.shape(key_tensor), "key_tensor", self.cls_name,
-                           [[self.batch_size, 1, self.hidden_size], [self.batch_size, self.hidden_size]])
-        _check_shape_equal(F.shape(value_tensor), "value_tensor", self.cls_name,
-                           [[self.batch_size, 1, self.hidden_size], [self.batch_size, self.hidden_size]])
-        if attention_mask is not None:
-            _check_shape_equal(F.shape(attention_mask), "attention_mask", self.cls_name,
-                               [[self.batch_size, 1, self.tgt_seq_length], [self.batch_size, self.hidden_size]])
-
     _check_input_dtype(F.dtype(query_tensor), "query_tensor", [mstype.float32, mstype.float16], self.cls_name)
     _check_input_dtype(F.dtype(key_tensor), "key_tensor", [mstype.float32, mstype.float16], self.cls_name)
     _check_input_dtype(F.dtype(value_tensor), "value_tensor", [mstype.float32, mstype.float16], self.cls_name)
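The rewritten _get_batch_size_from_query above recovers the batch size from a 2-D query of shape (batch * src_seq_length, hidden) on a full-sequence pass, and otherwise treats dimension 0 as the batch (including incremental steps, where the sequence length is 1). A plain-Python sketch of that rule with hypothetical shapes, mirroring the condition in the hunk:

def batch_size_from_query_shape(query_shape, src_seq_length, use_past, is_first_iteration):
    # Equivalent to the hunk's condition: 2-D input and (not use_past or first iteration).
    if len(query_shape) == 2 and (not use_past or is_first_iteration):
        return query_shape[0] // src_seq_length  # dim 0 is batch * src_seq_length
    return query_shape[0]                        # dim 0 is already the batch size

assert batch_size_from_query_shape((2 * 16, 64), 16, use_past=False, is_first_iteration=True) == 2
assert batch_size_from_query_shape((2, 64), 16, use_past=True, is_first_iteration=False) == 2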
@@ -1264,13 +1238,8 @@ class MultiHeadAttention(Cell):
 _check_past_none_input_none(self.use_past, "batch_valid_length", self.cls_name, None,
                             batch_valid_length_is_tensor, batch_is_default)
 if self.use_past:
-    _check_shape_equal(F.shape(key_past), "key_past", self.cls_name,
-                       [self.batch_size, self.n_head, self.size_per_head, self.tgt_seq_length])
     _check_input_dtype(F.dtype(key_past), "key_past", [mstype.float16], self.cls_name)
-    _check_shape_equal(F.shape(value_past), "value_past", self.cls_name,
-                       [self.batch_size, self.n_head, self.tgt_seq_length, self.size_per_head])
     _check_input_dtype(F.dtype(value_past), "value_past", [mstype.float16], self.cls_name)
-    _check_shape_equal(F.shape(batch_valid_length), "batch_valid_length", self.cls_name, [self.batch_size])
     _check_input_dtype(F.dtype(batch_valid_length), "batch_valid_length", [mstype.int32], self.cls_name)
 return True
 
@@ -1528,7 +1497,7 @@ class TransformerEncoderLayer(Cell):
 if batch_size or use_past:
     Validator.check_positive_int(batch_size)
 self.batch_size = batch_size
-if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,) and _is_sharding_propagation():
+if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
     _check_config(parallel_config)
     if num_heads % parallel_config.model_parallel != 0:
         raise ValueError(
@@ -1770,17 +1739,6 @@ class TransformerEncoderLayer(Cell):
 
 def _check_input(self, x, input_mask, init_reset, batch_valid_length):
     r"""Check inputs"""
-    if not self.use_past or (self.use_past and self.is_first_iteration):
-        _check_shape_equal_without_batch(F.shape(x), "x", self.cls_name,
-                                         [self.seq_length, self.hidden_size])
-        if input_mask is not None:
-            _check_shape_equal(F.shape(input_mask), "input_mask", self.cls_name,
-                               [F.shape(input_mask)[0], self.seq_length, self.seq_length])
-    else:
-        _check_shape_equal(F.shape(x), "x", self.cls_name, [self.batch_size, 1, self.hidden_size])
-        if input_mask is not None:
-            _check_shape_equal(F.shape(input_mask), "input_mask", self.cls_name,
-                               [F.shape(input_mask)[0], 1, self.seq_length])
     _check_input_dtype(F.dtype(x), "x", [mstype.float32, mstype.float16], self.cls_name)
     if input_mask is not None:
         _check_input_dtype(F.dtype(input_mask), "input_mask", [mstype.float32, mstype.float16], self.cls_name)
@@ -1795,9 +1753,7 @@ class TransformerEncoderLayer(Cell):
 batch_valid_length_is_tensor, batch_is_default)
 
 if self.use_past:
-    _check_shape_equal(F.shape(init_reset), "init_reset", self.cls_name, [1])
     _check_input_dtype(F.dtype(init_reset), "init_reset", [mstype.bool_], self.cls_name)
-    _check_shape_equal(F.shape(batch_valid_length), "batch_valid_length", self.cls_name, [self.batch_size])
     _check_input_dtype(F.dtype(batch_valid_length), "batch_valid_length", [mstype.int32], self.cls_name)
 return True
 
@@ -1933,7 +1889,7 @@ class TransformerDecoderLayer(Cell):
 config_to_attention = parallel_config.dpmp if self.use_moe else parallel_config
 if batch_size or use_past:
     Validator.check_positive_int(batch_size)
-if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,) and _is_sharding_propagation():
+if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
     _check_config(parallel_config)
     if num_heads % parallel_config.model_parallel != 0:
         raise ValueError("For 'TransformerDecoderLayer', the class variable 'num_heads' must be divisibled by "
@@ -2226,31 +2182,14 @@ class TransformerDecoderLayer(Cell):
 
 def _check_input(self, hidden_states, attention_mask, encoder_output, memory_mask, init_reset, batch_valid_length):
     r"""Check inputs"""
-    if not self.use_past or (self.use_past and self.is_first_iteration):
-        _check_shape_equal_without_batch(F.shape(hidden_states), "hidden_states", self.cls_name,
-                                         [self.tgt_seq_length, self.hidden_size])
-        if attention_mask is not None:
-            _check_shape_equal(F.shape(attention_mask), "attention_mask", self.cls_name,
-                               [F.shape(attention_mask)[0], self.tgt_seq_length, self.tgt_seq_length])
-
-    else:
-        _check_shape_equal(F.shape(hidden_states), "hidden_states", self.cls_name,
-                           [self.batch_size, 1, self.hidden_size])
-        if attention_mask is not None:
-            _check_shape_equal(F.shape(attention_mask), "attention_mask", self.cls_name,
-                               [self.batch_size, 1, self.tgt_seq_length])
     _check_input_dtype(F.dtype(hidden_states), "hidden_states", [mstype.float32, mstype.float16], self.cls_name)
     if attention_mask is not None:
         _check_input_dtype(F.dtype(attention_mask), "attention_mask", [mstype.float32, mstype.float16],
                            self.cls_name)
     if encoder_output is not None:
-        _check_shape_equal_without_batch(F.shape(encoder_output), "encoder_output", self.cls_name,
-                                         [self.src_seq_length, self.hidden_size])
         _check_input_dtype(F.dtype(encoder_output), "encoder_output",
                            [mstype.float32, mstype.float16], self.cls_name)
     if memory_mask is not None:
-        _check_shape_equal_without_batch(F.shape(memory_mask), "memory_mask", self.cls_name,
-                                         [self.tgt_seq_length, self.src_seq_length])
         _check_input_dtype(F.dtype(memory_mask), "memory_mask",
                            [mstype.float32, mstype.float16], self.cls_name)
 
@@ -2264,9 +2203,7 @@ class TransformerDecoderLayer(Cell):
 batch_valid_length_is_tensor, batch_is_default)
 
 if self.use_past:
-    _check_shape_equal(F.shape(init_reset), "init_reset", self.cls_name, [1])
     _check_input_dtype(F.dtype(init_reset), "init_reset", [mstype.bool_], self.cls_name)
-    _check_shape_equal(F.shape(batch_valid_length), "batch_valid_length", self.cls_name, [self.batch_size])
     _check_input_dtype(F.dtype(batch_valid_length), "batch_valid_length", [mstype.int32], self.cls_name)
 return True
 
@@ -2487,7 +2424,7 @@ class TransformerEncoder(Cell):
 _check_moe_config(moe_config, parallel_config)
 self.use_moe = (moe_config.expert_num > 1)
 config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
-if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,) and _is_sharding_propagation():
+if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
     self.add = P.Add()
     self.aux_loss = Tensor(0.0, mstype.float32)
     self.num_layers = num_layers
@@ -2723,7 +2660,7 @@ class TransformerDecoder(Cell):
2723
2660
  _check_config(parallel_config)
2724
2661
  self.use_moe = (moe_config.expert_num > 1)
2725
2662
  config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
2726
- if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,) and _is_sharding_propagation():
2663
+ if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
2727
2664
  self.add = P.Add()
2728
2665
  self.aux_loss = Tensor(0.0, mstype.float32)
2729
2666
  self.num_layers = num_layers
@@ -2827,8 +2764,8 @@ class Transformer(Cell):
2827
2764
  the residual addition before the layer normalization. And the default hidden act is `gelu`.
2828
2765
  The details can be found in `Attention is all you need <https://arxiv.org/pdf/1706.03762v5.pdf>`_.
2829
2766
 
2830
- Note:
2831
- This is an experimental interface that is subject to change or deletion.
2767
+ .. warning::
2768
+ This is an experimental API that is subject to change or deletion.
2832
2769
 
2833
2770
  Args:
2834
2771
  hidden_size(int): The hidden size of the input.
@@ -2986,7 +2923,7 @@ class Transformer(Cell):
2986
2923
  moe_config=default_moe_config,
2987
2924
  parallel_config=default_transformer_config):
2988
2925
  super(Transformer, self).__init__()
2989
- if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,) and _is_sharding_propagation():
2926
+ if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
2990
2927
  _check_config(parallel_config)
2991
2928
  self.batch_size = batch_size
2992
2929
  self.hidden_size = hidden_size
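The hunks above drop the `_is_sharding_propagation()` condition, so these Transformer classes take this branch whenever the parallel mode is AUTO_PARALLEL, regardless of the configured search mode. As an illustrative sketch (not part of this diff), sharding propagation itself is still selected at the context level:

    import mindspore as ms

    # Illustrative only: the layers above no longer re-check the search mode.
    ms.set_context(mode=ms.GRAPH_MODE)
    ms.set_auto_parallel_context(parallel_mode="auto_parallel",
                                 search_mode="sharding_propagation")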
@@ -52,9 +52,8 @@ def _is_in_hybrid_parallel_mode():


  def _is_pynative_parallel():
- run_mode = context.get_context('mode')
  parallel_mode = context.get_auto_parallel_context('parallel_mode')
- return run_mode == context.PYNATIVE_MODE and parallel_mode in (
+ return context.get_context('mode') == context.PYNATIVE_MODE and parallel_mode in (
  context.ParallelMode.SEMI_AUTO_PARALLEL, context.ParallelMode.AUTO_PARALLEL)

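Reassembled from the hunk above (the temporary `run_mode` variable is simply inlined, behavior unchanged), the refactored helper reads:

    from mindspore import context

    def _is_pynative_parallel():
        # True only when running in PyNative mode under (semi-)auto parallel.
        parallel_mode = context.get_auto_parallel_context('parallel_mode')
        return context.get_context('mode') == context.PYNATIVE_MODE and parallel_mode in (
            context.ParallelMode.SEMI_AUTO_PARALLEL, context.ParallelMode.AUTO_PARALLEL)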
 
@@ -229,7 +229,7 @@ def set_algo_parameters(**kwargs):
  """
  Set parameters in the algorithm for parallel strategy searching. See a typical use in
  `test_auto_parallel_resnet.py
- <https://gitee.com/mindspore/mindspore/blob/r2.0.0-alpha/tests/ut/python/parallel/test_auto_parallel_resnet.py>`_.
+ <https://gitee.com/mindspore/mindspore/blob/r2.0/tests/ut/python/parallel/test_auto_parallel_resnet.py>`_.

  Note:
  The attribute name is required. This interface works ONLY in AUTO_PARALLEL mode.
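A minimal usage sketch for this interface (not taken from the diff): it only takes effect in AUTO_PARALLEL mode, and `fully_use_devices` is used here as an example keyword; treat the exact parameter name as an assumption and confirm it against the r2.0 API reference.

    import mindspore as ms

    # Only honored in AUTO_PARALLEL mode; the keyword below is illustrative.
    ms.set_auto_parallel_context(parallel_mode="auto_parallel")
    ms.set_algo_parameters(fully_use_devices=False)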
@@ -22,8 +22,9 @@ from collections import defaultdict
  import numpy as np
  import mindspore as ms
  from mindspore.parallel._parallel_serialization import _rank_list_for_transform_parallel_checkpoint, \
- _transform_parallel_checkpoint, _get_device_num_from_strategy, _make_dir, _load_strategy_file, \
- _extract_layout_map, _extract_src_dst_layout_map, _parameter_not_in_local_stage, _extract_pipeline_stage_num
+ _transform_parallel_checkpoint, _get_device_num_from_strategy, _make_dir, \
+ _extract_layout_map, _extract_src_dst_layout_map, _parameter_not_in_local_stage, _extract_pipeline_stage_num, \
+ _merge_protobuf_strategy, _merge_json_strategy


  __all__ = ["merge_pipeline_strategys", "rank_list_for_transform", "transform_checkpoint_by_rank",
@@ -33,6 +34,9 @@ __all__ = ["merge_pipeline_strategys", "rank_list_for_transform", "transform_che
  def merge_pipeline_strategys(src_strategy_dirs, dst_strategy_file):
  """
  Merge parallel strategy between all pipeline stages in pipeline parallel mode.
+ For more details about converting distributed Checkpoint, please refer to
+ `Distributed Resilience Training and
+ Inference <https://www.mindspore.cn/tutorials/experts/en/r2.0/parallel/resilience_train_and_predict.html>`_.

  Note:
  Strategy file of each pipeline stage should be included in src_strategy_dirs.
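Based on the signature above, a hedged call sketch (the paths are placeholders, and exposing the function at the package top level is an assumption): the per-stage strategy files collected in one directory are merged into a single file, and after the body rewrite in the next hunk they must be either all '.ckpt' or all '.json'.

    import mindspore as ms

    # src_pipeline_strategys/ holds one strategy file per pipeline stage.
    ms.merge_pipeline_strategys("./src_pipeline_strategys", "./merged_strategy.ckpt")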
@@ -55,32 +59,24 @@ def merge_pipeline_strategys(src_strategy_dirs, dst_strategy_file):
  _make_dir(dst_strategy_dir, "path")
  if not os.path.isdir(src_strategy_dirs):
  raise NotADirectoryError("src_strategy_dirs {} is not a directory.".format(src_strategy_dirs))
- src_strategy_files = os.path.join(src_strategy_dirs, "*.ckpt")
- dst_parallel_strategy_map = ms.train.node_strategy_pb2.ParallelStrategyMap()
- merged_stage = []
- for src_strategy_file in glob.glob(src_strategy_files):
- src_parallel_strategy_map = _load_strategy_file(src_strategy_file)
- strategy_items = src_parallel_strategy_map.parallel_strategy_item
- layout_items = src_parallel_strategy_map.parallel_layout_item
- if not strategy_items or not layout_items:
- raise ValueError("The strategy file {} is empty".format(src_strategy_file))
- pipeline_stage = strategy_items[0].parallel_strategys.stage
- if pipeline_stage in merged_stage:
- continue
- for layout_item in layout_items:
- layout_item.param_name = "-".join([str(pipeline_stage), layout_item.param_name])
- dst_parallel_strategy_map.parallel_strategy_item.extend(strategy_items)
- dst_parallel_strategy_map.parallel_layout_item.extend(layout_items)
- merged_stage.append(pipeline_stage)
- dst_parallel_strategy_map.current_stage = 1
- with open(dst_strategy_file, "wb") as f:
- f.write(dst_parallel_strategy_map.SerializeToString())
+ src_strategy_files_protobuf = glob.glob(os.path.join(src_strategy_dirs, "*.ckpt"))
+ src_strategy_files_json = glob.glob(os.path.join(src_strategy_dirs, "*.json"))
+ if src_strategy_files_protobuf and src_strategy_files_json:
+ raise ValueError("The strategys format should be all '.ckpt' or all '.json'")
+ is_protobuf = len(src_strategy_files_protobuf) > 0
+ if is_protobuf:
+ _merge_protobuf_strategy(src_strategy_files_protobuf, dst_strategy_file)
+ else:
+ _merge_json_strategy(src_strategy_files_json, dst_strategy_file)
+


  def rank_list_for_transform(rank_id, src_strategy_file=None, dst_strategy_file=None):
  """
  List of original distributed checkpoint rank index for obtaining the target checkpoint of a rank_id
- during the distributed checkpoint conversion.
+ during the distributed checkpoint conversion. For more details about converting distributed Checkpoint,
+ please refer to `Distributed Resilience Training and
+ Inference <https://www.mindspore.cn/tutorials/experts/en/r2.0/parallel/resilience_train_and_predict.html>`_.

  Args:
  rank_id (int): The rank of which distributed checkpoint needs to be obtained after conversion.
@@ -106,7 +102,7 @@ def rank_list_for_transform(rank_id, src_strategy_file=None, dst_strategy_file=N
  >>> rank_list = rank_list_for_transform(rank_id, "./src_strategy.ckpt", "./dst_strategy.ckpt")
  >>> checkpoint_files_map = {}
  >>> for rank in rank_list:
- >>> checkpoint_files_map[rank] = "./pangu{}-100_2.ckpt".format(rank)
+ ... checkpoint_files_map[rank] = "./pangu{}-100_2.ckpt".format(rank)

  """
  if not isinstance(rank_id, int):
@@ -133,14 +129,18 @@ def rank_list_for_transform(rank_id, src_strategy_file=None, dst_strategy_file=N
  src_rank_id_start = src_pipeline_stage_id * src_stage_device_num
  result_set.update([src_rank_id_start + rank for rank in needed_rank_list_in_local_stage])
  handled_pipeline_stage.append(src_pipeline_stage_id)
- return list(result_set)
+ result_list = list(result_set)
+ result_list.sort(reverse=True)
+ return result_list


  def transform_checkpoint_by_rank(rank_id, checkpoint_files_map, save_checkpoint_file_name,
  src_strategy_file=None, dst_strategy_file=None):
  """
  Transform distributed checkpoint from source sharding strategy to destination sharding strategy by rank
- for a network.
+ for a network. For more details about converting distributed Checkpoint, please refer to
+ `Distributed Resilience Training and
+ Inference <https://www.mindspore.cn/tutorials/experts/en/r2.0/parallel/resilience_train_and_predict.html>`_.

  Args:
  rank_id (int): The rank of which distributed checkpoint needs to be obtained after conversion.
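The `@@ -133` hunk above changes what `rank_list_for_transform` returns: instead of an arbitrarily ordered `list(result_set)`, the needed source ranks now come back sorted in descending order. A standalone illustration of the new behavior:

    # e.g. the set of source ranks required for one destination rank
    result_set = {0, 3, 1, 7}
    result_list = list(result_set)
    result_list.sort(reverse=True)
    print(result_list)  # [7, 3, 1, 0]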
@@ -167,13 +167,13 @@ def transform_checkpoint_by_rank(rank_id, checkpoint_files_map, save_checkpoint_

  Examples:
  >>> dst_device_num = 8
- >>> for rank_id in range(dst_device_num)
- >>> rank_list = rank_list_for_transform(rank_id, "./src_strategy.ckpt", "./dst_strategy.ckpt")
- >>> checkpoint_files_map = {}
- >>> for rank in rank_list:
- >>> checkpoint_files_map[rank] = "./origin_checkpoint_rank{}/pangu{}-100_2.ckpt".format(rank)
- >>> save_checkpoint_file_name = "./new_checkpoint_rank{}/pangu{}-100_2.ckpt".format(rank_id)
- >>> transform_checkpoint_by_rank(rank_id, checkpoint_files_map, save_checkpoint_file_name,
+ >>> for rank_id in range(dst_device_num):
+ ... rank_list = rank_list_for_transform(rank_id, "./src_strategy.ckpt", "./dst_strategy.ckpt")
+ ... checkpoint_files_map = {}
+ ... for rank in rank_list:
+ ... checkpoint_files_map[rank] = "./origin_checkpoint_rank{}/pangu{}-100_2.ckpt".format(rank)
+ ... save_checkpoint_file_name = "./new_checkpoint_rank{}/pangu{}-100_2.ckpt".format(rank_id)
+ ... transform_checkpoint_by_rank(rank_id, checkpoint_files_map, save_checkpoint_file_name,
  ... "./src_strategy.ckpt", "./dst_strategy.ckpt")

  """
@@ -222,9 +222,12 @@ def transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
  dst_strategy_file=None):
  """
  Transform distributed checkpoint from source sharding strategy to destination sharding strategy for a rank.
+ For more details about converting distributed Checkpoint, please refer to
+ `Distributed Resilience Training and
+ Inference <https://www.mindspore.cn/tutorials/experts/en/r2.0/parallel/resilience_train_and_predict.html>`_.

  Note:
- The src_checkpoints_dir directory structure should be organized like "src_checkpoints_dir/rank_0/a.ckpt", the
+ The `src_checkpoints_dir` directory structure should be organized like "src_checkpoints_dir/rank_0/a.ckpt", the
  rank number should be set to a subdirectory and the checkpoint file is stored in this subdirectory. If multiple
  files exist in a rank directory, the last file in the lexicgraphic order would be selected.
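A hedged sketch of calling `transform_checkpoints` with the directory layout described in the Note above; the file names, prefix, and top-level import path are illustrative assumptions.

    import mindspore as ms

    # Expected source layout, per the Note above:
    #   ./src_checkpoints/rank_0/net-100_2.ckpt
    #   ./src_checkpoints/rank_1/net-100_2.ckpt
    #   ...
    ms.transform_checkpoints("./src_checkpoints", "./dst_checkpoints", "net",
                             "./src_strategy.ckpt", "./dst_strategy.ckpt")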
 
@@ -154,7 +154,7 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascen
  in_strategy and out_strategy define the input and output layout respectively.
  in_strategy/out_strategy should be a tuple, each element of which corresponds to the desired layout of
  this input/output, and None represents data_parallel,
- which can refer to the description of `mindspore.ops.Primitive.shard`.
+ which can refer to the description of :func:`mindspore.ops.Primitive.shard`.
  The parallel strategies of remaining operators are derived from the strategy specified by the input and output.

  Note:
@@ -162,16 +162,18 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascen
  set the parallel mode in `set_auto_parallel_context` to "auto_parallel"
  and the search mode to "sharding_propagation".
  If the input contain Parameter, its strategy should be set in `in_strategy`.
+ For more details about shard, please refer to `Functional Operator Sharding
+ <https://www.mindspore.cn/tutorials/experts/en/r2.0/parallel/pynative_shard_function_parallel.html>`_.

  Args:
  fn (Union[Cell, Function]): Function to be executed in parallel.
  Its arguments and return value must be Tensor or Parameter.
- If fn is a Cell with parameters, fn needs to be an instantiated object,
+ If `fn` is a Cell with parameters, `fn` needs to be an instantiated object,
  otherwise its arguments cannot be accessed.
  in_strategy (tuple): Define the layout of inputs, each element of the tuple should be a tuple or None.
  Tuple defines the layout of the corresponding input
  and None represents a data parallel strategy.
- out_strategy (Union[tuple, None]): Define the layout of outputs similar with in_strategy.
+ out_strategy (Union[tuple, None]): Define the layout of outputs similar with `in_strategy`.
  It is not in use right now. Default: None.
  parameter_plan (Union[dict, None]): Define the layout for the specified parameters. Each element in dict
  defines the layout of the parameter like "param_name: layout".
@@ -180,7 +182,7 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascen
  If the parameter name is incorrect or the corresponding parameter
  has been set, the parameter setting will be ignored.
  Default: None.
- device (string): Select a certain device target. It is not in use right now.
+ device (string): Select a certain `device` target. It is not in use right now.
  Support ["CPU", "GPU", "Ascend"]. Default: "Ascend".
  level (int): Option for parallel strategy infer algorithm, namely the object function, maximize computation
  over communication ratio, maximize speed performance, minimize memory usage etc. It is not in
@@ -190,20 +192,17 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascen
  Function, return the function that will be executed under auto parallel process.

  Raises:
- AssertionError:
- - If execute mode is not PYNATIVE_MODE.
- - If parallel mode is not "auto_parallel".
- - If search_mode it not "sharding_propagation".
- - If device_target it not "Ascend" or "GPU".
-
- TypeError:
- - If `in_strategy` is not a tuple.
- - If `out_strategy` is not a tuple or None.
- - If `parameter_plan` is not a dict or None.
- - If any key in `parameter_plan` is not a str.
- - If any value in `parameter_plan` is not a tuple.
- - If `device` is not a str.
- - If `level` is not a integer.
+ AssertionError: If execute mode is not PYNATIVE_MODE.
+ AssertionError: If parallel mode is not "auto_parallel".
+ AssertionError: If search_mode it not "sharding_propagation".
+ AssertionError: If device_target it not "Ascend" or "GPU".
+ TypeError: If `in_strategy` is not a tuple.
+ TypeError: If `out_strategy` is not a tuple or None.
+ TypeError: If `parameter_plan` is not a dict or None.
+ TypeError: If any key in `parameter_plan` is not a str.
+ TypeError: If any value in `parameter_plan` is not a tuple.
+ TypeError: If `device` is not a str.
+ TypeError: If `level` is not an integer.

  Supported Platforms:
  ``Ascend`` ``GPU``
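Pulling the docstring constraints together, a hedged end-to-end sketch of `shard`: it assumes a properly launched 8-device Ascend or GPU job, PyNative mode, and auto-parallel with sharding propagation; the strategy numbers are illustrative only, and the top-level `ms.shard` entry point is also an assumption.

    import numpy as np
    import mindspore as ms
    from mindspore import Tensor, ops

    ms.set_context(mode=ms.PYNATIVE_MODE)
    ms.set_auto_parallel_context(parallel_mode="auto_parallel",
                                 search_mode="sharding_propagation")

    def matmul_fn(x, w):
        return ops.matmul(x, w)

    # Split the first input's rows across 8 devices; None means a data-parallel
    # strategy for the second input.
    sharded_fn = ms.shard(matmul_fn, in_strategy=((8, 1), None))
    out = sharded_fn(Tensor(np.ones((16, 32)), ms.float32),
                     Tensor(np.ones((32, 8)), ms.float32))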
@@ -32,11 +32,11 @@ def check_valid_character_of_path(file_path):
  Returns:
  bool, whether valid.
  """
- re_path = r'^[/\\_a-zA-Z0-9-_.]+$'
+ re_path = r'^[/\\_a-zA-Z0-9-_.@]+$'
  path_valid = re.fullmatch(re_path, file_path)
  if not path_valid:
  msg = "The output path of profiler only supports alphabets(a-zA-Z), " \
- "digit(0-9) or {'-', '_', '.', '/'}, but got the absolute path= " + file_path
+ "digit(0-9) or {'-', '_', '.', '/', '@'}, but got the absolute path= " + file_path
  raise RuntimeError(msg)

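A quick illustration of the relaxed profiler path check above: '@' is now accepted alongside letters, digits and '-', '_', '.', '/', '\'.

    import re

    re_path = r'^[/\\_a-zA-Z0-9-_.@]+$'
    print(bool(re.fullmatch(re_path, "/data/user@host/profiler_output")))   # True
    print(bool(re.fullmatch(re_path, "/data/user name/profiler_output")))   # False: space is still rejected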