mindspore 2.0.0a0__cp38-cp38-win_amd64.whl → 2.0.0rc1__cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore has been flagged as potentially problematic; refer to the package registry's advisory page for more details.

Files changed (655)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +4 -2
  3. mindspore/_c_dataengine.cp38-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp38-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp38-win_amd64.pyd +0 -0
  6. mindspore/_check_jit_forbidden_api.py +102 -0
  7. mindspore/_checkparam.py +1066 -1001
  8. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +4 -3
  9. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -48
  10. mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -4
  11. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -4
  12. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
  13. mindspore/_extends/parse/__init__.py +5 -3
  14. mindspore/_extends/parse/namespace.py +16 -1
  15. mindspore/_extends/parse/parser.py +107 -22
  16. mindspore/_extends/parse/resources.py +0 -7
  17. mindspore/_extends/parse/standard_method.py +885 -413
  18. mindspore/amp.py +52 -57
  19. mindspore/boost/boost.py +2 -2
  20. mindspore/boost/boost_cell_wrapper.py +38 -20
  21. mindspore/boost/dim_reduce.py +3 -3
  22. mindspore/boost/group_loss_scale_manager.py +1 -1
  23. mindspore/common/__init__.py +4 -6
  24. mindspore/common/_decorator.py +2 -0
  25. mindspore/common/_register_for_adapter.py +55 -0
  26. mindspore/common/_stub_tensor.py +201 -0
  27. mindspore/common/_utils.py +41 -7
  28. mindspore/common/api.py +215 -141
  29. mindspore/common/dtype.py +8 -1
  30. mindspore/common/dump.py +2 -2
  31. mindspore/common/initializer.py +4 -2
  32. mindspore/common/jit_config.py +17 -13
  33. mindspore/common/mutable.py +33 -13
  34. mindspore/common/parameter.py +23 -21
  35. mindspore/common/seed.py +8 -24
  36. mindspore/common/sparse_tensor.py +62 -41
  37. mindspore/common/tensor.py +852 -1154
  38. mindspore/communication/__init__.py +2 -2
  39. mindspore/communication/_comm_helper.py +11 -4
  40. mindspore/communication/management.py +22 -21
  41. mindspore/config/op_info.config +501 -1008
  42. mindspore/context.py +201 -23
  43. mindspore/dataset/__init__.py +6 -6
  44. mindspore/dataset/audio/__init__.py +7 -7
  45. mindspore/dataset/audio/transforms.py +670 -30
  46. mindspore/dataset/audio/utils.py +47 -4
  47. mindspore/dataset/audio/validators.py +223 -1
  48. mindspore/dataset/callback/ds_callback.py +2 -2
  49. mindspore/dataset/core/config.py +210 -14
  50. mindspore/dataset/core/validator_helpers.py +2 -2
  51. mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
  52. mindspore/dataset/debug/debug_hook.py +65 -0
  53. mindspore/dataset/debug/pre_defined_hook.py +67 -0
  54. mindspore/dataset/engine/__init__.py +7 -3
  55. mindspore/dataset/engine/cache_client.py +1 -1
  56. mindspore/dataset/engine/datasets.py +322 -66
  57. mindspore/dataset/engine/datasets_audio.py +80 -76
  58. mindspore/dataset/engine/datasets_standard_format.py +51 -38
  59. mindspore/dataset/engine/datasets_text.py +232 -118
  60. mindspore/dataset/engine/datasets_user_defined.py +41 -17
  61. mindspore/dataset/engine/datasets_vision.py +746 -225
  62. mindspore/dataset/engine/graphdata.py +75 -10
  63. mindspore/dataset/engine/iterators.py +45 -5
  64. mindspore/dataset/engine/offload.py +48 -28
  65. mindspore/dataset/engine/validators.py +117 -8
  66. mindspore/dataset/text/__init__.py +6 -5
  67. mindspore/dataset/text/transforms.py +86 -3
  68. mindspore/dataset/text/utils.py +6 -4
  69. mindspore/dataset/text/validators.py +25 -0
  70. mindspore/dataset/transforms/__init__.py +3 -2
  71. mindspore/dataset/transforms/c_transforms.py +1 -1
  72. mindspore/dataset/transforms/transforms.py +2 -2
  73. mindspore/dataset/utils/__init__.py +2 -1
  74. mindspore/dataset/utils/line_reader.py +121 -0
  75. mindspore/dataset/vision/__init__.py +2 -3
  76. mindspore/dataset/vision/c_transforms.py +9 -9
  77. mindspore/dataset/vision/py_transforms.py +5 -5
  78. mindspore/dataset/vision/py_transforms_util.py +2 -0
  79. mindspore/dataset/vision/transforms.py +160 -161
  80. mindspore/dataset/vision/utils.py +3 -3
  81. mindspore/experimental/map_parameter.py +38 -26
  82. mindspore/include/OWNERS +0 -1
  83. mindspore/include/api/callback/callback.h +9 -13
  84. mindspore/include/api/callback/ckpt_saver.h +2 -2
  85. mindspore/include/api/callback/loss_monitor.h +2 -2
  86. mindspore/include/api/callback/lr_scheduler.h +5 -5
  87. mindspore/include/api/callback/time_monitor.h +2 -2
  88. mindspore/include/api/callback/train_accuracy.h +4 -6
  89. mindspore/include/api/cfg.h +19 -6
  90. mindspore/include/api/context.h +44 -9
  91. mindspore/include/api/delegate.h +1 -1
  92. mindspore/include/api/metrics/accuracy.h +2 -2
  93. mindspore/include/api/metrics/metrics.h +4 -3
  94. mindspore/include/api/model.h +9 -4
  95. mindspore/include/api/model_parallel_runner.h +2 -2
  96. mindspore/include/api/net.h +12 -11
  97. mindspore/include/api/serialization.h +19 -3
  98. mindspore/include/api/types.h +3 -3
  99. mindspore/include/dataset/constants.h +7 -0
  100. mindspore/include/dataset/text.h +59 -0
  101. mindspore/jpeg62.dll +0 -0
  102. mindspore/log.py +1 -1
  103. mindspore/mindrecord/filereader.py +18 -0
  104. mindspore/mindrecord/filewriter.py +197 -34
  105. mindspore/mindrecord/shardreader.py +9 -0
  106. mindspore/mindrecord/shardwriter.py +1 -1
  107. mindspore/mindrecord/tools/cifar100_to_mr.py +3 -3
  108. mindspore/mindrecord/tools/cifar10_to_mr.py +3 -3
  109. mindspore/mindrecord/tools/csv_to_mr.py +3 -3
  110. mindspore/mindrecord/tools/imagenet_to_mr.py +16 -11
  111. mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
  112. mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
  113. mindspore/mindspore_backend.dll +0 -0
  114. mindspore/mindspore_common.dll +0 -0
  115. mindspore/mindspore_core.dll +0 -0
  116. mindspore/mindspore_glog.dll +0 -0
  117. mindspore/mindspore_shared_lib.dll +0 -0
  118. mindspore/nn/__init__.py +0 -4
  119. mindspore/nn/cell.py +204 -132
  120. mindspore/nn/dynamic_lr.py +1 -1
  121. mindspore/nn/grad/cell_grad.py +7 -6
  122. mindspore/nn/layer/__init__.py +5 -4
  123. mindspore/nn/layer/activation.py +40 -89
  124. mindspore/nn/layer/basic.py +255 -624
  125. mindspore/nn/layer/channel_shuffle.py +7 -6
  126. mindspore/nn/layer/combined.py +1 -1
  127. mindspore/nn/layer/container.py +41 -4
  128. mindspore/nn/layer/conv.py +64 -28
  129. mindspore/nn/layer/dense.py +9 -8
  130. mindspore/nn/layer/embedding.py +27 -25
  131. mindspore/nn/layer/image.py +53 -46
  132. mindspore/nn/layer/math.py +97 -105
  133. mindspore/nn/layer/normalization.py +117 -86
  134. mindspore/nn/layer/padding.py +185 -95
  135. mindspore/nn/layer/pooling.py +817 -414
  136. mindspore/nn/layer/rnn_cells.py +10 -15
  137. mindspore/nn/layer/rnns.py +37 -38
  138. mindspore/nn/layer/thor_layer.py +11 -12
  139. mindspore/nn/layer/timedistributed.py +5 -5
  140. mindspore/nn/layer/transformer.py +701 -0
  141. mindspore/nn/learning_rate_schedule.py +8 -8
  142. mindspore/nn/loss/__init__.py +5 -4
  143. mindspore/nn/loss/loss.py +334 -199
  144. mindspore/nn/optim/ada_grad.py +6 -6
  145. mindspore/nn/optim/adadelta.py +2 -3
  146. mindspore/nn/optim/adafactor.py +4 -5
  147. mindspore/nn/optim/adam.py +126 -62
  148. mindspore/nn/optim/adamax.py +3 -4
  149. mindspore/nn/optim/adasum.py +6 -6
  150. mindspore/nn/optim/asgd.py +2 -2
  151. mindspore/nn/optim/ftrl.py +67 -38
  152. mindspore/nn/optim/lamb.py +4 -5
  153. mindspore/nn/optim/lars.py +2 -2
  154. mindspore/nn/optim/lazyadam.py +43 -4
  155. mindspore/nn/optim/momentum.py +6 -5
  156. mindspore/nn/optim/optimizer.py +3 -1
  157. mindspore/nn/optim/proximal_ada_grad.py +2 -2
  158. mindspore/nn/optim/rmsprop.py +1 -1
  159. mindspore/nn/optim/rprop.py +8 -9
  160. mindspore/nn/optim/sgd.py +19 -13
  161. mindspore/nn/optim/thor.py +10 -15
  162. mindspore/nn/probability/__init__.py +0 -2
  163. mindspore/nn/probability/bijector/bijector.py +4 -4
  164. mindspore/nn/probability/bijector/invert.py +1 -1
  165. mindspore/nn/probability/bijector/softplus.py +2 -2
  166. mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
  167. mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
  168. mindspore/nn/probability/distribution/_utils/utils.py +9 -15
  169. mindspore/nn/probability/distribution/bernoulli.py +3 -3
  170. mindspore/nn/probability/distribution/beta.py +1 -1
  171. mindspore/nn/probability/distribution/categorical.py +5 -7
  172. mindspore/nn/probability/distribution/cauchy.py +3 -3
  173. mindspore/nn/probability/distribution/distribution.py +2 -2
  174. mindspore/nn/probability/distribution/exponential.py +2 -2
  175. mindspore/nn/probability/distribution/gamma.py +3 -3
  176. mindspore/nn/probability/distribution/geometric.py +1 -1
  177. mindspore/nn/probability/distribution/gumbel.py +3 -3
  178. mindspore/nn/probability/distribution/half_normal.py +15 -11
  179. mindspore/nn/probability/distribution/laplace.py +16 -13
  180. mindspore/nn/probability/distribution/logistic.py +2 -2
  181. mindspore/nn/probability/distribution/normal.py +1 -1
  182. mindspore/nn/probability/distribution/poisson.py +1 -1
  183. mindspore/nn/probability/distribution/student_t.py +20 -15
  184. mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
  185. mindspore/nn/probability/distribution/uniform.py +2 -2
  186. mindspore/nn/reinforcement/_tensors_queue.py +3 -3
  187. mindspore/nn/reinforcement/tensor_array.py +2 -2
  188. mindspore/nn/sparse/sparse.py +2 -2
  189. mindspore/nn/wrap/cell_wrapper.py +27 -10
  190. mindspore/nn/wrap/grad_reducer.py +2 -2
  191. mindspore/nn/wrap/loss_scale.py +40 -24
  192. mindspore/numpy/array_creations.py +33 -22
  193. mindspore/numpy/array_ops.py +35 -30
  194. mindspore/numpy/logic_ops.py +6 -27
  195. mindspore/numpy/math_ops.py +22 -19
  196. mindspore/numpy/utils.py +1 -1
  197. mindspore/numpy/utils_const.py +108 -58
  198. mindspore/opencv_core452.dll +0 -0
  199. mindspore/opencv_imgcodecs452.dll +0 -0
  200. mindspore/opencv_imgproc452.dll +0 -0
  201. mindspore/ops/_constants.py +0 -6
  202. mindspore/ops/_grad/__init__.py +2 -1
  203. mindspore/ops/_grad/grad_array_ops.py +86 -117
  204. mindspore/ops/_grad/grad_base.py +23 -1
  205. mindspore/ops/_grad/grad_clip_ops.py +2 -3
  206. mindspore/ops/_grad/grad_comm_ops.py +34 -24
  207. mindspore/ops/_grad/grad_implementations.py +9 -45
  208. mindspore/ops/_grad/grad_inner_ops.py +47 -4
  209. mindspore/ops/_grad/grad_math_ops.py +142 -117
  210. mindspore/ops/_grad/grad_nn_ops.py +71 -165
  211. mindspore/ops/_grad/grad_sequence_ops.py +296 -0
  212. mindspore/ops/_grad/grad_sparse.py +7 -6
  213. mindspore/ops/_grad_experimental/__init__.py +1 -0
  214. mindspore/ops/_grad_experimental/grad_array_ops.py +150 -15
  215. mindspore/ops/_grad_experimental/grad_image_ops.py +16 -7
  216. mindspore/ops/_grad_experimental/grad_inner_ops.py +1 -22
  217. mindspore/ops/_grad_experimental/grad_linalg_ops.py +4 -11
  218. mindspore/ops/_grad_experimental/grad_math_ops.py +210 -89
  219. mindspore/ops/_grad_experimental/grad_nn_ops.py +26 -22
  220. mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
  221. mindspore/ops/_grad_experimental/grad_sparse_ops.py +49 -8
  222. mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
  223. mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +2 -2
  224. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
  225. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
  226. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +4 -4
  227. mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
  228. mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
  229. mindspore/ops/_op_impl/_custom_op/correction_mul.py +2 -2
  230. mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
  231. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -5
  232. mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
  233. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
  234. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
  235. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
  236. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
  237. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
  238. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
  239. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
  240. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
  241. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
  242. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
  243. mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
  244. mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
  245. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  246. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
  247. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
  248. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
  249. mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
  250. mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -4
  251. mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
  252. mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
  253. mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
  254. mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
  255. mindspore/ops/_op_impl/aicpu/__init__.py +236 -4
  256. mindspore/ops/_op_impl/aicpu/abs.py +36 -0
  257. mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_v1.py → adaptive_avg_pool_2d.py} +6 -5
  258. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
  259. mindspore/ops/_op_impl/aicpu/add.py +43 -0
  260. mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
  261. mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
  262. mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
  263. mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -43
  264. mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
  265. mindspore/{compression/common/__init__.py → ops/_op_impl/aicpu/bessel_i0.py} +15 -8
  266. mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
  267. mindspore/ops/_op_impl/aicpu/conj.py +11 -0
  268. mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +0 -3
  269. mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
  270. mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +43 -0
  271. mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_grad_v1.py → digamma.py} +7 -9
  272. mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
  273. mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
  274. mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
  275. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
  276. mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
  277. mindspore/ops/_op_impl/aicpu/greater.py +41 -0
  278. mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
  279. mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
  280. mindspore/ops/_op_impl/aicpu/less.py +41 -0
  281. mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/lgamma.py} +16 -10
  282. mindspore/ops/_op_impl/aicpu/mirror_pad.py +0 -4
  283. mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
  284. mindspore/ops/_op_impl/aicpu/mul.py +3 -1
  285. mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
  286. mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
  287. mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
  288. mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
  289. mindspore/ops/_op_impl/aicpu/polar.py +32 -0
  290. mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
  291. mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
  292. mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
  293. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
  294. mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
  295. mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
  296. mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
  297. mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
  298. mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
  299. mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
  300. mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
  301. mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
  302. mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
  303. mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
  304. mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
  305. mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
  306. mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
  307. mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
  308. mindspore/ops/_op_impl/aicpu/sparse_slice.py +4 -0
  309. mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +6 -0
  310. mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
  311. mindspore/ops/_op_impl/aicpu/trans_data.py +1 -0
  312. mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
  313. mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
  314. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
  315. mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
  316. mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
  317. mindspore/ops/_op_impl/cpu/sparse_slice.py +4 -0
  318. mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +6 -0
  319. mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
  320. mindspore/ops/_op_impl/tbe/__init__.py +27 -611
  321. mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
  322. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  323. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
  324. mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +1 -0
  325. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  326. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
  327. mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
  328. mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
  329. mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
  330. mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
  331. mindspore/ops/_op_impl/tbe/cast.py +0 -2
  332. mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
  333. mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
  334. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +2 -2
  335. mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
  336. mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
  337. mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
  338. mindspore/ops/_op_impl/tbe/matmul_ds.py +2 -0
  339. mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
  340. mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
  341. mindspore/ops/_op_impl/tbe/scatter_mul.py +2 -0
  342. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -2
  343. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  344. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
  345. mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
  346. mindspore/ops/_register_for_op.py +1 -0
  347. mindspore/ops/_utils/__init__.py +1 -2
  348. mindspore/ops/_utils/utils.py +19 -40
  349. mindspore/ops/_vmap/vmap_array_ops.py +116 -38
  350. mindspore/ops/_vmap/vmap_base.py +16 -9
  351. mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
  352. mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
  353. mindspore/ops/_vmap/vmap_grad_nn_ops.py +7 -5
  354. mindspore/ops/_vmap/vmap_image_ops.py +12 -5
  355. mindspore/ops/_vmap/vmap_math_ops.py +46 -5
  356. mindspore/ops/_vmap/vmap_nn_ops.py +15 -21
  357. mindspore/ops/_vmap/vmap_random_ops.py +1 -1
  358. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  359. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  360. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
  361. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
  362. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  363. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  364. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  365. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
  366. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +220 -106
  367. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  368. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
  369. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
  370. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
  371. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
  372. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
  373. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
  374. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
  375. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  376. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  377. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -23
  378. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -17
  379. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
  380. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  381. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  382. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  383. mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
  384. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  385. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +39 -41
  386. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
  387. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +41 -43
  388. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +51 -57
  389. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  390. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
  391. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
  392. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  393. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
  394. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
  395. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
  396. mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
  397. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  398. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
  399. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
  400. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
  401. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
  402. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
  403. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  404. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
  405. mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
  406. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  407. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  408. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +24 -25
  409. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  410. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  411. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  412. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
  413. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
  414. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
  415. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  416. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +18 -19
  417. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +53 -53
  418. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
  419. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +77 -85
  420. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
  421. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
  422. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  423. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
  424. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
  425. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  426. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
  427. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
  428. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  429. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +37 -39
  430. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +70 -72
  431. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  432. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
  433. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  434. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  435. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +17 -17
  436. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
  437. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
  438. mindspore/ops/bprop_mindir/generate_mindir.py +2 -0
  439. mindspore/ops/composite/__init__.py +7 -8
  440. mindspore/ops/composite/base.py +101 -47
  441. mindspore/ops/composite/math_ops.py +188 -158
  442. mindspore/ops/composite/multitype_ops/_compile_utils.py +415 -170
  443. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +142 -87
  444. mindspore/ops/composite/multitype_ops/add_impl.py +6 -1
  445. mindspore/ops/composite/multitype_ops/div_impl.py +2 -3
  446. mindspore/ops/composite/multitype_ops/getitem_impl.py +31 -3
  447. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
  448. mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
  449. mindspore/ops/composite/multitype_ops/in_impl.py +9 -0
  450. mindspore/ops/composite/multitype_ops/less_equal_impl.py +31 -0
  451. mindspore/ops/composite/multitype_ops/less_impl.py +31 -0
  452. mindspore/ops/composite/multitype_ops/mul_impl.py +21 -5
  453. mindspore/ops/composite/multitype_ops/not_in_impl.py +9 -0
  454. mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
  455. mindspore/ops/composite/multitype_ops/setitem_impl.py +21 -3
  456. mindspore/ops/composite/multitype_ops/sub_impl.py +1 -1
  457. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +35 -4
  458. mindspore/ops/function/__init__.py +152 -8
  459. mindspore/ops/function/array_func.py +2555 -674
  460. mindspore/ops/function/clip_func.py +209 -13
  461. mindspore/ops/function/debug_func.py +2 -2
  462. mindspore/ops/function/grad/__init__.py +2 -1
  463. mindspore/ops/function/grad/grad_func.py +147 -62
  464. mindspore/ops/function/image_func.py +54 -38
  465. mindspore/ops/function/linalg_func.py +167 -16
  466. mindspore/ops/function/math_func.py +4849 -1492
  467. mindspore/ops/function/nn_func.py +2573 -988
  468. mindspore/ops/function/other_func.py +115 -0
  469. mindspore/ops/function/parameter_func.py +3 -3
  470. mindspore/ops/function/random_func.py +790 -73
  471. mindspore/ops/function/sparse_func.py +98 -78
  472. mindspore/ops/function/sparse_unary_func.py +54 -53
  473. mindspore/ops/function/spectral_func.py +27 -24
  474. mindspore/ops/function/vmap_func.py +22 -2
  475. mindspore/ops/functional.py +97 -37
  476. mindspore/ops/op_info_register.py +70 -28
  477. mindspore/ops/operations/__init__.py +47 -14
  478. mindspore/ops/operations/_csr_ops.py +7 -7
  479. mindspore/ops/operations/_embedding_cache_ops.py +5 -5
  480. mindspore/ops/operations/_grad_ops.py +276 -187
  481. mindspore/ops/operations/_inner_ops.py +319 -113
  482. mindspore/ops/operations/_ms_kernel.py +10 -8
  483. mindspore/ops/operations/_ocr_ops.py +9 -9
  484. mindspore/ops/operations/_opaque_predicate_registry.py +4 -0
  485. mindspore/ops/operations/_quant_ops.py +137 -102
  486. mindspore/ops/operations/_rl_inner_ops.py +121 -60
  487. mindspore/ops/operations/_scalar_ops.py +466 -0
  488. mindspore/ops/operations/_sequence_ops.py +1004 -2
  489. mindspore/ops/operations/_tensor_array.py +10 -11
  490. mindspore/ops/operations/_thor_ops.py +1 -1
  491. mindspore/ops/operations/array_ops.py +801 -466
  492. mindspore/ops/operations/comm_ops.py +51 -49
  493. mindspore/ops/operations/control_ops.py +2 -2
  494. mindspore/ops/operations/custom_ops.py +123 -44
  495. mindspore/ops/operations/debug_ops.py +24 -24
  496. mindspore/ops/operations/image_ops.py +240 -153
  497. mindspore/ops/operations/inner_ops.py +34 -50
  498. mindspore/ops/operations/linalg_ops.py +31 -9
  499. mindspore/ops/operations/math_ops.py +988 -757
  500. mindspore/ops/operations/nn_ops.py +965 -819
  501. mindspore/ops/operations/other_ops.py +51 -40
  502. mindspore/ops/operations/random_ops.py +204 -122
  503. mindspore/ops/operations/rl_ops.py +8 -9
  504. mindspore/ops/operations/sparse_ops.py +254 -93
  505. mindspore/ops/operations/spectral_ops.py +35 -3
  506. mindspore/ops/primitive.py +111 -9
  507. mindspore/parallel/_auto_parallel_context.py +189 -83
  508. mindspore/parallel/_offload_context.py +185 -0
  509. mindspore/parallel/_parallel_serialization.py +99 -7
  510. mindspore/parallel/_ps_context.py +9 -5
  511. mindspore/parallel/_recovery_context.py +1 -1
  512. mindspore/parallel/_tensor.py +7 -1
  513. mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
  514. mindspore/{nn/transformer → parallel/_transformer}/layers.py +6 -37
  515. mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
  516. mindspore/{nn/transformer → parallel/_transformer}/moe.py +20 -16
  517. mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
  518. mindspore/{nn/transformer → parallel/_transformer}/transformer.py +48 -111
  519. mindspore/parallel/_utils.py +1 -2
  520. mindspore/parallel/algo_parameter_config.py +1 -1
  521. mindspore/parallel/checkpoint_transform.py +37 -34
  522. mindspore/parallel/shard.py +17 -18
  523. mindspore/profiler/common/validator/validate_path.py +2 -2
  524. mindspore/profiler/envprofiling.py +69 -47
  525. mindspore/profiler/parser/ascend_timeline_generator.py +49 -42
  526. mindspore/profiler/parser/base_timeline_generator.py +49 -56
  527. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +98 -78
  528. mindspore/profiler/parser/hwts_log_parser.py +1 -1
  529. mindspore/profiler/parser/integrator.py +15 -14
  530. mindspore/profiler/parser/minddata_analyzer.py +2 -2
  531. mindspore/profiler/parser/msadvisor_analyzer.py +12 -25
  532. mindspore/profiler/parser/msadvisor_parser.py +2 -4
  533. mindspore/profiler/parser/optime_parser.py +17 -18
  534. mindspore/profiler/parser/profiler_info.py +2 -1
  535. mindspore/profiler/profiling.py +218 -186
  536. mindspore/rewrite/__init__.py +3 -1
  537. mindspore/rewrite/api/node.py +1 -114
  538. mindspore/rewrite/api/node_type.py +3 -0
  539. mindspore/rewrite/api/pattern_engine.py +31 -1
  540. mindspore/rewrite/api/scoped_value.py +4 -4
  541. mindspore/rewrite/api/symbol_tree.py +3 -78
  542. mindspore/rewrite/api/tree_node_helper.py +1 -1
  543. mindspore/rewrite/ast_creator_register.py +1 -0
  544. mindspore/rewrite/ast_helpers/__init__.py +2 -2
  545. mindspore/rewrite/ast_helpers/ast_creator.py +1 -2
  546. mindspore/rewrite/ast_helpers/ast_finder.py +65 -0
  547. mindspore/rewrite/ast_helpers/ast_modifier.py +11 -3
  548. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +18 -2
  549. mindspore/rewrite/namespace.py +0 -2
  550. mindspore/rewrite/node.py +157 -11
  551. mindspore/rewrite/parsers/assign_parser.py +231 -53
  552. mindspore/rewrite/parsers/class_def_parser.py +187 -109
  553. mindspore/rewrite/parsers/for_parser.py +24 -14
  554. mindspore/rewrite/parsers/function_def_parser.py +21 -4
  555. mindspore/rewrite/parsers/if_parser.py +6 -2
  556. mindspore/rewrite/sparsify/__init__.py +0 -0
  557. mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
  558. mindspore/rewrite/sparsify/sparsify.py +109 -0
  559. mindspore/rewrite/sparsify/utils.py +173 -0
  560. mindspore/rewrite/symbol_tree.py +256 -133
  561. mindspore/rewrite/symbol_tree_builder.py +38 -1
  562. mindspore/run_check/_check_version.py +69 -63
  563. mindspore/run_check/run_check.py +2 -1
  564. mindspore/tinyxml2.dll +0 -0
  565. mindspore/train/__init__.py +1 -1
  566. mindspore/train/_utils.py +28 -5
  567. mindspore/train/amp.py +273 -102
  568. mindspore/train/callback/_backup_and_restore.py +5 -5
  569. mindspore/train/callback/_callback.py +2 -2
  570. mindspore/train/callback/_checkpoint.py +3 -3
  571. mindspore/train/callback/_early_stop.py +3 -3
  572. mindspore/train/callback/_lambda_callback.py +2 -2
  573. mindspore/train/callback/_landscape.py +29 -31
  574. mindspore/train/callback/_loss_monitor.py +3 -3
  575. mindspore/train/callback/_on_request_exit.py +3 -3
  576. mindspore/train/callback/_reduce_lr_on_plateau.py +4 -4
  577. mindspore/train/callback/_summary_collector.py +23 -16
  578. mindspore/train/callback/_time_monitor.py +3 -3
  579. mindspore/train/checkpoint_pb2.py +68 -8
  580. mindspore/train/data_sink.py +15 -3
  581. mindspore/train/dataset_helper.py +10 -15
  582. mindspore/train/loss_scale_manager.py +8 -11
  583. mindspore/train/metrics/__init__.py +1 -1
  584. mindspore/train/metrics/bleu_score.py +1 -1
  585. mindspore/train/metrics/confusion_matrix.py +1 -1
  586. mindspore/train/metrics/cosine_similarity.py +1 -1
  587. mindspore/train/metrics/dice.py +2 -2
  588. mindspore/train/metrics/fbeta.py +1 -1
  589. mindspore/train/metrics/hausdorff_distance.py +4 -3
  590. mindspore/train/metrics/mean_surface_distance.py +2 -2
  591. mindspore/train/metrics/occlusion_sensitivity.py +1 -1
  592. mindspore/train/metrics/perplexity.py +1 -1
  593. mindspore/train/metrics/precision.py +1 -1
  594. mindspore/train/metrics/recall.py +1 -1
  595. mindspore/train/metrics/roc.py +2 -2
  596. mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
  597. mindspore/train/mind_ir_pb2.py +116 -37
  598. mindspore/train/model.py +45 -28
  599. mindspore/train/serialization.py +295 -188
  600. mindspore/train/summary/_summary_adapter.py +1 -1
  601. mindspore/train/summary/summary_record.py +43 -13
  602. mindspore/train/train_thor/convert_utils.py +2 -2
  603. mindspore/train/train_thor/dataset_helper.py +3 -3
  604. mindspore/turbojpeg.dll +0 -0
  605. mindspore/version.py +1 -1
  606. {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +3 -2
  607. {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +610 -541
  608. mindspore/compression/__init__.py +0 -19
  609. mindspore/compression/common/constant.py +0 -124
  610. mindspore/compression/export/__init__.py +0 -19
  611. mindspore/compression/export/quant_export.py +0 -515
  612. mindspore/compression/quant/__init__.py +0 -28
  613. mindspore/compression/quant/qat.py +0 -634
  614. mindspore/compression/quant/quant_utils.py +0 -462
  615. mindspore/compression/quant/quantizer.py +0 -68
  616. mindspore/nn/layer/quant.py +0 -1868
  617. mindspore/nn/layer/rnn_utils.py +0 -90
  618. mindspore/nn/probability/dpn/__init__.py +0 -22
  619. mindspore/nn/probability/dpn/vae/__init__.py +0 -25
  620. mindspore/nn/probability/dpn/vae/cvae.py +0 -140
  621. mindspore/nn/probability/dpn/vae/vae.py +0 -124
  622. mindspore/nn/probability/infer/__init__.py +0 -22
  623. mindspore/nn/probability/infer/variational/elbo.py +0 -70
  624. mindspore/nn/probability/infer/variational/svi.py +0 -84
  625. mindspore/nn/probability/toolbox/__init__.py +0 -22
  626. mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
  627. mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -364
  628. mindspore/nn/probability/transforms/__init__.py +0 -22
  629. mindspore/nn/probability/transforms/transform_bnn.py +0 -262
  630. mindspore/nn/probability/zhusuan/__init__.py +0 -18
  631. mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
  632. mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
  633. mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
  634. mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
  635. mindspore/ops/_op_impl/aicpu/parallel_concat.py +0 -42
  636. mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
  637. mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -19
  638. mindspore/ops/bprop_mindir/Cast_bprop.mindir +0 -19
  639. mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -19
  640. mindspore/ops/bprop_mindir/MatMul_bprop.mindir +0 -0
  641. mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -17
  642. mindspore/ops/bprop_mindir/Transpose_bprop.mindir +0 -0
  643. mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -15
  644. mindspore/ops/composite/array_ops.py +0 -241
  645. mindspore/ops/composite/clip_ops.py +0 -134
  646. mindspore/ops/composite/random_ops.py +0 -426
  647. mindspore/ops/composite/vmap_ops.py +0 -38
  648. mindspore/parallel/nn/__init__.py +0 -42
  649. mindspore/parallel/nn/loss.py +0 -22
  650. mindspore/parallel/nn/moe.py +0 -21
  651. mindspore/parallel/nn/op_parallel_config.py +0 -22
  652. mindspore/parallel/nn/transformer.py +0 -31
  653. {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
  654. {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
  655. {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,185 @@
1
+ # Copyright 2023 Huawei Technologies Co., Ltd
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ============================================================================
15
+ """Context of offload"""
16
+ from __future__ import absolute_import
17
+ from __future__ import division
18
+
19
+ import threading
20
+
21
+ from mindspore._c_expression import OffloadContext
22
+ from mindspore._checkparam import args_type_check
23
+ from mindspore import _checkparam as Validator
24
+
25
+
26
class _OffloadConfig:
    """
    Dictionary keys accepted by the offload configuration.

    Each attribute value is the key used in the dict passed to
    _OffloadContext.set_offload_config and returned by offload_config().
    """
    # feature switches
    ENABLE_OFFLOAD = "enable_offload"
    ENABLE_AIO = "enable_aio"
    ENABLE_PINNED_MEM = "enable_pinned_mem"
    # offload targets and capacities
    OFFLOAD_PARAM = "offload_param"
    OFFLOAD_PATH = "offload_path"
    OFFLOAD_CHECKPOINT = "offload_checkpoint"
    OFFLOAD_DDR_SIZE = "offload_ddr_size"
    OFFLOAD_DISK_SIZE = "offload_disk_size"
    # asynchronous I/O tuning
    AIO_BLOCK_SIZE = "aio_block_size"
    AIO_QUEUE_DEPTH = "aio_queue_depth"
40
+
41
+
42
class _OffloadContext:
    """
    _OffloadContext is the configuration for offload.

    Note:
        Creating a context by instantiating this class directly is not
        recommended. Use offload_context() to get the context since
        _OffloadContext is a singleton.
    """
    _instance = None
    _instance_lock = threading.Lock()

    def __new__(cls):
        if cls._instance is None:
            with cls._instance_lock:
                # Re-check inside the lock (double-checked locking); the
                # original only checked outside, so two racing threads could
                # both create an instance.
                if cls._instance is None:
                    cls._instance = object.__new__(cls)
        return cls._instance

    def __init__(self):
        # Backed by the C++ singleton handle; calling __init__ repeatedly
        # simply re-fetches the same instance.
        self._context_handle = OffloadContext.get_instance()

    def check_context_handle(self):
        """
        Check context handle.

        Raises:
            ValueError: If the context handle is none.
        """
        if self._context_handle is None:
            raise ValueError("Context handle is none in context!!!")

    def set_offload_config(self, offload_config):
        """Set offload context.

        Args:
            offload_config (dict): Configuration keyed by the constants
                declared in _OffloadConfig. Only the keys present are applied.

        Raises:
            ValueError: If offload_config contains an unknown key, or a value
                fails validation.
            TypeError: If 'offload_path' is not a str.
        """
        self.check_context_handle()
        enable_offload = _OffloadConfig.ENABLE_OFFLOAD
        offload_param = _OffloadConfig.OFFLOAD_PARAM
        offload_path = _OffloadConfig.OFFLOAD_PATH
        offload_checkpoint = _OffloadConfig.OFFLOAD_CHECKPOINT
        offload_ddr_size = _OffloadConfig.OFFLOAD_DDR_SIZE
        offload_disk_size = _OffloadConfig.OFFLOAD_DISK_SIZE
        enable_aio = _OffloadConfig.ENABLE_AIO
        aio_block_size = _OffloadConfig.AIO_BLOCK_SIZE
        aio_queue_depth = _OffloadConfig.AIO_QUEUE_DEPTH
        enable_pinned_mem = _OffloadConfig.ENABLE_PINNED_MEM

        valid_keys = (enable_offload, offload_param, offload_path, offload_checkpoint,
                      offload_ddr_size, offload_disk_size, enable_aio, aio_block_size,
                      aio_queue_depth, enable_pinned_mem)
        # Collect every unknown key before raising; the original re-created
        # the accumulator on each iteration and raised on the first hit, so
        # the list could never hold more than one name.
        unknown_config = [name for name in offload_config if name not in valid_keys]
        if unknown_config:
            raise ValueError("Unknown config: {}".format(unknown_config))

        if enable_offload in offload_config:
            Validator.check_bool(
                offload_config[enable_offload], enable_offload, enable_offload)
            self._context_handle.set_enable_offload(
                offload_config[enable_offload])

        if offload_param in offload_config:
            Validator.check_string(
                offload_config[offload_param].lower(), ["cpu", "disk"])
            self._context_handle.set_offload_param(
                offload_config[offload_param].lower())

        if offload_path in offload_config:
            if not isinstance(offload_config[offload_path], str):
                raise TypeError("For 'set_offload_path', "
                                "the argument 'offload_path' must be str, but got the type : {}."
                                .format(type(offload_config[offload_path])))
            self._context_handle.set_offload_path(
                offload_config[offload_path])

        if offload_checkpoint in offload_config:
            Validator.check_string(
                offload_config[offload_checkpoint].lower(), ["cpu", "disk"])
            self._context_handle.set_offload_checkpoint(
                offload_config[offload_checkpoint].lower())

        if offload_ddr_size in offload_config:
            Validator.check_positive_int(offload_config[offload_ddr_size])
            self._context_handle.set_offload_ddr_size(
                offload_config[offload_ddr_size])

        if offload_disk_size in offload_config:
            Validator.check_positive_int(offload_config[offload_disk_size])
            self._context_handle.set_offload_disk_size(
                offload_config[offload_disk_size])

        if enable_aio in offload_config:
            Validator.check_bool(
                offload_config[enable_aio], enable_aio, enable_aio)
            self._context_handle.set_enable_aio(
                offload_config[enable_aio])

        if aio_block_size in offload_config:
            Validator.check_positive_int(offload_config[aio_block_size])
            self._context_handle.set_aio_block_size(
                offload_config[aio_block_size])

        if aio_queue_depth in offload_config:
            Validator.check_positive_int(offload_config[aio_queue_depth])
            self._context_handle.set_aio_queue_depth(
                offload_config[aio_queue_depth])

        if enable_pinned_mem in offload_config:
            Validator.check_bool(
                offload_config[enable_pinned_mem], enable_pinned_mem, enable_pinned_mem)
            self._context_handle.set_enable_pinned_mem(
                offload_config[enable_pinned_mem])

    def offload_config(self):
        """Get config of offload.

        Returns:
            dict: Current settings read back from the context handle, keyed
            by the _OffloadConfig constants.
        """
        self.check_context_handle()
        offload_config = {
            _OffloadConfig.ENABLE_OFFLOAD: self._context_handle.enable_offload(),
            _OffloadConfig.OFFLOAD_PARAM: self._context_handle.offload_param(),
            _OffloadConfig.OFFLOAD_PATH: self._context_handle.offload_path(),
            _OffloadConfig.OFFLOAD_CHECKPOINT: self._context_handle.offload_checkpoint(),
            _OffloadConfig.OFFLOAD_DDR_SIZE: self._context_handle.offload_ddr_size(),
            _OffloadConfig.OFFLOAD_DISK_SIZE: self._context_handle.offload_disk_size(),
            _OffloadConfig.ENABLE_AIO: self._context_handle.enable_aio(),
            _OffloadConfig.AIO_BLOCK_SIZE: self._context_handle.aio_block_size(),
            _OffloadConfig.AIO_QUEUE_DEPTH: self._context_handle.aio_queue_depth(),
            _OffloadConfig.ENABLE_PINNED_MEM: self._context_handle.enable_pinned_mem()
        }
        return offload_config
166
+
167
+
168
# Module-level singleton cache; populated lazily by offload_context().
_OFFLOAD_CONTEXT = None


def offload_context():
    """Get offload_context. if it is not created, create a new one.

    Returns:
        _OffloadContext: the process-wide singleton offload context.
    """
    global _OFFLOAD_CONTEXT
    if _OFFLOAD_CONTEXT is None:
        _OFFLOAD_CONTEXT = _OffloadContext()
    return _OFFLOAD_CONTEXT
177
+
178
+
179
@args_type_check(offload_config=dict)
def _set_offload_context(offload_config):
    """Apply offload_config (a dict keyed by _OffloadConfig constants) to the singleton context."""
    offload_context().set_offload_config(offload_config)
182
+
183
+
184
def _get_offload_context():
    """Return the current offload configuration dict from the singleton context."""
    return offload_context().offload_config()
@@ -16,6 +16,7 @@
16
16
  from __future__ import absolute_import
17
17
 
18
18
  import os
19
+ import json
19
20
  import numpy as np
20
21
  import mindspore as ms
21
22
  from mindspore.parallel._tensor import _get_tensor_strategy, _construct_from_to_tensor_layout, \
@@ -81,7 +82,7 @@ def _convert_to_layout(param_name, tensor_layout):
81
82
  return strategy
82
83
 
83
84
 
84
- def _load_strategy_file(strategy_filename):
85
+ def _check_strategy_file(strategy_filename):
85
86
  """load parallel strategy file"""
86
87
  if not isinstance(strategy_filename, str):
87
88
  raise TypeError(f"For 'build_searched_strategy', the argument 'strategy_filename' should be string, "
@@ -94,18 +95,25 @@ def _load_strategy_file(strategy_filename):
94
95
  if os.path.getsize(strategy_filename) == 0:
95
96
  raise ValueError(f"For 'build_searched_strategy', the strategy file {strategy_filename} should not "
96
97
  f"be empty. Please check whether the 'strategy_filename' is correct.")
97
- parallel_strategy_map = ms.train.node_strategy_pb2.ParallelStrategyMap()
98
98
 
99
+
100
def _load_protobuf_strategy(strategy_filename):
    """Load a parallel strategy map from a protobuf file.

    Args:
        strategy_filename (str): Path of the protobuf strategy file.

    Returns:
        ParallelStrategyMap: the parsed strategy message.

    Raises:
        TypeError: If the file content cannot be parsed as protobuf.
    """
    parallel_strategy_map = ms.train.node_strategy_pb2.ParallelStrategyMap()
    with open(strategy_filename, 'rb') as f:
        pb_content = f.read()
    try:
        parallel_strategy_map.ParseFromString(pb_content)
    # Catch Exception, not BaseException: the original also swallowed
    # KeyboardInterrupt/SystemExit and re-raised them as a misleading
    # TypeError.
    except Exception as e:
        raise TypeError("The strategy file type should be one of json or protobuf. "
                        "When the file name extension is not '.json', "
                        "the file is considered as a protobuf file.") from e
    return parallel_strategy_map
103
112
 
104
113
 
105
- def _build_searched_strategy(strategy_filename):
106
- """build searched strategy"""
107
- parallel_strategy_map = _load_strategy_file(strategy_filename)
108
-
114
+ def _build_protobuf_strategy(strategy_filename):
115
+ """build strategy from protobuf file"""
116
+ parallel_strategy_map = _load_protobuf_strategy(strategy_filename)
109
117
  layout_items = parallel_strategy_map.parallel_layout_item
110
118
  if not layout_items:
111
119
  raise ValueError(f"For 'build_searched_strategy', the strategy file {strategy_filename} has no sliced "
@@ -116,10 +124,94 @@ def _build_searched_strategy(strategy_filename):
116
124
  parameter_name = layout_item.param_name
117
125
  layout = layout_item.parallel_layouts
118
126
  strategy[parameter_name] = layout
127
+ return strategy
119
128
 
129
+
130
def _build_json_strategy(strategy_filename):
    """Build the per-parameter strategy dict from a json strategy file."""
    with open(strategy_filename, 'r') as f:
        content = json.load(f)
    layout_items = content.get("parallel_layout_item")
    strategy = {}
    for param_name, item in layout_items.items():
        # Translate each json layout entry into a ParallelLayouts message.
        layout = ms.train.node_strategy_pb2.ParallelLayouts()
        layout.field = item.get("field")
        layout.opt_weight_shard_size = item.get("opt_weight_shard_size")
        layout.opt_weight_shard_step = item.get("opt_weight_shard_step")
        layout.dev_matrix.add().dim.extend(item.get("dev_matrix"))
        layout.tensor_map.add().dim.extend(item.get("tensor_map"))
        # Optional fields: an empty container is still added, matching the
        # protobuf layout produced for files without these keys.
        layout.param_split_shape.add().dim.extend(item.get("param_split_shape", []))
        layout.indices_offset.add().dim.extend(item.get("indices_offset", []))
        strategy[param_name] = layout
    return strategy
121
157
 
122
158
 
159
def _build_searched_strategy(strategy_filename):
    """Build the searched strategy, dispatching on the file extension."""
    _check_strategy_file(strategy_filename)
    # Files ending in '.json' are json strategies; anything else is
    # treated as protobuf.
    if strategy_filename.endswith(".json"):
        return _build_json_strategy(strategy_filename)
    return _build_protobuf_strategy(strategy_filename)
165
+
166
+
167
def _merge_protobuf_strategy(src_strategy_files, dst_strategy_file):
    """Merge per-stage protobuf strategy files into one protobuf file."""
    dst_map = ms.train.node_strategy_pb2.ParallelStrategyMap()
    seen_stages = set()
    for src_file in src_strategy_files:
        src_map = _load_protobuf_strategy(src_file)
        strategy_items = src_map.parallel_strategy_item
        layout_items = src_map.parallel_layout_item
        if not strategy_items or not layout_items:
            raise ValueError("The strategy file {} is empty".format(src_file))
        stage = strategy_items[0].parallel_strategys.stage
        # Only the first file of each pipeline stage is merged.
        if stage in seen_stages:
            continue
        seen_stages.add(stage)
        # Prefix parameter names with the stage to keep them unique.
        for layout_item in layout_items:
            layout_item.param_name = "{}-{}".format(stage, layout_item.param_name)
        dst_map.parallel_strategy_item.extend(strategy_items)
        dst_map.parallel_layout_item.extend(layout_items)
    dst_map.current_stage = 1
    with open(dst_strategy_file, "wb") as f:
        f.write(dst_map.SerializeToString())
188
+
189
+
190
def _merge_json_strategy(src_strategy_files, dst_strategy_file):
    """Merge per-stage json strategy files into one json file.

    The original docstring said "merge protobuf strategy" (copy-paste from
    the protobuf variant); this function handles json files.

    Args:
        src_strategy_files (list[str]): Paths of per-stage json strategy files.
        dst_strategy_file (str): Path the merged json strategy is written to.

    Raises:
        ValueError: If a source file has no strategy or layout items.
    """
    dst_parallel_strategy_map = {"current_stage": 1, "parallel_strategy_item": {}, "parallel_layout_item": {}}
    merged_stage = []
    for src_strategy_file in src_strategy_files:
        with open(src_strategy_file, 'r') as f:
            json_content = json.load(f)
        layout_items = json_content.get("parallel_layout_item")
        strategy_items = json_content.get("parallel_strategy_item")
        if not strategy_items or not layout_items:
            raise ValueError("The strategy file {} is empty".format(src_strategy_file))
        # All items of one file belong to the same pipeline stage; read it
        # from the first strategy item.
        pipeline_stage = next(iter(strategy_items.values())).get('stage')
        if pipeline_stage in merged_stage:
            # Only the first file of each pipeline stage is merged.
            continue
        for param_name, layout_item in layout_items.items():
            # Prefix the parameter name with its stage to keep names unique.
            new_param_name = "-".join([str(pipeline_stage), param_name])
            dst_parallel_strategy_map["parallel_layout_item"][new_param_name] = layout_item
        dst_parallel_strategy_map["parallel_strategy_item"].update(strategy_items)
        merged_stage.append(pipeline_stage)
    with open(dst_strategy_file, "w") as f:
        json.dump(dst_parallel_strategy_map, f)
213
+
214
+
123
215
  def _parameter_not_in_local_stage(param_name, origin_strategy_list, strategy_list):
124
216
  """parameter whether in the local stage"""
125
217
  if origin_strategy_list is None or strategy_list is None:
@@ -40,7 +40,7 @@ def _need_reset_device_target_for_ps(target):
40
40
  For Ascend backend, the card can't be occupied by multiple processes in distributed traning,
41
41
  so we need to reset the device target for some roles.
42
42
  '''
43
- is_server = (_get_ps_context("ms_role") in ["MS_PSERVER", "MS_SERVER", "MS_SCHED"])
43
+ is_server = (os.getenv('MS_ROLE') in ["MS_PSERVER", "MS_SERVER", "MS_SCHED"])
44
44
  return is_server and target == "Ascend"
45
45
 
46
46
 
@@ -184,10 +184,6 @@ def _reinsert_hash_table_size(new_name, cur_name, cache_vocab_size, embedding_si
184
184
  ps_context().reinsert_hash_table_size(new_name, cur_name, cache_vocab_size, embedding_size)
185
185
 
186
186
 
187
- def _insert_weight_init_info(name, global_seed, op_seed):
188
- ps_context().insert_weight_init_info(name, global_seed, op_seed)
189
-
190
-
191
187
  def _insert_accumu_init_info(name, init_val):
192
188
  ps_context().insert_accumu_init_info(name, init_val)
193
189
 
@@ -210,6 +206,14 @@ def _cache_enable():
210
206
  return ps_context().cache_enable()
211
207
 
212
208
 
209
+ def _set_cache_size(cache_size):
210
+ ps_context().set_cache_size(cache_size)
211
+
212
+
213
+ def _set_sparse_format(sparse_format):
214
+ ps_context().set_sparse_format(sparse_format)
215
+
216
+
213
217
def _set_rank_id(rank_id):
    """Forward rank_id to ps_context().set_rank_id."""
    ps_context().set_rank_id(rank_id)
  ps_context().set_rank_id(rank_id)
215
219
 
@@ -14,7 +14,7 @@
14
14
  # ============================================================================
15
15
  """Context for recovery"""
16
16
 
17
- from mindspore._checkparam import Validator
17
+ from mindspore import _checkparam as Validator
18
18
  from mindspore._c_expression import RecoveryContext
19
19
 
20
20
  RECOVERY_CONTEXT = None
@@ -175,20 +175,26 @@ def _chunk_tensor_by_strategy(np_tensor, strategy):
175
175
  return _chunk_tensor(np_tensor, strategy, len(strategy))
176
176
 
177
177
 
178
- def _get_slice_index(dev_mat, tensor_map):
178
+ def _get_slice_index(dev_mat, tensor_map, opt_shard_group):
179
179
  """
180
180
  Get the slice index for current slice.
181
181
 
182
182
  Args:
183
183
  dev_mat (list): The device matrix of devices.
184
184
  tensor_map (list): The split strategy of tensor.
185
+ opt_shard_group(string): The group of optimizer shard
185
186
 
186
187
  Returns:
187
188
  Integer, the slice index for slice on this device.
188
189
  """
189
190
  rank = get_rank()
191
+ dev_num = get_group_size()
190
192
  tensor_strategy = _get_tensor_strategy(dev_mat, tensor_map)
191
193
  tensor_slice_index = _get_tensor_slice_index(dev_mat, tensor_strategy, tensor_map, rank)
194
+ if opt_shard_group:
195
+ tensor_slice_index += dev_num
196
+ opt_rank = get_rank(opt_shard_group)
197
+ tensor_slice_index += opt_rank
192
198
  return tensor_slice_index
193
199
 
194
200
 
@@ -15,17 +15,17 @@
15
15
  """
16
16
  NOTE:
17
17
  Transformer Networks.
18
- This is an experimental interface that is subject to change or deletion.
18
+ These are experimental APIs that are subject to change or deletion.
19
19
  """
20
20
  from __future__ import absolute_import
21
21
 
22
- from mindspore.nn.transformer.transformer import AttentionMask, VocabEmbedding, MultiHeadAttention, \
22
+ from mindspore.parallel._transformer.transformer import AttentionMask, VocabEmbedding, MultiHeadAttention, \
23
23
  FeedForward, TransformerEncoder, TransformerDecoder, TransformerEncoderLayer, TransformerDecoderLayer, \
24
24
  Transformer, TransformerOpParallelConfig, EmbeddingOpParallelConfig, TransformerRecomputeConfig
25
- from mindspore.nn.transformer.moe import MoEConfig
26
- from mindspore.nn.transformer.layers import FixedSparseAttention
27
- from mindspore.nn.transformer.loss import CrossEntropyLoss
28
- from mindspore.nn.transformer.op_parallel_config import OpParallelConfig
25
+ from mindspore.parallel._transformer.moe import MoEConfig
26
+ from mindspore.parallel._transformer.layers import FixedSparseAttention
27
+ from mindspore.parallel._transformer.loss import CrossEntropyLoss
28
+ from mindspore.parallel._transformer.op_parallel_config import OpParallelConfig
29
29
 
30
30
  __all__ = []
31
31
  __all__.extend(transformer.__all__)
@@ -33,11 +33,11 @@ from mindspore._extends import cell_attr_register
33
33
  from mindspore.nn.cell import Cell
34
34
  from mindspore.nn.layer.activation import get_activation
35
35
  from mindspore.ops import functional as F
36
- from mindspore._checkparam import Validator
36
+ from mindspore import _checkparam as Validator
37
37
  from mindspore.ops.primitive import constexpr
38
38
  from mindspore.parallel._utils import _get_parallel_mode, _is_sharding_propagation
39
39
  from mindspore.context import ParallelMode
40
- from mindspore.nn.transformer.op_parallel_config import default_dpmp_config, OpParallelConfig, MoEParallelConfig
40
+ from mindspore.parallel._transformer.op_parallel_config import default_dpmp_config, OpParallelConfig, MoEParallelConfig
41
41
  from mindspore import log as logger
42
42
 
43
43
  __all__ = [
@@ -161,7 +161,6 @@ class _LayerInputCheck:
161
161
  Check the input shape's is equal to the expected shape, the value on 0-th is viewed as batch, and the
162
162
  batch size will not be checked.
163
163
  """
164
- target_shape = target_shape
165
164
  length, hidden = target_shape
166
165
  if isinstance(input_shape, tuple):
167
166
  input_shape = list(input_shape)
@@ -200,28 +199,6 @@ def _check_input_dtype(input_dtype, param_name, allow_dtypes, cls_name):
200
199
  Validator.check_type_name(param_name, input_dtype, allow_dtypes, cls_name)
201
200
 
202
201
 
203
- @constexpr
204
- def _check_input_shape(input_shape, param_name, func_name, target_len):
205
- # check the input length
206
- _LayerInputCheck.check_shape_length(input_shape, param_name, func_name, target_len)
207
-
208
-
209
- @constexpr
210
- def _check_shape_equal(input_shape, param_name, func_name, target_shape):
211
- # check the input length
212
- _LayerInputCheck.check_shape_equal(input_shape, param_name, func_name, target_shape)
213
-
214
-
215
- @constexpr
216
- def _check_input_shape_value(input_shape, dim, param_name, cls_name, target_value):
217
- _LayerInputCheck.check_shape_value_on_axis(input_shape, dim, param_name, cls_name, target_value)
218
-
219
-
220
- @constexpr
221
- def _check_shape_equal_without_batch(input_shape, param_name, func_name, target_shape):
222
- _LayerInputCheck.check_shape_equal_without_batch(input_shape, param_name, func_name, target_shape)
223
-
224
-
225
202
  class _Dropout(nn.Cell):
226
203
  r"""
227
204
  A Dropout Implements with P.DropoutGenMask and P.DropoutDoMask for parallel training.
@@ -593,13 +570,13 @@ class FixedSparseAttention(nn.Cell):
593
570
  default args.
594
571
 
595
572
  Inputs:
596
- - **q** (Tensor) - Tensor query (:class:`mstype.fp16` [batch_size, seq_length, hidden_size]): Sequence of
573
+ - **q** (Tensor) - Tensor query ( `mstype.fp16` [batch_size, seq_length, hidden_size]): Sequence of
597
574
  queries to query the context.
598
- - **k** (Tensor) - Tensor key (:class:`mstype.fp16` [batch_size, seq_length, hidden_size]): Sequence of
575
+ - **k** (Tensor) - Tensor key ( `mstype.fp16` [batch_size, seq_length, hidden_size]): Sequence of
599
576
  queries to query the context.
600
- - **v** (Tensor) - Tensor value (:class:`mstype.fp16` [batch size, sequence length, Embedding Size]):
577
+ - **v** (Tensor) - Tensor value ( `mstype.fp16` [batch size, sequence length, Embedding Size]):
601
578
  Sequence of queries to query the context.
602
- - **attention_mask** (Tensor) - Float Tensor the mask of (:class:`mstype.fp32`, :class:`mstype.fp16`
579
+ - **attention_mask** (Tensor) - Float Tensor the mask of ( `mstype.fp32`, `mstype.fp16`
603
580
  [batch_size, seq_length, seq_length]): Lower triangular matrix to pass masked information.
604
581
 
605
582
  Outputs:
@@ -707,17 +684,9 @@ class FixedSparseAttention(nn.Cell):
707
684
  self.slice1 = P.StridedSlice().shard(((dp, 1, 1),))
708
685
 
709
686
  def construct(self, q, k, v, attention_mask):
710
- _check_shape_equal(F.shape(q), "q", self.cls_name,
711
- [self.batch_size, self.seq_length, self.hidden_size])
712
687
  _check_input_dtype(F.dtype(q), "q", [mstype.float16], self.cls_name)
713
- _check_shape_equal(F.shape(k), "k", self.cls_name,
714
- [self.batch_size, self.seq_length, self.hidden_size])
715
688
  _check_input_dtype(F.dtype(k), "k", [mstype.float16], self.cls_name)
716
- _check_shape_equal(F.shape(v), "v", self.cls_name,
717
- [self.batch_size, self.seq_length, self.hidden_size])
718
689
  _check_input_dtype(F.dtype(v), "v", [mstype.float16], self.cls_name)
719
- _check_shape_equal(F.shape(attention_mask), "attention_mask", self.cls_name,
720
- [self.batch_size, self.seq_length, self.seq_length])
721
690
  _check_input_dtype(F.dtype(attention_mask), "attention_mask", [mstype.float32, mstype.float16], self.cls_name)
722
691
 
723
692
  q, k, v = self._transpose_inputs(q, k, v)
@@ -13,8 +13,8 @@
13
13
  # limitations under the License.
14
14
  # ============================================================================
15
15
  """
16
- Parallel Loss for the Parallel Training
17
- This is an experimental interface that is subject to change or deletion.
16
+ Parallel Loss for the Parallel Training.
17
+ These are experimental APIs that are subject to change or deletion.
18
18
  """
19
19
  from __future__ import absolute_import
20
20
 
@@ -30,8 +30,8 @@ from mindspore.context import ParallelMode
30
30
  from mindspore.parallel._utils import _get_device_num, _get_pipeline_stages
31
31
  from mindspore.log import _LogActionOnce
32
32
  from mindspore import log as logger
33
- from mindspore.nn.transformer.layers import _check_input_dtype, _check_input_shape
34
- from mindspore.nn.transformer.op_parallel_config import default_dpmp_config, OpParallelConfig
33
+ from mindspore.parallel._transformer.layers import _check_input_dtype
34
+ from mindspore.parallel._transformer.op_parallel_config import default_dpmp_config, OpParallelConfig
35
35
 
36
36
  __all__ = ["CrossEntropyLoss"]
37
37
 
@@ -247,7 +247,4 @@ class CrossEntropyLoss(Cell):
247
247
  _check_input_dtype(F.dtype(logits), "logits", [mstype.float32, mstype.float16], self.cls_name)
248
248
  _check_input_dtype(F.dtype(label), "label", [mstype.int32], self.cls_name)
249
249
  _check_input_dtype(F.dtype(input_mask), "input_mask", [mstype.float32], self.cls_name)
250
- _check_input_shape(F.shape(logits), "logits", self.cls_name, 2)
251
- _check_input_shape(F.shape(label), "label", self.cls_name, 1)
252
- _check_input_shape(F.shape(input_mask), "input_mask", self.cls_name, 1)
253
250
  return True
@@ -13,26 +13,27 @@
13
13
  # limitations under the License.
14
14
  # ============================================================================
15
15
  """
16
- Note: Mixture of Expert (MoE) structure. This is an experimental interface that is subject to change or deletion.
16
+ Note:
17
+ Mixture of Expert (MoE) structure.
18
+ These are experimental APIs that are subject to change or deletion.
17
19
  """
18
20
  from __future__ import absolute_import
19
21
  from __future__ import division
20
22
 
21
- import math
22
23
  import numpy as np
23
24
 
24
25
  from mindspore.common.tensor import Tensor
25
26
  import mindspore.common.dtype as mstype
26
27
  import mindspore.communication.management as D
27
- from mindspore._checkparam import Validator
28
+ from mindspore import _checkparam as Validator
28
29
  from mindspore.ops import operations as P
29
30
  from mindspore.ops import functional as F
30
- from mindspore.ops.primitive import constexpr
31
+ from mindspore.ops.primitive import _primexpr
31
32
  from mindspore.nn.cell import Cell
32
33
  from mindspore.nn.layer import Dense
33
34
  from mindspore.context import ParallelMode
34
35
  from mindspore.parallel._utils import _get_parallel_mode, _is_sharding_propagation
35
- from mindspore.nn.transformer.op_parallel_config import default_moeparallel_config
36
+ from mindspore.parallel._transformer.op_parallel_config import default_moeparallel_config
36
37
 
37
38
  __all__ = [
38
39
  "MoEConfig"]
@@ -132,9 +133,11 @@ def _check_moe_config(moe_config=None, parallel_config=None):
132
133
  f"should be less than device_num: {device_num}.")
133
134
 
134
135
 
135
- @constexpr
136
+ @_primexpr
136
137
  def calculate_expert_capacity(k, tokens_per_group, capacity_factor, expert_dim):
137
- return math.ceil(k * tokens_per_group * capacity_factor / expert_dim)
138
+ res = k * tokens_per_group * capacity_factor / expert_dim
139
+ res_int = int(res)
140
+ return res_int if res < 0 or res == res_int else res_int + 1
138
141
 
139
142
 
140
143
  class MoE(Cell):
@@ -187,7 +190,7 @@ class MoE(Cell):
187
190
  self.group_wise_a2a = moe_config.group_wise_a2a
188
191
  if not (self.mp > 1 and self.dp == self.ep):
189
192
  self.group_wise_a2a = False
190
- from mindspore.nn.transformer import FeedForward
193
+ from mindspore.parallel._transformer import FeedForward
191
194
 
192
195
  self.ffn = FeedForward(hidden_size=hidden_size,
193
196
  ffn_hidden_size=ffn_hidden_size,
@@ -232,7 +235,7 @@ class MoE(Cell):
232
235
  self.group_wise_a2a = moe_config.group_wise_a2a
233
236
  if not (self.mp > 1 and self.dp == self.ep):
234
237
  self.group_wise_a2a = False
235
- from mindspore.nn.transformer import FeedForward
238
+ from mindspore.parallel._transformer import FeedForward
236
239
 
237
240
  self.ffn = FeedForward(hidden_size=hidden_size,
238
241
  ffn_hidden_size=ffn_hidden_size,
@@ -269,8 +272,8 @@ class MoE(Cell):
269
272
  pad_size = 0
270
273
  if self.group_wise_a2a:
271
274
  # If capacity can't div by mp, pad for mp shard.
272
- if capacity%self.mp != 0:
273
- pad_size = self.mp-(capacity%self.mp)
275
+ if capacity % self.mp != 0:
276
+ pad_size = self.mp-(capacity % self.mp)
274
277
  if pad_size != 0:
275
278
  capacity += pad_size
276
279
  pad_tensor = self.stride_slice_dp(expert_input, (0, 0, 0, 0),
@@ -326,8 +329,8 @@ class MoE(Cell):
326
329
  """
327
330
  # Pad capacity for comp_comm_parallel_degree split.
328
331
  pad_size = 0
329
- if capacity%self.comp_comm_parallel_degree != 0:
330
- pad_size = self.comp_comm_parallel_degree-(capacity%self.comp_comm_parallel_degree)
332
+ if capacity % self.comp_comm_parallel_degree != 0:
333
+ pad_size = self.comp_comm_parallel_degree-(capacity % self.comp_comm_parallel_degree)
331
334
  capacity += pad_size
332
335
  pad_tensor = self.stride_slice_dp(expert_input, (0, 0, 0, 0),
333
336
  (self.expert_dim, self.dp_group, pad_size, self.hidden_size),
@@ -646,9 +649,10 @@ class TopkRouter(Cell):
646
649
  self.on_value, self.off_value))
647
650
  accum_combine_tensor = self.add2(accum_combine_tensor, combine_tensor)
648
651
 
649
- # expert weights normalization
650
- combine_tensor_sum = self.reduce_sum_keep2(self.reduce_sum_keep2(accum_combine_tensor, -1), -2)
651
- accum_combine_tensor = self.div2(accum_combine_tensor, self.add4(combine_tensor_sum, 1e-9))
652
+ # expert weights normalization when k > 1
653
+ if self.num_experts_chosen > 1:
654
+ combine_tensor_sum = self.reduce_sum_keep2(self.reduce_sum_keep2(accum_combine_tensor, -1), -2)
655
+ accum_combine_tensor = self.div2(accum_combine_tensor, self.add4(combine_tensor_sum, 1e-9))
652
656
  # dispatch_tensor is of boolean type. Here, using NotEqual instead of Cast, for that 'Cast to bool' has
653
657
  # bad performance
654
658
  dispatch_tensor = self.not_equal(accum_combine_tensor, 0.0)
@@ -13,12 +13,12 @@
13
13
  # limitations under the License.
14
14
  # ============================================================================
15
15
  """
16
- Parallel Config for the Parallel Training
17
- This is an experimental interface that is subject to change and/or deletion.
16
+ Parallel Config for the Parallel Training.
17
+ These are experimental APIs that are subject to change or deletion.
18
18
  """
19
19
  from __future__ import absolute_import
20
20
 
21
- from mindspore._checkparam import Validator
21
+ from mindspore import _checkparam as Validator
22
22
  from mindspore import context
23
23
  import mindspore.communication.management as D
24
24
  from mindspore.context import ParallelMode