mindspore 2.0.0a0__cp37-cp37m-win_amd64.whl → 2.0.0rc1__cp37-cp37m-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (655)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +4 -2
  3. mindspore/_c_dataengine.cp37-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp37-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp37-win_amd64.pyd +0 -0
  6. mindspore/_check_jit_forbidden_api.py +102 -0
  7. mindspore/_checkparam.py +1066 -1001
  8. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +4 -3
  9. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -48
  10. mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -4
  11. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -4
  12. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
  13. mindspore/_extends/parse/__init__.py +5 -3
  14. mindspore/_extends/parse/namespace.py +16 -1
  15. mindspore/_extends/parse/parser.py +107 -22
  16. mindspore/_extends/parse/resources.py +0 -7
  17. mindspore/_extends/parse/standard_method.py +885 -413
  18. mindspore/amp.py +52 -57
  19. mindspore/boost/boost.py +2 -2
  20. mindspore/boost/boost_cell_wrapper.py +38 -20
  21. mindspore/boost/dim_reduce.py +3 -3
  22. mindspore/boost/group_loss_scale_manager.py +1 -1
  23. mindspore/common/__init__.py +4 -6
  24. mindspore/common/_decorator.py +2 -0
  25. mindspore/common/_register_for_adapter.py +55 -0
  26. mindspore/common/_stub_tensor.py +201 -0
  27. mindspore/common/_utils.py +41 -7
  28. mindspore/common/api.py +215 -141
  29. mindspore/common/dtype.py +8 -1
  30. mindspore/common/dump.py +2 -2
  31. mindspore/common/initializer.py +4 -2
  32. mindspore/common/jit_config.py +17 -13
  33. mindspore/common/mutable.py +33 -13
  34. mindspore/common/parameter.py +23 -21
  35. mindspore/common/seed.py +8 -24
  36. mindspore/common/sparse_tensor.py +62 -41
  37. mindspore/common/tensor.py +852 -1154
  38. mindspore/communication/__init__.py +2 -2
  39. mindspore/communication/_comm_helper.py +11 -4
  40. mindspore/communication/management.py +22 -21
  41. mindspore/config/op_info.config +501 -1008
  42. mindspore/context.py +201 -23
  43. mindspore/dataset/__init__.py +6 -6
  44. mindspore/dataset/audio/__init__.py +7 -7
  45. mindspore/dataset/audio/transforms.py +670 -30
  46. mindspore/dataset/audio/utils.py +47 -4
  47. mindspore/dataset/audio/validators.py +223 -1
  48. mindspore/dataset/callback/ds_callback.py +2 -2
  49. mindspore/dataset/core/config.py +210 -14
  50. mindspore/dataset/core/validator_helpers.py +2 -2
  51. mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
  52. mindspore/dataset/debug/debug_hook.py +65 -0
  53. mindspore/dataset/debug/pre_defined_hook.py +67 -0
  54. mindspore/dataset/engine/__init__.py +7 -3
  55. mindspore/dataset/engine/cache_client.py +1 -1
  56. mindspore/dataset/engine/datasets.py +322 -66
  57. mindspore/dataset/engine/datasets_audio.py +80 -76
  58. mindspore/dataset/engine/datasets_standard_format.py +51 -38
  59. mindspore/dataset/engine/datasets_text.py +232 -118
  60. mindspore/dataset/engine/datasets_user_defined.py +41 -17
  61. mindspore/dataset/engine/datasets_vision.py +746 -225
  62. mindspore/dataset/engine/graphdata.py +75 -10
  63. mindspore/dataset/engine/iterators.py +45 -5
  64. mindspore/dataset/engine/offload.py +48 -28
  65. mindspore/dataset/engine/validators.py +117 -8
  66. mindspore/dataset/text/__init__.py +6 -5
  67. mindspore/dataset/text/transforms.py +86 -3
  68. mindspore/dataset/text/utils.py +6 -4
  69. mindspore/dataset/text/validators.py +25 -0
  70. mindspore/dataset/transforms/__init__.py +3 -2
  71. mindspore/dataset/transforms/c_transforms.py +1 -1
  72. mindspore/dataset/transforms/transforms.py +2 -2
  73. mindspore/dataset/utils/__init__.py +2 -1
  74. mindspore/dataset/utils/line_reader.py +121 -0
  75. mindspore/dataset/vision/__init__.py +2 -3
  76. mindspore/dataset/vision/c_transforms.py +9 -9
  77. mindspore/dataset/vision/py_transforms.py +5 -5
  78. mindspore/dataset/vision/py_transforms_util.py +2 -0
  79. mindspore/dataset/vision/transforms.py +160 -161
  80. mindspore/dataset/vision/utils.py +3 -3
  81. mindspore/experimental/map_parameter.py +38 -26
  82. mindspore/include/OWNERS +0 -1
  83. mindspore/include/api/callback/callback.h +9 -13
  84. mindspore/include/api/callback/ckpt_saver.h +2 -2
  85. mindspore/include/api/callback/loss_monitor.h +2 -2
  86. mindspore/include/api/callback/lr_scheduler.h +5 -5
  87. mindspore/include/api/callback/time_monitor.h +2 -2
  88. mindspore/include/api/callback/train_accuracy.h +4 -6
  89. mindspore/include/api/cfg.h +19 -6
  90. mindspore/include/api/context.h +44 -9
  91. mindspore/include/api/delegate.h +1 -1
  92. mindspore/include/api/metrics/accuracy.h +2 -2
  93. mindspore/include/api/metrics/metrics.h +4 -3
  94. mindspore/include/api/model.h +9 -4
  95. mindspore/include/api/model_parallel_runner.h +2 -2
  96. mindspore/include/api/net.h +12 -11
  97. mindspore/include/api/serialization.h +19 -3
  98. mindspore/include/api/types.h +3 -3
  99. mindspore/include/dataset/constants.h +7 -0
  100. mindspore/include/dataset/text.h +59 -0
  101. mindspore/jpeg62.dll +0 -0
  102. mindspore/log.py +1 -1
  103. mindspore/mindrecord/filereader.py +18 -0
  104. mindspore/mindrecord/filewriter.py +197 -34
  105. mindspore/mindrecord/shardreader.py +9 -0
  106. mindspore/mindrecord/shardwriter.py +1 -1
  107. mindspore/mindrecord/tools/cifar100_to_mr.py +3 -3
  108. mindspore/mindrecord/tools/cifar10_to_mr.py +3 -3
  109. mindspore/mindrecord/tools/csv_to_mr.py +3 -3
  110. mindspore/mindrecord/tools/imagenet_to_mr.py +16 -11
  111. mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
  112. mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
  113. mindspore/mindspore_backend.dll +0 -0
  114. mindspore/mindspore_common.dll +0 -0
  115. mindspore/mindspore_core.dll +0 -0
  116. mindspore/mindspore_glog.dll +0 -0
  117. mindspore/mindspore_shared_lib.dll +0 -0
  118. mindspore/nn/__init__.py +0 -4
  119. mindspore/nn/cell.py +204 -132
  120. mindspore/nn/dynamic_lr.py +1 -1
  121. mindspore/nn/grad/cell_grad.py +7 -6
  122. mindspore/nn/layer/__init__.py +5 -4
  123. mindspore/nn/layer/activation.py +40 -89
  124. mindspore/nn/layer/basic.py +255 -624
  125. mindspore/nn/layer/channel_shuffle.py +7 -6
  126. mindspore/nn/layer/combined.py +1 -1
  127. mindspore/nn/layer/container.py +41 -4
  128. mindspore/nn/layer/conv.py +64 -28
  129. mindspore/nn/layer/dense.py +9 -8
  130. mindspore/nn/layer/embedding.py +27 -25
  131. mindspore/nn/layer/image.py +53 -46
  132. mindspore/nn/layer/math.py +97 -105
  133. mindspore/nn/layer/normalization.py +117 -86
  134. mindspore/nn/layer/padding.py +185 -95
  135. mindspore/nn/layer/pooling.py +817 -414
  136. mindspore/nn/layer/rnn_cells.py +10 -15
  137. mindspore/nn/layer/rnns.py +37 -38
  138. mindspore/nn/layer/thor_layer.py +11 -12
  139. mindspore/nn/layer/timedistributed.py +5 -5
  140. mindspore/nn/layer/transformer.py +701 -0
  141. mindspore/nn/learning_rate_schedule.py +8 -8
  142. mindspore/nn/loss/__init__.py +5 -4
  143. mindspore/nn/loss/loss.py +334 -199
  144. mindspore/nn/optim/ada_grad.py +6 -6
  145. mindspore/nn/optim/adadelta.py +2 -3
  146. mindspore/nn/optim/adafactor.py +4 -5
  147. mindspore/nn/optim/adam.py +126 -62
  148. mindspore/nn/optim/adamax.py +3 -4
  149. mindspore/nn/optim/adasum.py +6 -6
  150. mindspore/nn/optim/asgd.py +2 -2
  151. mindspore/nn/optim/ftrl.py +67 -38
  152. mindspore/nn/optim/lamb.py +4 -5
  153. mindspore/nn/optim/lars.py +2 -2
  154. mindspore/nn/optim/lazyadam.py +43 -4
  155. mindspore/nn/optim/momentum.py +6 -5
  156. mindspore/nn/optim/optimizer.py +3 -1
  157. mindspore/nn/optim/proximal_ada_grad.py +2 -2
  158. mindspore/nn/optim/rmsprop.py +1 -1
  159. mindspore/nn/optim/rprop.py +8 -9
  160. mindspore/nn/optim/sgd.py +19 -13
  161. mindspore/nn/optim/thor.py +10 -15
  162. mindspore/nn/probability/__init__.py +0 -2
  163. mindspore/nn/probability/bijector/bijector.py +4 -4
  164. mindspore/nn/probability/bijector/invert.py +1 -1
  165. mindspore/nn/probability/bijector/softplus.py +2 -2
  166. mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
  167. mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
  168. mindspore/nn/probability/distribution/_utils/utils.py +9 -15
  169. mindspore/nn/probability/distribution/bernoulli.py +3 -3
  170. mindspore/nn/probability/distribution/beta.py +1 -1
  171. mindspore/nn/probability/distribution/categorical.py +5 -7
  172. mindspore/nn/probability/distribution/cauchy.py +3 -3
  173. mindspore/nn/probability/distribution/distribution.py +2 -2
  174. mindspore/nn/probability/distribution/exponential.py +2 -2
  175. mindspore/nn/probability/distribution/gamma.py +3 -3
  176. mindspore/nn/probability/distribution/geometric.py +1 -1
  177. mindspore/nn/probability/distribution/gumbel.py +3 -3
  178. mindspore/nn/probability/distribution/half_normal.py +15 -11
  179. mindspore/nn/probability/distribution/laplace.py +16 -13
  180. mindspore/nn/probability/distribution/logistic.py +2 -2
  181. mindspore/nn/probability/distribution/normal.py +1 -1
  182. mindspore/nn/probability/distribution/poisson.py +1 -1
  183. mindspore/nn/probability/distribution/student_t.py +20 -15
  184. mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
  185. mindspore/nn/probability/distribution/uniform.py +2 -2
  186. mindspore/nn/reinforcement/_tensors_queue.py +3 -3
  187. mindspore/nn/reinforcement/tensor_array.py +2 -2
  188. mindspore/nn/sparse/sparse.py +2 -2
  189. mindspore/nn/wrap/cell_wrapper.py +27 -10
  190. mindspore/nn/wrap/grad_reducer.py +2 -2
  191. mindspore/nn/wrap/loss_scale.py +40 -24
  192. mindspore/numpy/array_creations.py +33 -22
  193. mindspore/numpy/array_ops.py +35 -30
  194. mindspore/numpy/logic_ops.py +6 -27
  195. mindspore/numpy/math_ops.py +22 -19
  196. mindspore/numpy/utils.py +1 -1
  197. mindspore/numpy/utils_const.py +108 -58
  198. mindspore/opencv_core452.dll +0 -0
  199. mindspore/opencv_imgcodecs452.dll +0 -0
  200. mindspore/opencv_imgproc452.dll +0 -0
  201. mindspore/ops/_constants.py +0 -6
  202. mindspore/ops/_grad/__init__.py +2 -1
  203. mindspore/ops/_grad/grad_array_ops.py +86 -117
  204. mindspore/ops/_grad/grad_base.py +23 -1
  205. mindspore/ops/_grad/grad_clip_ops.py +2 -3
  206. mindspore/ops/_grad/grad_comm_ops.py +34 -24
  207. mindspore/ops/_grad/grad_implementations.py +9 -45
  208. mindspore/ops/_grad/grad_inner_ops.py +47 -4
  209. mindspore/ops/_grad/grad_math_ops.py +142 -117
  210. mindspore/ops/_grad/grad_nn_ops.py +71 -165
  211. mindspore/ops/_grad/grad_sequence_ops.py +296 -0
  212. mindspore/ops/_grad/grad_sparse.py +7 -6
  213. mindspore/ops/_grad_experimental/__init__.py +1 -0
  214. mindspore/ops/_grad_experimental/grad_array_ops.py +150 -15
  215. mindspore/ops/_grad_experimental/grad_image_ops.py +16 -7
  216. mindspore/ops/_grad_experimental/grad_inner_ops.py +1 -22
  217. mindspore/ops/_grad_experimental/grad_linalg_ops.py +4 -11
  218. mindspore/ops/_grad_experimental/grad_math_ops.py +210 -89
  219. mindspore/ops/_grad_experimental/grad_nn_ops.py +26 -22
  220. mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
  221. mindspore/ops/_grad_experimental/grad_sparse_ops.py +49 -8
  222. mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
  223. mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +2 -2
  224. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
  225. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
  226. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +4 -4
  227. mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
  228. mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
  229. mindspore/ops/_op_impl/_custom_op/correction_mul.py +2 -2
  230. mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
  231. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -5
  232. mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
  233. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
  234. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
  235. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
  236. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
  237. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
  238. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
  239. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
  240. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
  241. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
  242. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
  243. mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
  244. mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
  245. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  246. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
  247. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
  248. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
  249. mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
  250. mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -4
  251. mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
  252. mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
  253. mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
  254. mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
  255. mindspore/ops/_op_impl/aicpu/__init__.py +236 -4
  256. mindspore/ops/_op_impl/aicpu/abs.py +36 -0
  257. mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_v1.py → adaptive_avg_pool_2d.py} +6 -5
  258. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
  259. mindspore/ops/_op_impl/aicpu/add.py +43 -0
  260. mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
  261. mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
  262. mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
  263. mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -43
  264. mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
  265. mindspore/{compression/common/__init__.py → ops/_op_impl/aicpu/bessel_i0.py} +15 -8
  266. mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
  267. mindspore/ops/_op_impl/aicpu/conj.py +11 -0
  268. mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +0 -3
  269. mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
  270. mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +43 -0
  271. mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_grad_v1.py → digamma.py} +7 -9
  272. mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
  273. mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
  274. mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
  275. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
  276. mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
  277. mindspore/ops/_op_impl/aicpu/greater.py +41 -0
  278. mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
  279. mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
  280. mindspore/ops/_op_impl/aicpu/less.py +41 -0
  281. mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/lgamma.py} +16 -10
  282. mindspore/ops/_op_impl/aicpu/mirror_pad.py +0 -4
  283. mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
  284. mindspore/ops/_op_impl/aicpu/mul.py +3 -1
  285. mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
  286. mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
  287. mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
  288. mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
  289. mindspore/ops/_op_impl/aicpu/polar.py +32 -0
  290. mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
  291. mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
  292. mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
  293. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
  294. mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
  295. mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
  296. mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
  297. mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
  298. mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
  299. mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
  300. mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
  301. mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
  302. mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
  303. mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
  304. mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
  305. mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
  306. mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
  307. mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
  308. mindspore/ops/_op_impl/aicpu/sparse_slice.py +4 -0
  309. mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +6 -0
  310. mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
  311. mindspore/ops/_op_impl/aicpu/trans_data.py +1 -0
  312. mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
  313. mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
  314. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
  315. mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
  316. mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
  317. mindspore/ops/_op_impl/cpu/sparse_slice.py +4 -0
  318. mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +6 -0
  319. mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
  320. mindspore/ops/_op_impl/tbe/__init__.py +27 -611
  321. mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
  322. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  323. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
  324. mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +1 -0
  325. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  326. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
  327. mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
  328. mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
  329. mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
  330. mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
  331. mindspore/ops/_op_impl/tbe/cast.py +0 -2
  332. mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
  333. mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
  334. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +2 -2
  335. mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
  336. mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
  337. mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
  338. mindspore/ops/_op_impl/tbe/matmul_ds.py +2 -0
  339. mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
  340. mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
  341. mindspore/ops/_op_impl/tbe/scatter_mul.py +2 -0
  342. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -2
  343. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  344. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
  345. mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
  346. mindspore/ops/_register_for_op.py +1 -0
  347. mindspore/ops/_utils/__init__.py +1 -2
  348. mindspore/ops/_utils/utils.py +19 -40
  349. mindspore/ops/_vmap/vmap_array_ops.py +116 -38
  350. mindspore/ops/_vmap/vmap_base.py +16 -9
  351. mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
  352. mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
  353. mindspore/ops/_vmap/vmap_grad_nn_ops.py +7 -5
  354. mindspore/ops/_vmap/vmap_image_ops.py +12 -5
  355. mindspore/ops/_vmap/vmap_math_ops.py +46 -5
  356. mindspore/ops/_vmap/vmap_nn_ops.py +15 -21
  357. mindspore/ops/_vmap/vmap_random_ops.py +1 -1
  358. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  359. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  360. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
  361. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
  362. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  363. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  364. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  365. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
  366. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +220 -106
  367. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  368. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
  369. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
  370. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
  371. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
  372. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
  373. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
  374. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
  375. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  376. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  377. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -23
  378. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -17
  379. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
  380. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  381. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  382. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  383. mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
  384. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  385. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +39 -41
  386. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
  387. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +41 -43
  388. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +51 -57
  389. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  390. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
  391. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
  392. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  393. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
  394. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
  395. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
  396. mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
  397. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  398. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
  399. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
  400. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
  401. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
  402. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
  403. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  404. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
  405. mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
  406. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  407. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  408. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +24 -25
  409. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  410. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  411. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  412. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
  413. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
  414. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
  415. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  416. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +18 -19
  417. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +53 -53
  418. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
  419. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +77 -85
  420. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
  421. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
  422. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  423. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
  424. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
  425. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  426. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
  427. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
  428. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  429. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +37 -39
  430. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +70 -72
  431. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  432. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
  433. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  434. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  435. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +17 -17
  436. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
  437. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
  438. mindspore/ops/bprop_mindir/generate_mindir.py +2 -0
  439. mindspore/ops/composite/__init__.py +7 -8
  440. mindspore/ops/composite/base.py +101 -47
  441. mindspore/ops/composite/math_ops.py +188 -158
  442. mindspore/ops/composite/multitype_ops/_compile_utils.py +415 -170
  443. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +142 -87
  444. mindspore/ops/composite/multitype_ops/add_impl.py +6 -1
  445. mindspore/ops/composite/multitype_ops/div_impl.py +2 -3
  446. mindspore/ops/composite/multitype_ops/getitem_impl.py +31 -3
  447. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
  448. mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
  449. mindspore/ops/composite/multitype_ops/in_impl.py +9 -0
  450. mindspore/ops/composite/multitype_ops/less_equal_impl.py +31 -0
  451. mindspore/ops/composite/multitype_ops/less_impl.py +31 -0
  452. mindspore/ops/composite/multitype_ops/mul_impl.py +21 -5
  453. mindspore/ops/composite/multitype_ops/not_in_impl.py +9 -0
  454. mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
  455. mindspore/ops/composite/multitype_ops/setitem_impl.py +21 -3
  456. mindspore/ops/composite/multitype_ops/sub_impl.py +1 -1
  457. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +35 -4
  458. mindspore/ops/function/__init__.py +152 -8
  459. mindspore/ops/function/array_func.py +2555 -674
  460. mindspore/ops/function/clip_func.py +209 -13
  461. mindspore/ops/function/debug_func.py +2 -2
  462. mindspore/ops/function/grad/__init__.py +2 -1
  463. mindspore/ops/function/grad/grad_func.py +147 -62
  464. mindspore/ops/function/image_func.py +54 -38
  465. mindspore/ops/function/linalg_func.py +167 -16
  466. mindspore/ops/function/math_func.py +4849 -1492
  467. mindspore/ops/function/nn_func.py +2573 -988
  468. mindspore/ops/function/other_func.py +115 -0
  469. mindspore/ops/function/parameter_func.py +3 -3
  470. mindspore/ops/function/random_func.py +790 -73
  471. mindspore/ops/function/sparse_func.py +98 -78
  472. mindspore/ops/function/sparse_unary_func.py +54 -53
  473. mindspore/ops/function/spectral_func.py +27 -24
  474. mindspore/ops/function/vmap_func.py +22 -2
  475. mindspore/ops/functional.py +97 -37
  476. mindspore/ops/op_info_register.py +70 -28
  477. mindspore/ops/operations/__init__.py +47 -14
  478. mindspore/ops/operations/_csr_ops.py +7 -7
  479. mindspore/ops/operations/_embedding_cache_ops.py +5 -5
  480. mindspore/ops/operations/_grad_ops.py +276 -187
  481. mindspore/ops/operations/_inner_ops.py +319 -113
  482. mindspore/ops/operations/_ms_kernel.py +10 -8
  483. mindspore/ops/operations/_ocr_ops.py +9 -9
  484. mindspore/ops/operations/_opaque_predicate_registry.py +4 -0
  485. mindspore/ops/operations/_quant_ops.py +137 -102
  486. mindspore/ops/operations/_rl_inner_ops.py +121 -60
  487. mindspore/ops/operations/_scalar_ops.py +466 -0
  488. mindspore/ops/operations/_sequence_ops.py +1004 -2
  489. mindspore/ops/operations/_tensor_array.py +10 -11
  490. mindspore/ops/operations/_thor_ops.py +1 -1
  491. mindspore/ops/operations/array_ops.py +801 -466
  492. mindspore/ops/operations/comm_ops.py +51 -49
  493. mindspore/ops/operations/control_ops.py +2 -2
  494. mindspore/ops/operations/custom_ops.py +123 -44
  495. mindspore/ops/operations/debug_ops.py +24 -24
  496. mindspore/ops/operations/image_ops.py +240 -153
  497. mindspore/ops/operations/inner_ops.py +34 -50
  498. mindspore/ops/operations/linalg_ops.py +31 -9
  499. mindspore/ops/operations/math_ops.py +988 -757
  500. mindspore/ops/operations/nn_ops.py +965 -819
  501. mindspore/ops/operations/other_ops.py +51 -40
  502. mindspore/ops/operations/random_ops.py +204 -122
  503. mindspore/ops/operations/rl_ops.py +8 -9
  504. mindspore/ops/operations/sparse_ops.py +254 -93
  505. mindspore/ops/operations/spectral_ops.py +35 -3
  506. mindspore/ops/primitive.py +111 -9
  507. mindspore/parallel/_auto_parallel_context.py +189 -83
  508. mindspore/parallel/_offload_context.py +185 -0
  509. mindspore/parallel/_parallel_serialization.py +99 -7
  510. mindspore/parallel/_ps_context.py +9 -5
  511. mindspore/parallel/_recovery_context.py +1 -1
  512. mindspore/parallel/_tensor.py +7 -1
  513. mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
  514. mindspore/{nn/transformer → parallel/_transformer}/layers.py +6 -37
  515. mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
  516. mindspore/{nn/transformer → parallel/_transformer}/moe.py +20 -16
  517. mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
  518. mindspore/{nn/transformer → parallel/_transformer}/transformer.py +48 -111
  519. mindspore/parallel/_utils.py +1 -2
  520. mindspore/parallel/algo_parameter_config.py +1 -1
  521. mindspore/parallel/checkpoint_transform.py +37 -34
  522. mindspore/parallel/shard.py +17 -18
  523. mindspore/profiler/common/validator/validate_path.py +2 -2
  524. mindspore/profiler/envprofiling.py +69 -47
  525. mindspore/profiler/parser/ascend_timeline_generator.py +49 -42
  526. mindspore/profiler/parser/base_timeline_generator.py +49 -56
  527. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +98 -78
  528. mindspore/profiler/parser/hwts_log_parser.py +1 -1
  529. mindspore/profiler/parser/integrator.py +15 -14
  530. mindspore/profiler/parser/minddata_analyzer.py +2 -2
  531. mindspore/profiler/parser/msadvisor_analyzer.py +12 -25
  532. mindspore/profiler/parser/msadvisor_parser.py +2 -4
  533. mindspore/profiler/parser/optime_parser.py +17 -18
  534. mindspore/profiler/parser/profiler_info.py +2 -1
  535. mindspore/profiler/profiling.py +218 -186
  536. mindspore/rewrite/__init__.py +3 -1
  537. mindspore/rewrite/api/node.py +1 -114
  538. mindspore/rewrite/api/node_type.py +3 -0
  539. mindspore/rewrite/api/pattern_engine.py +31 -1
  540. mindspore/rewrite/api/scoped_value.py +4 -4
  541. mindspore/rewrite/api/symbol_tree.py +3 -78
  542. mindspore/rewrite/api/tree_node_helper.py +1 -1
  543. mindspore/rewrite/ast_creator_register.py +1 -0
  544. mindspore/rewrite/ast_helpers/__init__.py +2 -2
  545. mindspore/rewrite/ast_helpers/ast_creator.py +1 -2
  546. mindspore/rewrite/ast_helpers/ast_finder.py +65 -0
  547. mindspore/rewrite/ast_helpers/ast_modifier.py +11 -3
  548. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +18 -2
  549. mindspore/rewrite/namespace.py +0 -2
  550. mindspore/rewrite/node.py +157 -11
  551. mindspore/rewrite/parsers/assign_parser.py +231 -53
  552. mindspore/rewrite/parsers/class_def_parser.py +187 -109
  553. mindspore/rewrite/parsers/for_parser.py +24 -14
  554. mindspore/rewrite/parsers/function_def_parser.py +21 -4
  555. mindspore/rewrite/parsers/if_parser.py +6 -2
  556. mindspore/rewrite/sparsify/__init__.py +0 -0
  557. mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
  558. mindspore/rewrite/sparsify/sparsify.py +109 -0
  559. mindspore/rewrite/sparsify/utils.py +173 -0
  560. mindspore/rewrite/symbol_tree.py +256 -133
  561. mindspore/rewrite/symbol_tree_builder.py +38 -1
  562. mindspore/run_check/_check_version.py +69 -63
  563. mindspore/run_check/run_check.py +2 -1
  564. mindspore/tinyxml2.dll +0 -0
  565. mindspore/train/__init__.py +1 -1
  566. mindspore/train/_utils.py +28 -5
  567. mindspore/train/amp.py +273 -102
  568. mindspore/train/callback/_backup_and_restore.py +5 -5
  569. mindspore/train/callback/_callback.py +2 -2
  570. mindspore/train/callback/_checkpoint.py +3 -3
  571. mindspore/train/callback/_early_stop.py +3 -3
  572. mindspore/train/callback/_lambda_callback.py +2 -2
  573. mindspore/train/callback/_landscape.py +29 -31
  574. mindspore/train/callback/_loss_monitor.py +3 -3
  575. mindspore/train/callback/_on_request_exit.py +3 -3
  576. mindspore/train/callback/_reduce_lr_on_plateau.py +4 -4
  577. mindspore/train/callback/_summary_collector.py +23 -16
  578. mindspore/train/callback/_time_monitor.py +3 -3
  579. mindspore/train/checkpoint_pb2.py +68 -8
  580. mindspore/train/data_sink.py +15 -3
  581. mindspore/train/dataset_helper.py +10 -15
  582. mindspore/train/loss_scale_manager.py +8 -11
  583. mindspore/train/metrics/__init__.py +1 -1
  584. mindspore/train/metrics/bleu_score.py +1 -1
  585. mindspore/train/metrics/confusion_matrix.py +1 -1
  586. mindspore/train/metrics/cosine_similarity.py +1 -1
  587. mindspore/train/metrics/dice.py +2 -2
  588. mindspore/train/metrics/fbeta.py +1 -1
  589. mindspore/train/metrics/hausdorff_distance.py +4 -3
  590. mindspore/train/metrics/mean_surface_distance.py +2 -2
  591. mindspore/train/metrics/occlusion_sensitivity.py +1 -1
  592. mindspore/train/metrics/perplexity.py +1 -1
  593. mindspore/train/metrics/precision.py +1 -1
  594. mindspore/train/metrics/recall.py +1 -1
  595. mindspore/train/metrics/roc.py +2 -2
  596. mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
  597. mindspore/train/mind_ir_pb2.py +116 -37
  598. mindspore/train/model.py +45 -28
  599. mindspore/train/serialization.py +295 -188
  600. mindspore/train/summary/_summary_adapter.py +1 -1
  601. mindspore/train/summary/summary_record.py +43 -13
  602. mindspore/train/train_thor/convert_utils.py +2 -2
  603. mindspore/train/train_thor/dataset_helper.py +3 -3
  604. mindspore/turbojpeg.dll +0 -0
  605. mindspore/version.py +1 -1
  606. {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +3 -2
  607. {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +610 -541
  608. mindspore/compression/__init__.py +0 -19
  609. mindspore/compression/common/constant.py +0 -124
  610. mindspore/compression/export/__init__.py +0 -19
  611. mindspore/compression/export/quant_export.py +0 -515
  612. mindspore/compression/quant/__init__.py +0 -28
  613. mindspore/compression/quant/qat.py +0 -634
  614. mindspore/compression/quant/quant_utils.py +0 -462
  615. mindspore/compression/quant/quantizer.py +0 -68
  616. mindspore/nn/layer/quant.py +0 -1868
  617. mindspore/nn/layer/rnn_utils.py +0 -90
  618. mindspore/nn/probability/dpn/__init__.py +0 -22
  619. mindspore/nn/probability/dpn/vae/__init__.py +0 -25
  620. mindspore/nn/probability/dpn/vae/cvae.py +0 -140
  621. mindspore/nn/probability/dpn/vae/vae.py +0 -124
  622. mindspore/nn/probability/infer/__init__.py +0 -22
  623. mindspore/nn/probability/infer/variational/elbo.py +0 -70
  624. mindspore/nn/probability/infer/variational/svi.py +0 -84
  625. mindspore/nn/probability/toolbox/__init__.py +0 -22
  626. mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
  627. mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -364
  628. mindspore/nn/probability/transforms/__init__.py +0 -22
  629. mindspore/nn/probability/transforms/transform_bnn.py +0 -262
  630. mindspore/nn/probability/zhusuan/__init__.py +0 -18
  631. mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
  632. mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
  633. mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
  634. mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
  635. mindspore/ops/_op_impl/aicpu/parallel_concat.py +0 -42
  636. mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
  637. mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -19
  638. mindspore/ops/bprop_mindir/Cast_bprop.mindir +0 -19
  639. mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -19
  640. mindspore/ops/bprop_mindir/MatMul_bprop.mindir +0 -0
  641. mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -17
  642. mindspore/ops/bprop_mindir/Transpose_bprop.mindir +0 -0
  643. mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -15
  644. mindspore/ops/composite/array_ops.py +0 -241
  645. mindspore/ops/composite/clip_ops.py +0 -134
  646. mindspore/ops/composite/random_ops.py +0 -426
  647. mindspore/ops/composite/vmap_ops.py +0 -38
  648. mindspore/parallel/nn/__init__.py +0 -42
  649. mindspore/parallel/nn/loss.py +0 -22
  650. mindspore/parallel/nn/moe.py +0 -21
  651. mindspore/parallel/nn/op_parallel_config.py +0 -22
  652. mindspore/parallel/nn/transformer.py +0 -31
  653. {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
  654. {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
  655. {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -16,7 +16,7 @@
  from __future__ import absolute_import

  from mindspore.ops import functional as F, composite as C, operations as P
- from mindspore._checkparam import Validator as validator
+ from mindspore import _checkparam as validator
  from mindspore.common.api import jit
  from mindspore.nn.optim.optimizer import Optimizer
  from mindspore.nn.optim.optimizer import opt_init_args_register
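
Note on the recurring import change above: 2.0.0rc1 drops the `Validator` class and instead imports the `mindspore._checkparam` module itself under the `validator` alias, so existing call sites keep the same helper names. A minimal before/after sketch (illustrative values; only helpers visible in this diff are shown):

    # 2.0.0a0: class-based validator
    # from mindspore._checkparam import Validator as validator

    # 2.0.0rc1: the module itself is aliased; helper names stay the same
    from mindspore import _checkparam as validator

    validator.check_value_type("accum", 0.1, [float], "Adagrad")
    validator.check_non_negative_float(0.1, "accum", "Adagrad")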
@@ -53,7 +53,7 @@ class Adagrad(Optimizer):
  The updating Pseudo codes are as follows:

  .. math::
- \begin{aligned} \\
+ \begin{aligned} \\
  &\newline
  &\hline \\
  &\textbf{Parameters}: \text{learning rate } \gamma, \: \text{ params } w_0, \:
@@ -73,7 +73,7 @@ class Adagrad(Optimizer):
  &\bf{return} \: w_t \\[-1.ex]
  &\newline
  &\hline \\
- \end{aligned}
+ \end{aligned}

  :math:`state\_sum` stands for the accumulated squared sum of the gradients :math:`accum`.
  :math:`g` stands for `grads`, :math:`\lambda` stands for `weight_decay`.
@@ -112,7 +112,7 @@ class Adagrad(Optimizer):
  If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
  one group of `params`.

- accum (float): The starting value for `h`, must be zero or positive values. Default: 0.1.
+ accum (float): The starting value for :math:`h`, must be zero or positive values. Default: 0.1.
  learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 0.001.

  - float: The fixed learning rate value. Must be equal to or greater than 0.
@@ -127,7 +127,7 @@ class Adagrad(Optimizer):
  - LearningRateSchedule: Learning rate is dynamic. During training, the optimizer calls the instance of
  LearningRateSchedule with step as the input to get the learning rate of current step.

- update_slots (bool): Whether the `h` will be updated. Default: True.
+ update_slots (bool): Whether the :math:`h` will be updated. Default: True.
  loss_scale (float): Value for the loss scale. It must be greater than 0.0. In general, use the default value.
  Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
  `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
@@ -159,7 +159,7 @@ class Adagrad(Optimizer):
  ValueError: If `accum` or `weight_decay` is less than 0.

  Supported Platforms:
- ``Ascend`` ``CPU`` ``GPU``
+ ``Ascend`` ``GPU`` ``CPU``

  Examples:
  >>> import mindspore as ms
@@ -16,8 +16,7 @@
  from __future__ import absolute_import

  from mindspore.ops import functional as F, composite as C, operations as P
- from mindspore._checkparam import Validator as validator
- from mindspore._checkparam import Rel
+ from mindspore import _checkparam as validator
  from mindspore.common.tensor import Tensor
  from mindspore.nn.optim.optimizer import Optimizer
  from mindspore.nn.optim.optimizer import opt_init_args_register
@@ -37,7 +36,7 @@ def _check_param_value(rho, epsilon, prim_name=None):
  """Check inputs param."""
  validator.check_value_type("rho", rho, [float], prim_name)
  validator.check_value_type("epsilon", epsilon, [float], prim_name)
- validator.check_float_range(rho, 0.0, 1.0, Rel.INC_BOTH, "rho", prim_name)
+ validator.check_float_range(rho, 0.0, 1.0, validator.INC_BOTH, "rho", prim_name)
  validator.check_non_negative_float(epsilon, "epsilon", prim_name)


@@ -25,8 +25,7 @@ from mindspore.ops import composite as C
  from mindspore.ops import functional as F
  from mindspore.common.parameter import Parameter, ParameterTuple
  from mindspore.common.tensor import Tensor
- from mindspore._checkparam import Validator as validator
- from mindspore._checkparam import Rel
+ from mindspore import _checkparam as validator
  from mindspore.nn.optim.optimizer import opt_init_args_register
  from mindspore.nn.optim.optimizer import Optimizer

@@ -153,7 +152,7 @@ class AdaFactor(Optimizer):
  Cost <https://arxiv.org/abs/1804.04235>`_.

  .. warning::
- This is an experimental prototype that is subject to change and/or deletion.
+ This is an experimental API that is subject to change or deletion.

  Adafactor for weight vector are as follows,

@@ -316,8 +315,8 @@ class AdaFactor(Optimizer):
  validator.check_value_type("clip_threshold", clip_threshold, [float], self.cls_name)
  validator.check_non_negative_float(clip_threshold, "clip_threshold", self.cls_name)
  validator.check_value_type("decay_rate", decay_rate, [float], self.cls_name)
- validator.check_float_range(decay_rate, 0, 1, Rel.INC_NEITHER, "decay_rate", self.cls_name)
- validator.check_float_range(weight_decay, 0, 1, Rel.INC_LEFT, "weight_decay", self.cls_name)
+ validator.check_float_range(decay_rate, 0, 1, validator.INC_NEITHER, "decay_rate", self.cls_name)
+ validator.check_float_range(weight_decay, 0, 1, validator.INC_LEFT, "weight_decay", self.cls_name)
  validator.check_value_type("scale_parameter", scale_parameter, [bool], self.cls_name)
  validator.check_value_type("relative_step", relative_step, [bool], self.cls_name)
  validator.check_value_type("compression", compression, [bool], self.cls_name)
@@ -26,8 +26,7 @@ from mindspore.ops import composite as C
  from mindspore.ops import functional as F
  from mindspore.common.parameter import Parameter
  from mindspore.common.tensor import Tensor
- from mindspore._checkparam import Validator as validator
- from mindspore._checkparam import Rel
+ from mindspore import _checkparam as validator
  from mindspore.nn.optim.optimizer import Optimizer
  from mindspore.nn.optim.optimizer import opt_init_args_register
  from mindspore.nn.optim._dist_optimizer_registry import _register_dist_optimizer
@@ -93,6 +92,46 @@ def _run_lazy_opt_with_sparse_dist(opt, sparse_opt, push, pull, use_locking, use
      return success


+ @_lazy_adam_opt.register("Function", "Function", "Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
+                          "Tensor", "Tensor", "Tensor", "Tensor", "MapTensor", "MapTensor", "MapTensor", "MapTensor",
+                          "Bool", "Bool", "Function", "Bool", "Function", "Bool")
+ def _run_map_tensor_lazy_opt_with_sparse_dist(opt, sparse_opt, push, pull, use_locking, use_nesterov, target,
+                                               beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, params, m, v,
+                                               ps_parameter, cache_enable, distributed_opt, use_flag,
+                                               distributed_sparse_opt, use_sparse_flag):
+     """Apply sparse lazy adam optimizer to the weight parameter when the gradient is sparse."""
+     success = True
+     indices, values = gradient.get_data()
+     if use_sparse_flag:
+         # PS Mode.
+         success = F.depend(success, distributed_sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2,
+                                                            eps, values, indices))
+     else:
+         # PS Cache mode.
+         op_sqrt = P.Sqrt()
+
+         m_slice = m.get(indices)
+         v_slice = v.get(indices)
+
+         next_m = m_slice * beta1 + values * (1 - beta1)
+         next_v = v_slice * beta2 + values * values * (1 - beta2)
+
+         lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
+
+         if use_nesterov:
+             m_temp = beta1 * next_m + values * (1 - beta1)
+             param_update = m_temp / (op_sqrt(next_v) + eps)
+         else:
+             param_update = next_m / (op_sqrt(next_v) + eps)
+
+         params_need_update = params.get(indices)
+         params.put(indices, params_need_update - lr_t * param_update)
+         m.put(indices, next_m)
+         v.put(indices, next_v)
+
+     return success
+
+
  @_lazy_adam_opt.register("Function", "Function", "Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
                           "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Bool", "Bool",
                           "Function", "Bool", "Function", "Bool")
@@ -360,18 +399,14 @@ def _run_opt_with_one_number_dist(opt, sparse_opt, push, pull, use_locking, use_


  @_adam_opt.register("Function", "Function", "Function", "Function",
-                     "Bool", "Bool", "Bool", "Bool",
+                     "Bool", "Bool", "Bool",
                      "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor",
-                     "RowTensor", "Tensor", "Tensor", "Tensor", "Tensor", "Bool", "Bool")
+                     "RowTensor", "Tensor", "Tensor", "Tensor", "Bool", "Bool")
  def _run_opt_with_sparse(opt, sparse_opt, push, pull,
-                          use_locking, use_nesterov, use_amsgrad, target,
+                          use_locking, use_nesterov, target,
                           beta1_power, beta2_power, beta1, beta2, eps, lr,
-                          gradient, param, m, v, vhat, ps_parameter, cache_enable):
+                          gradient, param, m, v, ps_parameter, cache_enable):
      """Apply sparse adam optimizer to the weight parameter when the gradient is sparse."""
-     if use_amsgrad:
-         raise Exception("""Adam with amsgrad is currently not supported when the gradients are sparse!
-                         Please set use_amsgrad=False for sparse gradients.""")
-
      success = True
      indices = gradient.indices
      values = gradient.values
@@ -429,30 +464,42 @@ def _run_opt_with_sparse(opt, sparse_opt, push, pull,


  @_adam_opt.register("Function", "Function", "Function", "Function",
-                     "Bool", "Bool", "Bool", "Bool",
+                     "Bool", "Bool", "Bool",
                      "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor",
-                     "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Bool", "Bool")
+                     "Tensor", "Tensor", "Tensor", "Tensor", "Bool", "Bool")
  def _run_opt_with_one_number(opt, sparse_opt, push, pull,
-                              use_locking, use_nesterov, use_amsgrad, target,
+                              use_locking, use_nesterov, target,
                               beta1_power, beta2_power, beta1, beta2, eps, lr,
-                              gradient, param, moment1, moment2, vhat, ps_parameter, cache_enable):
+                              gradient, param, moment1, moment2, ps_parameter, cache_enable):
      """Apply adam optimizer to the weight parameter using Tensor."""
      success = True
      if ps_parameter and not cache_enable:
          op_shape = P.Shape()
-         if use_amsgrad:
-             success = F.depend(success, pull(push((beta1_power, beta2_power, lr, gradient),
-                                                    (op_shape(param), op_shape(moment1), op_shape(moment2),
-                                                     op_shape(vhat))), param))
-         else:
-             success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
-                                                    (op_shape(param), op_shape(moment1), op_shape(moment2))), param))
+         success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
+                                               (op_shape(param), op_shape(moment1), op_shape(moment2))), param))
      else:
-         if use_amsgrad:
-             success = F.depend(success, opt(param, moment1, moment2, vhat, beta1_power, beta2_power, lr, gradient))
-         else:
-             success = F.depend(success, opt(param, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2,
-                                             eps, gradient))
+         success = F.depend(success, opt(param, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2,
+                                         eps, gradient))
+     return success
+
+
+ @_adam_opt.register("Function", "Function", "Function", "Function",
+                     "Bool", "Bool", "Bool",
+                     "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor",
+                     "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Bool", "Bool")
+ def _run_opt_with_one_number_use_amsgrad(opt, sparse_opt, push, pull,
+                                          use_locking, use_nesterov, target,
+                                          beta1_power, beta2_power, beta1, beta2, eps, lr,
+                                          gradient, param, moment1, moment2, vhat, ps_parameter, cache_enable):
+     """Apply adam optimizer to the weight parameter using Tensor and use amsgrad."""
+     success = True
+     if ps_parameter and not cache_enable:
+         op_shape = P.Shape()
+         success = F.depend(success, pull(push((beta1_power, beta2_power, lr, gradient),
+                                               (op_shape(param), op_shape(moment1), op_shape(moment2),
+                                                op_shape(vhat))), param))
+     else:
+         success = F.depend(success, opt(param, moment1, moment2, vhat, beta1_power, beta2_power, lr, gradient))
      return success
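
The two hunks above remove the `use_amsgrad` flag (and the sparse-case exception) from the generic overloads and move the amsgrad path into its own `_run_opt_with_one_number_use_amsgrad` registration; `_adam_opt` then selects the variant from the registered type signature, where the amsgrad overload carries one extra "Tensor" slot for `vhat`. A toy illustration of that signature-based dispatch, unrelated to the optimizer math:

    from mindspore.ops import composite as C

    _demo_opt = C.MultitypeFuncGraph("demo_opt")

    @_demo_opt.register("Tensor", "Tensor")
    def _without_extra_state(grad, param):
        # Picked when the call site supplies two tensors.
        return param - grad

    @_demo_opt.register("Tensor", "Tensor", "Tensor")
    def _with_extra_state(grad, param, vhat):
        # Picked when an extra state tensor (think `vhat`) is passed along.
        return param - grad * vhat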
@@ -484,8 +531,8 @@ def _check_param_value(beta1, beta2, eps, prim_name):
  validator.check_value_type("beta1", beta1, [float], prim_name)
  validator.check_value_type("beta2", beta2, [float], prim_name)
  validator.check_value_type("eps", eps, [float], prim_name)
- validator.check_float_range(beta1, 0.0, 1.0, Rel.INC_NEITHER, "beta1", prim_name)
- validator.check_float_range(beta2, 0.0, 1.0, Rel.INC_NEITHER, "beta2", prim_name)
+ validator.check_float_range(beta1, 0.0, 1.0, validator.INC_NEITHER, "beta1", prim_name)
+ validator.check_float_range(beta2, 0.0, 1.0, validator.INC_NEITHER, "beta2", prim_name)
  validator.check_positive_float(eps, "eps", prim_name)


@@ -723,7 +770,8 @@ class Adam(Optimizer):
  self.use_offload = use_offload
  self.moment1 = self._parameters.clone(prefix="moment1", init='zeros')
  self.moment2 = self._parameters.clone(prefix="moment2", init='zeros')
- self.vhat = self._parameters.clone(prefix="vhat", init='zeros')
+ if use_amsgrad:
+     self.vhat = self._parameters.clone(prefix="vhat", init='zeros')

  if use_offload:
      self.opt = P.AdamNoUpdateParam(use_locking, use_nesterov)
@@ -756,25 +804,8 @@ class Adam(Optimizer):


  self._init_distributed_opts(use_locking, use_nesterov)
- @jit
- def construct(self, gradients):
-     params = self._parameters
-     moment1 = self.moment1
-     moment2 = self.moment2
-     vhat = self.vhat
-     gradients = self.flatten_gradients(gradients)
-     gradients = self.decay_weight(gradients)
-     if not self.use_offload:
-         gradients = self.gradients_centralization(gradients)
-     gradients = self.scale_grad(gradients)
-     gradients = self._grad_sparse_indices_deduplicate(gradients)
-     lr = self.get_lr()
-
-     beta1_power = self.beta1_power * self.beta1
-     self.beta1_power = beta1_power
-     beta2_power = self.beta2_power * self.beta2
-     self.beta2_power = beta2_power
-
+ def _apply_adam(self, params, beta1_power, beta2_power, moment1, moment2, lr, gradients):
+     """Execute Adam optimizer and its variants."""
      if self.use_offload:
          if self.is_group_lr:
              success = self.map_reverse(F.partial(_adam_opt, self.opt, beta1_power, beta2_power, self.beta1,
@@ -831,13 +862,19 @@ class Adam(Optimizer):
  self._is_device, beta1_power, beta2_power, self.beta1, self.beta2,
  self.eps), lr, gradients, params, moment1, moment2,
  self.ps_parameters, self.cache_enable)
-
  else:
-     success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt, self._ps_push,
-                                   self._ps_pull, self.use_locking, self.use_nesterov,
-                                   self.use_amsgrad, self._is_device, beta1_power, beta2_power,
-                                   self.beta1, self.beta2, self.eps), lr, gradients, params,
-                                   moment1, moment2, vhat, self.ps_parameters, self.cache_enable)
+     if self.use_amsgrad:
+         success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt, self._ps_push,
+                                       self._ps_pull, self.use_locking, self.use_nesterov,
+                                       self._is_device, beta1_power, beta2_power,
+                                       self.beta1, self.beta2, self.eps), lr, gradients, params,
+                                       moment1, moment2, self.vhat, self.ps_parameters, self.cache_enable)
+     else:
+         success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt, self._ps_push,
+                                       self._ps_pull, self.use_locking, self.use_nesterov,
+                                       self._is_device, beta1_power, beta2_power,
+                                       self.beta1, self.beta2, self.eps), lr, gradients, params,
+                                       moment1, moment2, self.ps_parameters, self.cache_enable)
  else:
      if self.use_lazy:
          success = self.map_(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt, self._ps_push,
@@ -845,15 +882,42 @@ class Adam(Optimizer):
  self._is_device, beta1_power, beta2_power, self.beta1, self.beta2,
  self.eps, lr), gradients, params, moment1, moment2,
  self.ps_parameters, self.cache_enable)
-
  else:
-     success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt, self._ps_push,
-                                   self._ps_pull, self.use_locking, self.use_nesterov,
-                                   self.use_amsgrad, self._is_device, beta1_power, beta2_power,
-                                   self.beta1, self.beta2, self.eps, lr), gradients, params,
-                                   moment1, moment2, vhat, self.ps_parameters, self.cache_enable)
+     if self.use_amsgrad:
+         success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt, self._ps_push,
+                                       self._ps_pull, self.use_locking, self.use_nesterov,
+                                       self._is_device, beta1_power, beta2_power,
+                                       self.beta1, self.beta2, self.eps, lr), gradients, params,
+                                       moment1, moment2, self.vhat, self.ps_parameters, self.cache_enable)
+     else:
+         success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt, self._ps_push,
+                                       self._ps_pull, self.use_locking, self.use_nesterov,
+                                       self._is_device, beta1_power, beta2_power,
+                                       self.beta1, self.beta2, self.eps, lr), gradients, params,
+                                       moment1, moment2, self.ps_parameters, self.cache_enable)
+
  return success

+ @jit
+ def construct(self, gradients):
+     params = self._parameters
+     moment1 = self.moment1
+     moment2 = self.moment2
+     gradients = self.flatten_gradients(gradients)
+     gradients = self.decay_weight(gradients)
+     if not self.use_offload:
+         gradients = self.gradients_centralization(gradients)
+     gradients = self.scale_grad(gradients)
+     gradients = self._grad_sparse_indices_deduplicate(gradients)
+     lr = self.get_lr()
+
+     beta1_power = self.beta1_power * self.beta1
+     self.beta1_power = beta1_power
+     beta2_power = self.beta2_power * self.beta2
+     self.beta2_power = beta2_power
+
+     return self._apply_adam(params, beta1_power, beta2_power, moment1, moment2, lr, gradients)
+
  @Optimizer.target.setter
  def target(self, value):
      """
@@ -907,13 +971,13 @@ class AdamWeightDecay(Optimizer):
  :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
  :math:`g` represents `gradients`, :math:`\gamma` represents `learning_rate`,
  :math:`\beta_1, \beta_2` represent `beta1` and `beta2`, :math:`t` represents the current step,
- :math:`w` represents `params`, :math:`\gamma` represents `weight_decay`.
+ :math:`w` represents `params`, :math:`\lambda` represents `weight_decay`.

  Note:
  There is usually no connection between an optimizer and mixed precision. But when `FixedLossScaleManager` is used
  and `drop_overflow_update` in `FixedLossScaleManager` is set to False, the optimizer needs to set the 'loss_scale'.
  As this optimizer has no argument of `loss_scale`, `loss_scale` needs to be processed by other means; refer to the
- document `LossScale <https://www.mindspore.cn/tutorials/zh-CN/r2.0.0-alpha/advanced/mixed_precision.html>`_ to
+ document `LossScale <https://www.mindspore.cn/tutorials/en/r2.0/advanced/mixed_precision.html>`_ to
  process `loss_scale` correctly.

  If parameters are not grouped, the `weight_decay` in the optimizer will be applied on the network parameters without
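For reference, the symbol corrected earlier in this hunk (:math:`\lambda` now denoting `weight_decay`) belongs to the decoupled-weight-decay form of Adam; a sketch of that conventional update (illustrative only, not a verbatim copy of the docstring's own formula block):

.. math::
    \begin{array}{ll}
    m_{t+1} = \beta_1 m_t + (1 - \beta_1) g \\
    v_{t+1} = \beta_2 v_t + (1 - \beta_2) g^2 \\
    u_{t+1} = \frac{m_{t+1}}{\sqrt{v_{t+1}} + \epsilon} + \lambda w_t \\
    w_{t+1} = w_t - \gamma \, u_{t+1}
    \end{array}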
@@ -23,10 +23,9 @@ from mindspore.ops import composite as C
  from mindspore.ops import functional as F
  from mindspore.common.parameter import Parameter
  from mindspore.common.tensor import Tensor
- from mindspore._checkparam import Validator as validator
+ from mindspore import _checkparam as validator
  from mindspore.nn.optim.optimizer import Optimizer
  from mindspore.nn.optim.optimizer import opt_init_args_register
- from mindspore._checkparam import Rel

  _ada_max_opt = C.MultitypeFuncGraph("ada_max_opt")

@@ -44,8 +43,8 @@ def _check_param_value(beta1, beta2, eps, prim_name):
  validator.check_value_type("beta1", beta1, [float], prim_name)
  validator.check_value_type("beta2", beta2, [float], prim_name)
  validator.check_value_type("eps", eps, [float], prim_name)
- validator.check_float_range(beta1, 0.0, 1.0, Rel.INC_NEITHER, "beta1", prim_name)
- validator.check_float_range(beta2, 0.0, 1.0, Rel.INC_NEITHER, "beta2", prim_name)
+ validator.check_float_range(beta1, 0.0, 1.0, validator.INC_NEITHER, "beta1", prim_name)
+ validator.check_float_range(beta2, 0.0, 1.0, validator.INC_NEITHER, "beta2", prim_name)
  validator.check_positive_float(eps, "eps", prim_name)

@@ -22,7 +22,7 @@ import math
  import mindspore.nn as nn
  import mindspore.log as logger
  from mindspore import context
- from mindspore._checkparam import Validator as validator
+ from mindspore import _checkparam as validator
  from mindspore.nn.cell import Cell
  from mindspore.common.parameter import ParameterTuple, Parameter
  from mindspore.parallel._utils import _get_global_rank, _get_stage_device_num
@@ -114,7 +114,7 @@ def _adasum_opt_forward_process(left_send, allreduce, parameter_divisibility, al
  if parameter_divisibility:
  delta_w = P.Squeeze()(delta_w)
  ori_len = F.shape(delta_w)[0]
- divide_len = ori_len / 2
+ divide_len = ori_len // 2
  left_part = delta_w[:divide_len]
  right_part = delta_w[divide_len:]
  else:
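The `/` to `//` change matters because `F.shape(delta_w)[0] / 2` produces a float, and a float is not a valid slice index. A standalone illustration in plain Python (purely illustrative, not the MindSpore code above):

    delta_w = list(range(6))
    half = len(delta_w) // 2          # 3 (an int), valid as a slice index
    left_part, right_part = delta_w[:half], delta_w[half:]
    # len(delta_w) / 2 would give 3.0, and delta_w[:3.0] raises TypeError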
@@ -412,8 +412,8 @@ class AdaSumByGradWrapCell(Cell):
  .. math::
  \begin{array}{ll}
  w_{t+1}=w_{t} - \alpha \cdot Adasum(g_{1}, g_{2}) \\
- w_{t+1}=w_{t} - \alpha \cdot [(1 - \frac{g_2^{T}\cdot g_1}{2\cdot \left \| g_1 \right \|^2 })\cdot g_1 +
- (1 - \frac{g_1^{T}\cdot g_2}{2\cdot \left \| g_2 \right \|^2 })\cdot g_2] \\
+ w_{t+1}=w_{t} - \alpha \cdot [(1 - \frac{g_2^{T}\cdot g_1}{2\cdot \left \| g_1 \right \|^2 })\cdot g_1 + (1 -
+ \frac{g_1^{T}\cdot g_2}{2\cdot \left \| g_2 \right \|^2 })\cdot g_2] \\
  \end{array}

  In this implementation, :math:`g` represents the gradient of the weights,
@@ -479,8 +479,8 @@ class AdaSumByDeltaWeightWrapCell(Cell):
  .. math::
  \begin{array}{ll}
  w_{t+1}=w_{t} - \alpha \cdot Adasum(g_{1}, g_{2}) \\
- w_{t+1}=w_{t} - \alpha \cdot [(1 - \frac{g_2^{T}\cdot g_1}{2\cdot \left \| g_1 \right \|^2 })\cdot g_1 +
- (1 - \frac{g_1^{T}\cdot g_2}{2\cdot \left \| g_2 \right \|^2 })\cdot g_2] \\
+ w_{t+1}=w_{t} - \alpha \cdot [(1 - \frac{g_2^{T}\cdot g_1}{2\cdot \left \| g_1 \right \|^2 })\cdot g_1 + (1 -
+ \frac{g_1^{T}\cdot g_2}{2\cdot \left \| g_2 \right \|^2 })\cdot g_2] \\
  \end{array}

  In this implementation, :math:`g` represents the weight difference before and after the updating of optimizer,
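Both AdaSum hunks above only rewrap the same combination rule inside the square brackets. As a quick numeric sketch of that rule (NumPy here, illustrative only, not MindSpore's implementation):

    import numpy as np

    def adasum_combine(g1, g2):
        # Combine two gradient (or delta-weight) vectors per the docstring formula above.
        c1 = 1.0 - np.dot(g2, g1) / (2.0 * np.dot(g1, g1))
        c2 = 1.0 - np.dot(g1, g2) / (2.0 * np.dot(g2, g2))
        return c1 * g1 + c2 * g2

    g1 = np.array([1.0, 0.0])
    g2 = np.array([0.0, 1.0])
    print(adasum_combine(g1, g2))   # orthogonal updates are simply summed: [1. 1.]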
@@ -21,7 +21,7 @@ from mindspore.common.api import jit
  from mindspore.common.tensor import Tensor
  import mindspore.common.dtype as mstype
  import mindspore
- from mindspore._checkparam import Validator as validator
+ from mindspore import _checkparam as validator
  from mindspore.nn.optim.optimizer import Optimizer
  from mindspore.nn.optim.optimizer import opt_init_args_register

@@ -96,7 +96,7 @@ class ASGD(Optimizer):
  LearningRateSchedule with step as the input to get the learning rate of current step.

  lambd (float): The decay term. Default: 1e-4.
- alpha (float): The power for :math:`eta` update. Default: 0.75.
+ alpha (float): The power for :math:`\eta` update. Default: 0.75.
  t0 (float): The point of starting averaging. Default: 1e6.
  weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.

@@ -18,8 +18,7 @@ from __future__ import absolute_import
  from mindspore.ops import functional as F, composite as C, operations as P
  from mindspore.ops.composite.multitype_ops.zeros_like_impl import zeros_like
  from mindspore.common.api import jit
- from mindspore._checkparam import Validator as validator
- from mindspore._checkparam import Rel
+ from mindspore import _checkparam as validator
  from mindspore.nn.optim.optimizer import Optimizer
  from mindspore.nn.optim.optimizer import opt_init_args_register
  from mindspore.nn.optim._dist_optimizer_registry import _register_dist_optimizer
@@ -48,6 +47,62 @@ def _tensor_run_opt_with_sparse_dist(opt, spars_opt, push, pull, l1, l2, lr_powe
  return success

+ def _apply_map_tensor_ftrl(l1, l2, lr_power, learning_rate, linear, weight, moment, indices, values):
+ """Apply the FTRL optimizer to a map parameter."""
+ success = True
+ linear_slice = linear.get(indices)
+ moment_slice = moment.get(indices)
+ weight_slice = weight.get(indices)
+
+ op_pow = P.Pow()
+ op_sign = P.Sign()
+ op_greater = P.Greater()
+ op_select = P.Select()
+ op_abs = P.Abs()
+
+ lr_power_val = -lr_power
+ accu_pow = op_pow(moment_slice, lr_power_val)
+ moment_slice = F.depend(moment_slice, accu_pow)
+ cur_accu = moment_slice + values * values
+ cur_accu_pow = op_pow(cur_accu, lr_power_val)
+ sigma = (cur_accu_pow - accu_pow) / learning_rate
+
+ linear_slice = linear_slice + values - sigma * weight_slice
+
+ update_weight_cond = op_greater(op_abs(linear_slice), l1)
+ updated_weight = (l1 * op_sign(linear_slice) - linear_slice) / (cur_accu_pow / learning_rate + 2 * l2)
+ zeros = zeros_like(weight_slice)
+
+ weight_slice = op_select(update_weight_cond, updated_weight, zeros)
+ moment_slice = cur_accu
+
+ moment.put(indices, moment_slice)
+ linear.put(indices, linear_slice)
+ weight.put(indices, weight_slice)
+
+ return success
+
+
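Written out, `_apply_map_tensor_ftrl` above performs the familiar FTRL-proximal update on the rows selected by `indices` (reading :math:`p` as `lr_power`, :math:`\eta` as `learning_rate`, :math:`n` as the accumulator `moment`, :math:`z` as the `linear` state and :math:`g` as the gradient `values`); a restatement of what the code computes, not an independent specification:

.. math::
    \begin{array}{ll}
    n_t = n_{t-1} + g_t^2 \\
    \sigma = (n_t^{-p} - n_{t-1}^{-p}) / \eta \\
    z_t = z_{t-1} + g_t - \sigma w_t \\
    w_{t+1} = \begin{cases} (l_1 \, \mathrm{sign}(z_t) - z_t) / (n_t^{-p}/\eta + 2 l_2) & \text{if } |z_t| > l_1 \\ 0 & \text{otherwise} \end{cases}
    \end{array}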
+ @_ftrl_opt.register("Function", "Function", "Function", "Function", "Number", "Number", "Number", "Tensor", "MapTensor",
+ "MapTensor", "MapTensor", "MapTensor", "Bool", "Bool",
+ "Function", "Bool", "Function", "Bool")
+ def _run_map_tensor_opt_with_sparse_dist(opt, spars_opt, push, pull, l1, l2, lr_power, learning_rate, linear,
+ gradient, weight, moment, ps_parameter, cache_enable,
+ distributed_opt, use_flag, distributed_sparse_opt, use_sparse_flag):
+ """Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse."""
+ success = True
+ indices, values = gradient.get_data()
+ if use_sparse_flag:
+ # PS Mode.
+ success = F.depend(success, distributed_sparse_opt(weight, moment, linear, values, indices))
+ elif cache_enable:
+ # PS Cache mode.
+ _apply_map_tensor_ftrl(l1, l2, lr_power, learning_rate, linear, weight, moment, indices, values)
+ else:
+ raise Exception("Unexpected mode for distributed optimizer.")
+ return success
+
+
  @_ftrl_opt.register("Function", "Function", "Function", "Function", "Number", "Number", "Number", "Tensor", "Tensor",
  "Tensor", "Tensor", "Tensor", "Bool", "Bool",
  "Function", "Bool", "Function", "Bool")
@@ -91,36 +146,7 @@ def _run_map_tensor_opt_with_sparse(opt, spars_opt, push, pull, l1, l2, lr_power
  """Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse."""
  success = True
  indices, values = gradient.get_data()
-
- linear_slice = linear.get(indices)
- moment_slice = moment.get(indices)
- weight_slice = weight.get(indices)
-
- op_pow = P.Pow()
- op_sign = P.Sign()
- op_greater = P.Greater()
- op_select = P.Select()
-
- lr_power_val = -lr_power
- accu_pow = op_pow(moment_slice, lr_power_val)
- moment_slice = F.depend(moment_slice, accu_pow)
- cur_accu = moment_slice + values * values
- cur_accu_pow = op_pow(cur_accu, lr_power_val)
- sigma = (cur_accu_pow - accu_pow) / learning_rate
-
- linear_slice = linear_slice + values - sigma * weight_slice
-
- update_weight_cond = op_greater(linear_slice, l1)
- updated_weight = (l1 * op_sign(linear_slice) - linear_slice) / (cur_accu_pow / learning_rate + 2 * l2)
- zeros = zeros_like(weight_slice)
-
- weight_slice = op_select(update_weight_cond, updated_weight, zeros)
- moment_slice = cur_accu
-
- moment.put(indices, moment_slice)
- linear.put(indices, linear_slice)
- weight.put(indices, weight_slice)
-
+ _apply_map_tensor_ftrl(l1, l2, lr_power, learning_rate, linear, weight, moment, indices, values)
  return success

@@ -139,19 +165,22 @@ def _tensor_run_opt(opt, spars_opt, push, pull, l1, l2, lr_power, learning_rate,
  return success


- def _check_param(initial_accum, lr_power, l1, l2, use_locking, prim_name=None):
+ def _check_param(initial_accum, learning_rate, lr_power, l1, l2, use_locking, prim_name=None):
  """Check param."""
  validator.check_value_type("initial_accum", initial_accum, [float], prim_name)
- validator.check_number("initial_accum", initial_accum, 0.0, Rel.GE, prim_name)
+ validator.check_number("initial_accum", initial_accum, 0.0, validator.GE, prim_name)
+
+ validator.check_value_type("learning_rate", learning_rate, [float], prim_name)
+ validator.check_positive_float(learning_rate, "learning_rate", prim_name)

  validator.check_value_type("lr_power", lr_power, [float], prim_name)
- validator.check_number("lr_power", lr_power, 0.0, Rel.LE, prim_name)
+ validator.check_number("lr_power", lr_power, 0.0, validator.LE, prim_name)

  validator.check_value_type("l1", l1, [float], prim_name)
- validator.check_number("l1", l1, 0.0, Rel.GE, prim_name)
+ validator.check_number("l1", l1, 0.0, validator.GE, prim_name)

  validator.check_value_type("l2", l2, [float], prim_name)
- validator.check_number("l2", l2, 0.0, Rel.GE, prim_name)
+ validator.check_number("l2", l2, 0.0, validator.GE, prim_name)

  validator.check_value_type("use_locking", use_locking, [bool], prim_name)
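Besides migrating from the `Rel` constants to the module-level `validator` constants, `_check_param` now also validates `learning_rate`. A standalone restatement of the constraints being enforced (a hypothetical plain-Python sketch, not the MindSpore validator API):

    def check_ftrl_hyperparams(initial_accum, learning_rate, lr_power, l1, l2, use_locking):
        # Mirrors the checks above: accumulator and regularizers non-negative,
        # learning rate strictly positive, lr_power non-positive, use_locking a bool.
        assert isinstance(initial_accum, float) and initial_accum >= 0.0
        assert isinstance(learning_rate, float) and learning_rate > 0.0
        assert isinstance(lr_power, float) and lr_power <= 0.0
        assert isinstance(l1, float) and l1 >= 0.0
        assert isinstance(l2, float) and l2 >= 0.0
        assert isinstance(use_locking, bool)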
 
@@ -295,7 +324,7 @@ class FTRL(Optimizer):
  raise ValueError(f"For 'FTRL', dynamic learning rate and group learning rate are currently not supported "
  f"in FTRL, they should all be false, but got dynamic learning rate {self.dynamic_lr} and"
  f" group learning rate {self.is_group_lr}.")
- _check_param(initial_accum, lr_power, l1, l2, use_locking, self.cls_name)
+ _check_param(initial_accum, learning_rate, lr_power, l1, l2, use_locking, self.cls_name)
  self.moments = self._parameters.clone(prefix="moments", init=initial_accum)
  self.linear = self._parameters.clone(prefix="linear", init='zeros')
  self.l1 = l1
@@ -23,8 +23,7 @@ from mindspore.ops import functional as F
  from mindspore.ops.operations import _inner_ops as inner
  from mindspore.common.tensor import Tensor
  from mindspore.common.api import jit
- from mindspore._checkparam import Validator as validator
- from mindspore._checkparam import Rel
+ from mindspore import _checkparam as validator
  from mindspore.nn.optim.optimizer import Optimizer
  from mindspore.nn.optim.optimizer import opt_init_args_register

@@ -69,8 +68,8 @@ def _check_param_value(beta1, beta2, eps, prim_name):
  validator.check_value_type("beta1", beta1, [float], prim_name)
  validator.check_value_type("beta2", beta2, [float], prim_name)
  validator.check_value_type("eps", eps, [float], prim_name)
- validator.check_float_range(beta1, 0.0, 1.0, Rel.INC_NEITHER, "beta1", prim_name)
- validator.check_float_range(beta2, 0.0, 1.0, Rel.INC_NEITHER, "beta2", prim_name)
+ validator.check_float_range(beta1, 0.0, 1.0, validator.INC_NEITHER, "beta1", prim_name)
+ validator.check_float_range(beta2, 0.0, 1.0, validator.INC_NEITHER, "beta2", prim_name)
  validator.check_positive_float(eps, "eps", prim_name)

@@ -133,7 +132,7 @@ class Lamb(Optimizer):
  There is usually no connection between an optimizer and mixed precision. But when `FixedLossScaleManager` is used
  and `drop_overflow_update` in `FixedLossScaleManager` is set to False, the optimizer needs to set the 'loss_scale'.
  As this optimizer has no argument of `loss_scale`, `loss_scale` needs to be processed by other means. Refer to the
- document `LossScale <https://www.mindspore.cn/tutorials/zh-CN/r2.0.0-alpha/advanced/mixed_precision.html>`_ to
+ document `LossScale <https://www.mindspore.cn/tutorials/en/r2.0/advanced/mixed_precision.html>`_ to
  process `loss_scale` correctly.

  If parameters are not grouped, the `weight_decay` in the optimizer will be applied on the network parameters without