mindspore 2.0.0a0__cp38-cp38-win_amd64.whl → 2.0.0rc1__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -2
- mindspore/_c_dataengine.cp38-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp38-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp38-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +102 -0
- mindspore/_checkparam.py +1066 -1001
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +4 -3
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -48
- mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
- mindspore/_extends/parse/__init__.py +5 -3
- mindspore/_extends/parse/namespace.py +16 -1
- mindspore/_extends/parse/parser.py +107 -22
- mindspore/_extends/parse/resources.py +0 -7
- mindspore/_extends/parse/standard_method.py +885 -413
- mindspore/amp.py +52 -57
- mindspore/boost/boost.py +2 -2
- mindspore/boost/boost_cell_wrapper.py +38 -20
- mindspore/boost/dim_reduce.py +3 -3
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/common/__init__.py +4 -6
- mindspore/common/_decorator.py +2 -0
- mindspore/common/_register_for_adapter.py +55 -0
- mindspore/common/_stub_tensor.py +201 -0
- mindspore/common/_utils.py +41 -7
- mindspore/common/api.py +215 -141
- mindspore/common/dtype.py +8 -1
- mindspore/common/dump.py +2 -2
- mindspore/common/initializer.py +4 -2
- mindspore/common/jit_config.py +17 -13
- mindspore/common/mutable.py +33 -13
- mindspore/common/parameter.py +23 -21
- mindspore/common/seed.py +8 -24
- mindspore/common/sparse_tensor.py +62 -41
- mindspore/common/tensor.py +852 -1154
- mindspore/communication/__init__.py +2 -2
- mindspore/communication/_comm_helper.py +11 -4
- mindspore/communication/management.py +22 -21
- mindspore/config/op_info.config +501 -1008
- mindspore/context.py +201 -23
- mindspore/dataset/__init__.py +6 -6
- mindspore/dataset/audio/__init__.py +7 -7
- mindspore/dataset/audio/transforms.py +670 -30
- mindspore/dataset/audio/utils.py +47 -4
- mindspore/dataset/audio/validators.py +223 -1
- mindspore/dataset/callback/ds_callback.py +2 -2
- mindspore/dataset/core/config.py +210 -14
- mindspore/dataset/core/validator_helpers.py +2 -2
- mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
- mindspore/dataset/debug/debug_hook.py +65 -0
- mindspore/dataset/debug/pre_defined_hook.py +67 -0
- mindspore/dataset/engine/__init__.py +7 -3
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +322 -66
- mindspore/dataset/engine/datasets_audio.py +80 -76
- mindspore/dataset/engine/datasets_standard_format.py +51 -38
- mindspore/dataset/engine/datasets_text.py +232 -118
- mindspore/dataset/engine/datasets_user_defined.py +41 -17
- mindspore/dataset/engine/datasets_vision.py +746 -225
- mindspore/dataset/engine/graphdata.py +75 -10
- mindspore/dataset/engine/iterators.py +45 -5
- mindspore/dataset/engine/offload.py +48 -28
- mindspore/dataset/engine/validators.py +117 -8
- mindspore/dataset/text/__init__.py +6 -5
- mindspore/dataset/text/transforms.py +86 -3
- mindspore/dataset/text/utils.py +6 -4
- mindspore/dataset/text/validators.py +25 -0
- mindspore/dataset/transforms/__init__.py +3 -2
- mindspore/dataset/transforms/c_transforms.py +1 -1
- mindspore/dataset/transforms/transforms.py +2 -2
- mindspore/dataset/utils/__init__.py +2 -1
- mindspore/dataset/utils/line_reader.py +121 -0
- mindspore/dataset/vision/__init__.py +2 -3
- mindspore/dataset/vision/c_transforms.py +9 -9
- mindspore/dataset/vision/py_transforms.py +5 -5
- mindspore/dataset/vision/py_transforms_util.py +2 -0
- mindspore/dataset/vision/transforms.py +160 -161
- mindspore/dataset/vision/utils.py +3 -3
- mindspore/experimental/map_parameter.py +38 -26
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/callback/callback.h +9 -13
- mindspore/include/api/callback/ckpt_saver.h +2 -2
- mindspore/include/api/callback/loss_monitor.h +2 -2
- mindspore/include/api/callback/lr_scheduler.h +5 -5
- mindspore/include/api/callback/time_monitor.h +2 -2
- mindspore/include/api/callback/train_accuracy.h +4 -6
- mindspore/include/api/cfg.h +19 -6
- mindspore/include/api/context.h +44 -9
- mindspore/include/api/delegate.h +1 -1
- mindspore/include/api/metrics/accuracy.h +2 -2
- mindspore/include/api/metrics/metrics.h +4 -3
- mindspore/include/api/model.h +9 -4
- mindspore/include/api/model_parallel_runner.h +2 -2
- mindspore/include/api/net.h +12 -11
- mindspore/include/api/serialization.h +19 -3
- mindspore/include/api/types.h +3 -3
- mindspore/include/dataset/constants.h +7 -0
- mindspore/include/dataset/text.h +59 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +1 -1
- mindspore/mindrecord/filereader.py +18 -0
- mindspore/mindrecord/filewriter.py +197 -34
- mindspore/mindrecord/shardreader.py +9 -0
- mindspore/mindrecord/shardwriter.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +3 -3
- mindspore/mindrecord/tools/cifar10_to_mr.py +3 -3
- mindspore/mindrecord/tools/csv_to_mr.py +3 -3
- mindspore/mindrecord/tools/imagenet_to_mr.py +16 -11
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/__init__.py +0 -4
- mindspore/nn/cell.py +204 -132
- mindspore/nn/dynamic_lr.py +1 -1
- mindspore/nn/grad/cell_grad.py +7 -6
- mindspore/nn/layer/__init__.py +5 -4
- mindspore/nn/layer/activation.py +40 -89
- mindspore/nn/layer/basic.py +255 -624
- mindspore/nn/layer/channel_shuffle.py +7 -6
- mindspore/nn/layer/combined.py +1 -1
- mindspore/nn/layer/container.py +41 -4
- mindspore/nn/layer/conv.py +64 -28
- mindspore/nn/layer/dense.py +9 -8
- mindspore/nn/layer/embedding.py +27 -25
- mindspore/nn/layer/image.py +53 -46
- mindspore/nn/layer/math.py +97 -105
- mindspore/nn/layer/normalization.py +117 -86
- mindspore/nn/layer/padding.py +185 -95
- mindspore/nn/layer/pooling.py +817 -414
- mindspore/nn/layer/rnn_cells.py +10 -15
- mindspore/nn/layer/rnns.py +37 -38
- mindspore/nn/layer/thor_layer.py +11 -12
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +701 -0
- mindspore/nn/learning_rate_schedule.py +8 -8
- mindspore/nn/loss/__init__.py +5 -4
- mindspore/nn/loss/loss.py +334 -199
- mindspore/nn/optim/ada_grad.py +6 -6
- mindspore/nn/optim/adadelta.py +2 -3
- mindspore/nn/optim/adafactor.py +4 -5
- mindspore/nn/optim/adam.py +126 -62
- mindspore/nn/optim/adamax.py +3 -4
- mindspore/nn/optim/adasum.py +6 -6
- mindspore/nn/optim/asgd.py +2 -2
- mindspore/nn/optim/ftrl.py +67 -38
- mindspore/nn/optim/lamb.py +4 -5
- mindspore/nn/optim/lars.py +2 -2
- mindspore/nn/optim/lazyadam.py +43 -4
- mindspore/nn/optim/momentum.py +6 -5
- mindspore/nn/optim/optimizer.py +3 -1
- mindspore/nn/optim/proximal_ada_grad.py +2 -2
- mindspore/nn/optim/rmsprop.py +1 -1
- mindspore/nn/optim/rprop.py +8 -9
- mindspore/nn/optim/sgd.py +19 -13
- mindspore/nn/optim/thor.py +10 -15
- mindspore/nn/probability/__init__.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +4 -4
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/softplus.py +2 -2
- mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
- mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +9 -15
- mindspore/nn/probability/distribution/bernoulli.py +3 -3
- mindspore/nn/probability/distribution/beta.py +1 -1
- mindspore/nn/probability/distribution/categorical.py +5 -7
- mindspore/nn/probability/distribution/cauchy.py +3 -3
- mindspore/nn/probability/distribution/distribution.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -2
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/gumbel.py +3 -3
- mindspore/nn/probability/distribution/half_normal.py +15 -11
- mindspore/nn/probability/distribution/laplace.py +16 -13
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/normal.py +1 -1
- mindspore/nn/probability/distribution/poisson.py +1 -1
- mindspore/nn/probability/distribution/student_t.py +20 -15
- mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
- mindspore/nn/probability/distribution/uniform.py +2 -2
- mindspore/nn/reinforcement/_tensors_queue.py +3 -3
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +2 -2
- mindspore/nn/wrap/cell_wrapper.py +27 -10
- mindspore/nn/wrap/grad_reducer.py +2 -2
- mindspore/nn/wrap/loss_scale.py +40 -24
- mindspore/numpy/array_creations.py +33 -22
- mindspore/numpy/array_ops.py +35 -30
- mindspore/numpy/logic_ops.py +6 -27
- mindspore/numpy/math_ops.py +22 -19
- mindspore/numpy/utils.py +1 -1
- mindspore/numpy/utils_const.py +108 -58
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_constants.py +0 -6
- mindspore/ops/_grad/__init__.py +2 -1
- mindspore/ops/_grad/grad_array_ops.py +86 -117
- mindspore/ops/_grad/grad_base.py +23 -1
- mindspore/ops/_grad/grad_clip_ops.py +2 -3
- mindspore/ops/_grad/grad_comm_ops.py +34 -24
- mindspore/ops/_grad/grad_implementations.py +9 -45
- mindspore/ops/_grad/grad_inner_ops.py +47 -4
- mindspore/ops/_grad/grad_math_ops.py +142 -117
- mindspore/ops/_grad/grad_nn_ops.py +71 -165
- mindspore/ops/_grad/grad_sequence_ops.py +296 -0
- mindspore/ops/_grad/grad_sparse.py +7 -6
- mindspore/ops/_grad_experimental/__init__.py +1 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +150 -15
- mindspore/ops/_grad_experimental/grad_image_ops.py +16 -7
- mindspore/ops/_grad_experimental/grad_inner_ops.py +1 -22
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +4 -11
- mindspore/ops/_grad_experimental/grad_math_ops.py +210 -89
- mindspore/ops/_grad_experimental/grad_nn_ops.py +26 -22
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +49 -8
- mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +4 -4
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
- mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/correction_mul.py +2 -2
- mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -5
- mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -4
- mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +236 -4
- mindspore/ops/_op_impl/aicpu/abs.py +36 -0
- mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_v1.py → adaptive_avg_pool_2d.py} +6 -5
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/add.py +43 -0
- mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
- mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
- mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -43
- mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
- mindspore/{compression/common/__init__.py → ops/_op_impl/aicpu/bessel_i0.py} +15 -8
- mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
- mindspore/ops/_op_impl/aicpu/conj.py +11 -0
- mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +0 -3
- mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +43 -0
- mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_grad_v1.py → digamma.py} +7 -9
- mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
- mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
- mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
- mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
- mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
- mindspore/ops/_op_impl/aicpu/greater.py +41 -0
- mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
- mindspore/ops/_op_impl/aicpu/less.py +41 -0
- mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/lgamma.py} +16 -10
- mindspore/ops/_op_impl/aicpu/mirror_pad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mul.py +3 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
- mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
- mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
- mindspore/ops/_op_impl/aicpu/polar.py +32 -0
- mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
- mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
- mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
- mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
- mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
- mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
- mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
- mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
- mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice.py +4 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +6 -0
- mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
- mindspore/ops/_op_impl/aicpu/trans_data.py +1 -0
- mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
- mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
- mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
- mindspore/ops/_op_impl/cpu/sparse_slice.py +4 -0
- mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +6 -0
- mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
- mindspore/ops/_op_impl/tbe/__init__.py +27 -611
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
- mindspore/ops/_op_impl/tbe/cast.py +0 -2
- mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +2 -2
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
- mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
- mindspore/ops/_op_impl/tbe/matmul_ds.py +2 -0
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/scatter_mul.py +2 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -2
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
- mindspore/ops/_register_for_op.py +1 -0
- mindspore/ops/_utils/__init__.py +1 -2
- mindspore/ops/_utils/utils.py +19 -40
- mindspore/ops/_vmap/vmap_array_ops.py +116 -38
- mindspore/ops/_vmap/vmap_base.py +16 -9
- mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
- mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +7 -5
- mindspore/ops/_vmap/vmap_image_ops.py +12 -5
- mindspore/ops/_vmap/vmap_math_ops.py +46 -5
- mindspore/ops/_vmap/vmap_nn_ops.py +15 -21
- mindspore/ops/_vmap/vmap_random_ops.py +1 -1
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +220 -106
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -23
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -17
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +39 -41
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +41 -43
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +51 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +24 -25
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +53 -53
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +77 -85
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +37 -39
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +70 -72
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +17 -17
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +2 -0
- mindspore/ops/composite/__init__.py +7 -8
- mindspore/ops/composite/base.py +101 -47
- mindspore/ops/composite/math_ops.py +188 -158
- mindspore/ops/composite/multitype_ops/_compile_utils.py +415 -170
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +142 -87
- mindspore/ops/composite/multitype_ops/add_impl.py +6 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +2 -3
- mindspore/ops/composite/multitype_ops/getitem_impl.py +31 -3
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +9 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +21 -5
- mindspore/ops/composite/multitype_ops/not_in_impl.py +9 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
- mindspore/ops/composite/multitype_ops/setitem_impl.py +21 -3
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +35 -4
- mindspore/ops/function/__init__.py +152 -8
- mindspore/ops/function/array_func.py +2555 -674
- mindspore/ops/function/clip_func.py +209 -13
- mindspore/ops/function/debug_func.py +2 -2
- mindspore/ops/function/grad/__init__.py +2 -1
- mindspore/ops/function/grad/grad_func.py +147 -62
- mindspore/ops/function/image_func.py +54 -38
- mindspore/ops/function/linalg_func.py +167 -16
- mindspore/ops/function/math_func.py +4849 -1492
- mindspore/ops/function/nn_func.py +2573 -988
- mindspore/ops/function/other_func.py +115 -0
- mindspore/ops/function/parameter_func.py +3 -3
- mindspore/ops/function/random_func.py +790 -73
- mindspore/ops/function/sparse_func.py +98 -78
- mindspore/ops/function/sparse_unary_func.py +54 -53
- mindspore/ops/function/spectral_func.py +27 -24
- mindspore/ops/function/vmap_func.py +22 -2
- mindspore/ops/functional.py +97 -37
- mindspore/ops/op_info_register.py +70 -28
- mindspore/ops/operations/__init__.py +47 -14
- mindspore/ops/operations/_csr_ops.py +7 -7
- mindspore/ops/operations/_embedding_cache_ops.py +5 -5
- mindspore/ops/operations/_grad_ops.py +276 -187
- mindspore/ops/operations/_inner_ops.py +319 -113
- mindspore/ops/operations/_ms_kernel.py +10 -8
- mindspore/ops/operations/_ocr_ops.py +9 -9
- mindspore/ops/operations/_opaque_predicate_registry.py +4 -0
- mindspore/ops/operations/_quant_ops.py +137 -102
- mindspore/ops/operations/_rl_inner_ops.py +121 -60
- mindspore/ops/operations/_scalar_ops.py +466 -0
- mindspore/ops/operations/_sequence_ops.py +1004 -2
- mindspore/ops/operations/_tensor_array.py +10 -11
- mindspore/ops/operations/_thor_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +801 -466
- mindspore/ops/operations/comm_ops.py +51 -49
- mindspore/ops/operations/control_ops.py +2 -2
- mindspore/ops/operations/custom_ops.py +123 -44
- mindspore/ops/operations/debug_ops.py +24 -24
- mindspore/ops/operations/image_ops.py +240 -153
- mindspore/ops/operations/inner_ops.py +34 -50
- mindspore/ops/operations/linalg_ops.py +31 -9
- mindspore/ops/operations/math_ops.py +988 -757
- mindspore/ops/operations/nn_ops.py +965 -819
- mindspore/ops/operations/other_ops.py +51 -40
- mindspore/ops/operations/random_ops.py +204 -122
- mindspore/ops/operations/rl_ops.py +8 -9
- mindspore/ops/operations/sparse_ops.py +254 -93
- mindspore/ops/operations/spectral_ops.py +35 -3
- mindspore/ops/primitive.py +111 -9
- mindspore/parallel/_auto_parallel_context.py +189 -83
- mindspore/parallel/_offload_context.py +185 -0
- mindspore/parallel/_parallel_serialization.py +99 -7
- mindspore/parallel/_ps_context.py +9 -5
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +7 -1
- mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
- mindspore/{nn/transformer → parallel/_transformer}/layers.py +6 -37
- mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
- mindspore/{nn/transformer → parallel/_transformer}/moe.py +20 -16
- mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
- mindspore/{nn/transformer → parallel/_transformer}/transformer.py +48 -111
- mindspore/parallel/_utils.py +1 -2
- mindspore/parallel/algo_parameter_config.py +1 -1
- mindspore/parallel/checkpoint_transform.py +37 -34
- mindspore/parallel/shard.py +17 -18
- mindspore/profiler/common/validator/validate_path.py +2 -2
- mindspore/profiler/envprofiling.py +69 -47
- mindspore/profiler/parser/ascend_timeline_generator.py +49 -42
- mindspore/profiler/parser/base_timeline_generator.py +49 -56
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +98 -78
- mindspore/profiler/parser/hwts_log_parser.py +1 -1
- mindspore/profiler/parser/integrator.py +15 -14
- mindspore/profiler/parser/minddata_analyzer.py +2 -2
- mindspore/profiler/parser/msadvisor_analyzer.py +12 -25
- mindspore/profiler/parser/msadvisor_parser.py +2 -4
- mindspore/profiler/parser/optime_parser.py +17 -18
- mindspore/profiler/parser/profiler_info.py +2 -1
- mindspore/profiler/profiling.py +218 -186
- mindspore/rewrite/__init__.py +3 -1
- mindspore/rewrite/api/node.py +1 -114
- mindspore/rewrite/api/node_type.py +3 -0
- mindspore/rewrite/api/pattern_engine.py +31 -1
- mindspore/rewrite/api/scoped_value.py +4 -4
- mindspore/rewrite/api/symbol_tree.py +3 -78
- mindspore/rewrite/api/tree_node_helper.py +1 -1
- mindspore/rewrite/ast_creator_register.py +1 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -2
- mindspore/rewrite/ast_helpers/ast_creator.py +1 -2
- mindspore/rewrite/ast_helpers/ast_finder.py +65 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +11 -3
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +18 -2
- mindspore/rewrite/namespace.py +0 -2
- mindspore/rewrite/node.py +157 -11
- mindspore/rewrite/parsers/assign_parser.py +231 -53
- mindspore/rewrite/parsers/class_def_parser.py +187 -109
- mindspore/rewrite/parsers/for_parser.py +24 -14
- mindspore/rewrite/parsers/function_def_parser.py +21 -4
- mindspore/rewrite/parsers/if_parser.py +6 -2
- mindspore/rewrite/sparsify/__init__.py +0 -0
- mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
- mindspore/rewrite/sparsify/sparsify.py +109 -0
- mindspore/rewrite/sparsify/utils.py +173 -0
- mindspore/rewrite/symbol_tree.py +256 -133
- mindspore/rewrite/symbol_tree_builder.py +38 -1
- mindspore/run_check/_check_version.py +69 -63
- mindspore/run_check/run_check.py +2 -1
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +1 -1
- mindspore/train/_utils.py +28 -5
- mindspore/train/amp.py +273 -102
- mindspore/train/callback/_backup_and_restore.py +5 -5
- mindspore/train/callback/_callback.py +2 -2
- mindspore/train/callback/_checkpoint.py +3 -3
- mindspore/train/callback/_early_stop.py +3 -3
- mindspore/train/callback/_lambda_callback.py +2 -2
- mindspore/train/callback/_landscape.py +29 -31
- mindspore/train/callback/_loss_monitor.py +3 -3
- mindspore/train/callback/_on_request_exit.py +3 -3
- mindspore/train/callback/_reduce_lr_on_plateau.py +4 -4
- mindspore/train/callback/_summary_collector.py +23 -16
- mindspore/train/callback/_time_monitor.py +3 -3
- mindspore/train/checkpoint_pb2.py +68 -8
- mindspore/train/data_sink.py +15 -3
- mindspore/train/dataset_helper.py +10 -15
- mindspore/train/loss_scale_manager.py +8 -11
- mindspore/train/metrics/__init__.py +1 -1
- mindspore/train/metrics/bleu_score.py +1 -1
- mindspore/train/metrics/confusion_matrix.py +1 -1
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/dice.py +2 -2
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +4 -3
- mindspore/train/metrics/mean_surface_distance.py +2 -2
- mindspore/train/metrics/occlusion_sensitivity.py +1 -1
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +1 -1
- mindspore/train/metrics/recall.py +1 -1
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
- mindspore/train/mind_ir_pb2.py +116 -37
- mindspore/train/model.py +45 -28
- mindspore/train/serialization.py +295 -188
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +43 -13
- mindspore/train/train_thor/convert_utils.py +2 -2
- mindspore/train/train_thor/dataset_helper.py +3 -3
- mindspore/turbojpeg.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +3 -2
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +610 -541
- mindspore/compression/__init__.py +0 -19
- mindspore/compression/common/constant.py +0 -124
- mindspore/compression/export/__init__.py +0 -19
- mindspore/compression/export/quant_export.py +0 -515
- mindspore/compression/quant/__init__.py +0 -28
- mindspore/compression/quant/qat.py +0 -634
- mindspore/compression/quant/quant_utils.py +0 -462
- mindspore/compression/quant/quantizer.py +0 -68
- mindspore/nn/layer/quant.py +0 -1868
- mindspore/nn/layer/rnn_utils.py +0 -90
- mindspore/nn/probability/dpn/__init__.py +0 -22
- mindspore/nn/probability/dpn/vae/__init__.py +0 -25
- mindspore/nn/probability/dpn/vae/cvae.py +0 -140
- mindspore/nn/probability/dpn/vae/vae.py +0 -124
- mindspore/nn/probability/infer/__init__.py +0 -22
- mindspore/nn/probability/infer/variational/elbo.py +0 -70
- mindspore/nn/probability/infer/variational/svi.py +0 -84
- mindspore/nn/probability/toolbox/__init__.py +0 -22
- mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
- mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -364
- mindspore/nn/probability/transforms/__init__.py +0 -22
- mindspore/nn/probability/transforms/transform_bnn.py +0 -262
- mindspore/nn/probability/zhusuan/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
- mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
- mindspore/ops/_op_impl/aicpu/parallel_concat.py +0 -42
- mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
- mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Cast_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/MatMul_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/Transpose_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -15
- mindspore/ops/composite/array_ops.py +0 -241
- mindspore/ops/composite/clip_ops.py +0 -134
- mindspore/ops/composite/random_ops.py +0 -426
- mindspore/ops/composite/vmap_ops.py +0 -38
- mindspore/parallel/nn/__init__.py +0 -42
- mindspore/parallel/nn/loss.py +0 -22
- mindspore/parallel/nn/moe.py +0 -21
- mindspore/parallel/nn/op_parallel_config.py +0 -22
- mindspore/parallel/nn/transformer.py +0 -31
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -23,8 +23,7 @@ from mindspore import log as logger
 from mindspore._checkparam import _check_3d_int_or_tuple
 from mindspore import context
 from mindspore.ops import signature as sig
-from mindspore
-from mindspore._checkparam import Rel
+from mindspore import _checkparam as validator
 from mindspore.common import dtype as mstype
 from mindspore.common._decorator import deprecated
 from mindspore.ops.primitive import Primitive
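Note: the replacement of `from mindspore._checkparam import Rel` with `from mindspore import _checkparam as validator` is the pattern behind most of the one-line changes in the hunks below, where comparison constants that previously came from `Rel` are now referenced as `validator.EQ`, `validator.NE`, `validator.GE`, and so on. A minimal sketch of the new call style, using only calls that appear in this diff (assumes mindspore 2.0.0rc1 is installed; `_checkparam` is an internal module, not a public API):

    # Sketch only: these checks mirror the updated calls shown in the hunks below.
    from mindspore import _checkparam as validator

    # Comparison constants now live on the module itself (validator.EQ, NE, GE, ...).
    validator.check_float(1.5, 0.0, validator.NE, "alpha", "CeLU")              # passes: 1.5 != 0.0
    validator.check_int(2, 2, validator.EQ, "length of output_size", "SomeOp")  # passes: 2 == 2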
@@ -117,7 +116,7 @@ class CeLU(Primitive):
     def __init__(self, alpha=1.0):
         """Initialize CeLU"""
         validator.check_value_type("alpha", alpha, [float], self.name)
-        validator.check_float(alpha, 0.0,
+        validator.check_float(alpha, 0.0, validator.NE, "alpha", self.name)
         self.alpha = alpha
         self.add_prim_attr('alpha', self.alpha)
 
@@ -148,10 +147,13 @@ class AdaptiveAvgPool3D(Primitive):
     r"""
     AdaptiveAvgPool3D operation.
 
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
     Refer to :func:`mindspore.ops.adaptive_avg_pool3d` for more details.
 
     Supported Platforms:
-        ``GPU`` ``CPU``
+        ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
         >>> import mindspore
@@ -183,7 +185,7 @@ class AdaptiveAvgPool3D(Primitive):
         for i, size in enumerate(self.output_size):
             validator.check_value_type(f"output_size[{i}]", size, [int, type(None)], self.name)
             if size is not None:
-                validator.check_number(f"output_size[{i}]", size, 0,
+                validator.check_number(f"output_size[{i}]", size, 0, validator.GE, self.name)
 
         self.output_size = tuple(-1 if val is None else val for val in self.output_size)
 
@@ -191,67 +193,21 @@ class AdaptiveAvgPool3D(Primitive):
         self.init_prim_io_names(inputs=['x'], outputs=['y'])
 
 
-class
+class AdaptiveAvgPool2D(Primitive):
     r"""
-
-
-    This operator applies a 2D adaptive average pooling to an input signal composed of multiple input planes.
-    That is, for any input size, the size of the specified output is H x W.
-    The number of output features is equal to the number of input planes.
+    AdaptiveAvgPool2D operation.
 
-
-    H is the feature height, and W is the feature width.
-
-    For AdaptiveAvgPool2DV1:
-
-    .. math::
-        \begin{align}
-        h_{start} &= floor(i * H_{in} / H_{out})\\
-        h_{end} &= ceil((i + 1) * H_{in} / H_{out})\\
-        w_{start} &= floor(j * W_{in} / W_{out})\\
-        w_{end} &= ceil((j + 1) * W_{in} / W_{out})\\
-        Output(i,j) &= \frac{\sum Input[h_{start}:h_{end}, w_{start}:w_{end}]}{(h_{end}- h_{start})
-        * (w_{end}- w_{start})}
-        \end{align}
-
-    Args:
-        - output_size (Union[int, tuple]): The target output size is H x W.
-          ouput_size can be a tuple, or a single H for H x H, and H and W can be int or None
-          which means the output size is the same as the input.
-
-    Inputs:
-        - **input_x** (Tensor) - The input of AdaptiveAvgPool2DV1, which is a 3D or 4D tensor,
-          with float16 or float32 data type.
-
-    Outputs:
-        Tensor, with the same type as the `input_x`.
-
-        Shape of the output is `input_x_shape[:len(input_x_shape) - len(out_shape)] + out_shape`.
-
-        .. math::
-            out\_shape = \begin{cases}
-            input\_x\_shape[-2] + output\_size[1], & \text{if output_size is (None, w);}\\
-            output\_size[0] + input\_x\_shape[-1], & \text{if output_size is (h, None);}\\
-            input\_x\_shape[-2:], & \text{if output_size is (None, None);}\\
-            (h, h), & \text{if output_size is h;}\\
-            (h, w), & \text{if output_size is (h, w)}
-            \end{cases}
-
-    Raises:
-        TypeError: If `input_x` is not a tensor.
-        TypeError: If dtype of `input_x` is not float16 nor float32.
-        ValueError: If `output_size` is a tuple and the length of `output_size` is not 2.
-        ValueError: If the dimension of `input_x` is less than or equal to the dimension of `output_size`.
+    Refer to :func:`mindspore.ops.adaptive_avg_pool2d` for more details.
 
     Supported Platforms:
-        ``
+        ``GPU``
 
     Examples:
         >>> # case 1: output_size=(None, 2)
         >>> input_x = Tensor(np.array([[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
         ...                            [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
         ...                            [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]), mindspore.float32)
-        >>> adaptive_avg_pool_2d = ops.
+        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((None, 2))
         >>> output = adaptive_avg_pool_2d(input_x)
         >>> print(output)
         [[[1.5 2.5]
@@ -264,7 +220,7 @@ class AdaptiveAvgPool2DV1(Primitive):
         [4.5 5.5]
         [7.5 8.5]]]
         >>> # case 2: output_size=2
-        >>> adaptive_avg_pool_2d = ops.
+        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D(2)
         >>> output = adaptive_avg_pool_2d(input_x)
         >>> print(output)
         [[[3. 4.]
@@ -274,7 +230,7 @@ class AdaptiveAvgPool2DV1(Primitive):
         [[3. 4.]
         [6. 7.]]]
         >>> # case 3: output_size=(1, 2)
-        >>> adaptive_avg_pool_2d = ops.
+        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((1, 2))
         >>> output = adaptive_avg_pool_2d(input_x)
         >>> print(output)
         [[[4.5 5.5]]
@@ -284,77 +240,38 @@ class AdaptiveAvgPool2DV1(Primitive):
 
     @prim_attr_register
     def __init__(self, output_size):
-        """Initialize
+        """Initialize AdaptiveAvgPool2D."""
         self.init_prim_io_names(inputs=['x'], outputs=['y'])
         validator.check_value_type("output_size", output_size, [int, tuple], self.name)
         if isinstance(output_size, tuple):
-            validator.check_int(len(output_size), 2,
+            validator.check_int(len(output_size), 2, validator.EQ, 'length of output_size', self.name)
         self.output_size = (output_size, output_size) if isinstance(self.output_size, int) else output_size
         for i, size in enumerate(self.output_size):
             validator.check_value_type(f"output_size[{i}]", size, [int, type(None)], self.name)
             if size is not None:
-                validator.check_number(f"output_size[{i}]", size, 0,
+                validator.check_number(f"output_size[{i}]", size, 0, validator.GE, self.name)
 
         self.output_size = tuple(-1 if val is None else val for val in self.output_size)
         self.add_prim_attr('output_size', self.output_size)
 
 
-class
+class AdaptiveMaxPool2D(Primitive):
     r"""
-    2D adaptive
+    Performs 2D adaptive max pooling on a multi-plane input signal.
 
-    Refer to :func:`mindspore.ops.
-
-    Supported Platforms:
-        ``GPU``
-
-    Examples:
-        >>> # case 1: output_size=(None, 2)
-        >>> input_x = Tensor(np.array([[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
-        ...                             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
-        ...                             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]]), mindspore.float32)
-        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((None, 2))
-        >>> output = adaptive_avg_pool_2d(input_x)
-        >>> print(output)
-        [[[[1.5 2.5]
-        [4.5 5.5]
-        [7.5 8.5]]
-        [[1.5 2.5]
-        [4.5 5.5]
-        [7.5 8.5]]
-        [[1.5 2.5]
-        [4.5 5.5]
-        [7.5 8.5]]]]
-        >>> # case 2: output_size=2
-        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D(2)
-        >>> output = adaptive_avg_pool_2d(input_x)
-        >>> print(output)
-        [[[[3. 4.]
-        [6. 7.]]
-        [[3. 4.]
-        [6. 7.]]
-        [[3. 4.]
-        [6. 7.]]]]
-        >>> # case 3: output_size=(1, 2)
-        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((1, 2))
-        >>> output = adaptive_avg_pool_2d(input_x)
-        >>> print(output)
-        [[[[4.5 5.5]]
-        [[4.5 5.5]]
-        [[4.5 5.5]]]]
-    """
-
-    @prim_attr_register
-    def __init__(self, output_size):
-        """Initialize AdaptiveAvgPool2D."""
-        super(AdaptiveAvgPool2D, self).__init__(output_size)
+    Refer to :func:`mindspore.ops.adaptive_max_pool2d` for more details.
 
+    Args:
+        output_size (Union[int, tuple]): The target output size. `ouput_size` can be a tuple :math:`(H, W)`,
+            or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None.
+            If it is None, it means the output size is the same as the input size.
 
-
-
-
+    Inputs:
+        - **input_x** (Tensor) - The input of AdaptiveMaxPool2D, which is a 3D or 4D tensor,
+          with float16, float32 or float64 data type.
 
-
+    Outputs:
+        Tensor, with the same type as the `input_x`.
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -366,7 +283,7 @@ class AdaptiveMaxPool2D(Primitive):
         ...                             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]]), mindspore.float32)
         >>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D((None, 2))
         >>> output = adaptive_max_pool_2d(input_x)
-        >>> print(output)
+        >>> print(output[0])
         [[[[2. 3.]
         [5. 6.]
         [8. 9.]]
@@ -379,7 +296,7 @@ class AdaptiveMaxPool2D(Primitive):
         >>> # case 2: output_size=2
         >>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D(2)
         >>> output = adaptive_max_pool_2d(input_x)
-        >>> print(output)
+        >>> print(output[0])
         [[[[5. 6.]
         [8. 9.]]
         [[5. 6.]
@@ -389,35 +306,45 @@ class AdaptiveMaxPool2D(Primitive):
         >>> # case 3: output_size=(1, 2)
         >>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D((1, 2))
         >>> output = adaptive_max_pool_2d(input_x)
-        >>> print(output)
+        >>> print(output[0])
         [[[[8. 9.]]
         [[8. 9.]]
         [[8. 9.]]]]
     """
 
     @prim_attr_register
-    def __init__(self, output_size
+    def __init__(self, output_size):
         """Initialize AdaptiveMaxPool2D."""
         validator.check_value_type("output_size", output_size, [int, tuple], self.name)
-        validator.check_value_type("return_indices", return_indices, [bool], self.name)
         if isinstance(output_size, tuple):
-            validator.check_int(len(output_size), 2,
+            validator.check_int(len(output_size), 2, validator.EQ,
                                 'length of output_size', self.name)
         self.output_size = (output_size, output_size) if isinstance(self.output_size, int) else output_size
         self.output_size = (-1 if self.output_size[0] is None else self.output_size[0],
                             -1 if self.output_size[1] is None else self.output_size[1])
         for size in self.output_size:
-            validator.check_number("output_size", size, -1,
+            validator.check_number("output_size", size, -1, validator.GE, None)
         self.add_prim_attr('output_size', self.output_size)
-        self.add_prim_attr('return_indices', return_indices)
 
 
 class AdaptiveMaxPool3D(Primitive):
     r"""
-
+    Performs 3D adaptive max pooling on a multi-plane input signal.
 
     Refer to :func:`mindspore.ops.adaptive_max_pool3d` for more details.
 
+    Inputs:
+        - **x** (Tensor) - Tensor, with shape :math:`(C, D, H, W)` or :math:`(N, C, D, H, W)`.
+        - **output_size** (Union[int, tuple]) - The specified output size, which is an integer that represents depth,
+          height and width, or a tuple of three int numbers that represent depth, height and width respectively.
+          The value must be a positive integer. If it is None, the output size and input size of the corresponding
+          dimension are the same.
+
+    Outputs:
+        - **y** (Tensor) - Tensor, with the same number of dims and data type as the `input`.
+        - **argmax** (Tensor) - Tensor, the indices of max value, which has the same shape as the
+          `y` and it's data type is int32.
+
     Supported Platforms:
         ``GPU`` ``CPU``
 
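The hunk above drops the `return_indices` argument from `AdaptiveMaxPool2D.__init__`, and the updated doctests print `output[0]`, i.e. the primitive now yields a pair of tensors. A minimal usage sketch under that assumption (assumes mindspore 2.0.0rc1 is installed; shapes follow the docstring example):

    import numpy as np
    from mindspore import Tensor, ops

    # 1x1x3x3 input, adaptive max pool to height None (keep 3) and width 2
    input_x = Tensor(np.arange(1, 10, dtype=np.float32).reshape(1, 1, 3, 3))
    adaptive_max_pool_2d = ops.AdaptiveMaxPool2D((None, 2))  # no return_indices any more
    output = adaptive_max_pool_2d(input_x)
    print(output[0].shape)  # values tensor, expected (1, 1, 3, 2)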
@@ -504,7 +431,7 @@ class Softplus(Primitive):
 
     .. math::
 
-        \text{output} = \log(1 + \exp(\text{x}))
+        \text{output} = \log(1 + \exp(\text{x}))
 
     Inputs:
         - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
@@ -595,7 +522,7 @@ class ReLUV3(Primitive):
     Inputs:
         - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
           additional dimensions, data type is
-          `number <https://www.mindspore.cn/docs/en/r2.0
+          `number <https://www.mindspore.cn/docs/en/r2.0/api_python/mindspore.html#mindspore.dtype>`_.
 
     Outputs:
         Tensor of shape :math:`(N, *)`, with the same type and shape as the `input_x`.
@@ -651,15 +578,13 @@ class Mish(PrimitiveWithInfer):
         >>> x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
         >>> mish = ops.Mish()
         >>> output = mish(x)
-        >>> print(output)
-
-        [ 1.9439590 -0.0033576 9.0000000]]
+        >>> print(output.shape)
+        (2, 3)
     """
 
     @prim_attr_register
     def __init__(self):
         """Initialize Mish"""
-        super().__init__("Mish")
         self.init_prim_io_names(inputs=['x'], outputs=['output'])
 
 
@@ -707,7 +632,6 @@ class SeLU(Primitive):
     @prim_attr_register
     def __init__(self):
         """Initialize SeLU"""
-        super().__init__("SeLU")
         self.init_prim_io_names(inputs=['input_x'], outputs=['output'])
 
 
@@ -834,7 +758,7 @@ class Elu(Primitive):
     def __init__(self, alpha=1.0):
         """Initialize Elu"""
         validator.check_value_type("alpha", alpha, [float], self.name)
-        validator.check_number("alpha", alpha, 1.0,
+        validator.check_number("alpha", alpha, 1.0, validator.EQ, self.name)
         self.init_prim_io_names(inputs=['x'], outputs=['output', 'mask'])
 
 
@@ -887,25 +811,7 @@ class HSigmoid(Primitive):
     r"""
     Hard sigmoid activation function.
 
-
-
-    Hard sigmoid is defined as:
-
-    .. math::
-
-        \text{hsigmoid}(x_{i}) = max(0, min(1, \frac{x_{i} + 3}{6})),
-
-    where :math:`x_i` is an element of the input Tensor.
-
-    Inputs:
-        - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
-          additional dimensions.
-
-    Outputs:
-        Tensor, with the same type and shape as the `input_x`.
-
-    Raises:
-        TypeError: If `input_x` is not a Tensor.
+    Refer to :func:`mindspore.ops.hardsigmoid` for more details.
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -1048,8 +954,8 @@ class InstanceNorm(PrimitiveWithInfer):
         """Initialize InstanceNorm."""
         self.init_prim_io_names(inputs=['x', 'gamma', 'beta', 'mean', 'variance'],
                                 outputs=['y', 'save_mean', 'save_variance'])
-        self.epsilon = validator.check_float_range(epsilon, 0, 1,
-        self.momentum = validator.check_float_range(momentum, 0, 1,
+        self.epsilon = validator.check_float_range(epsilon, 0, 1, validator.INC_RIGHT, 'epsilon', self.name)
+        self.momentum = validator.check_float_range(momentum, 0, 1, validator.INC_BOTH, 'momentum', self.name)
         self._update_parameter = True
         self.add_prim_attr('side_effect_mem', True)
 
@@ -1153,8 +1059,8 @@ class InstanceNormV2(Primitive):
                                 outputs=['y', 'batch_mean', 'batch_variance'])
         validator.check_is_float(epsilon, 'epsilon', self.name)
         validator.check_is_float(momentum, 'momentum', self.name)
-        validator.check_float_range(epsilon, 0, 1,
-        validator.check_float_range(momentum, 0, 1,
+        validator.check_float_range(epsilon, 0, 1, validator.INC_RIGHT, 'epsilon', self.name)
+        validator.check_float_range(momentum, 0, 1, validator.INC_BOTH, 'momentum', self.name)
         validator.check_bool(is_training, "is_training", self.name)
 
 
@@ -1196,8 +1102,8 @@ class BNTrainingUpdate(Primitive):
         validator.check_value_type("isRef", isRef, [bool], self.name)
         validator.check_value_type("epsilon", epsilon, [float], self.name)
         validator.check_value_type("factor", factor, [float], self.name)
-        self.epsilon = validator.check_float_range(epsilon, 0, 1,
-        self.factor = validator.check_float_range(factor, 0, 1,
+        self.epsilon = validator.check_float_range(epsilon, 0, 1, validator.INC_RIGHT, 'epsilon', 'BNTrainingUpdate')
+        self.factor = validator.check_float_range(factor, 0, 1, validator.INC_BOTH, 'factor', 'BNTrainingUpdate')
         self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
         if context.get_context("device_target") != "GPU" and self.format == "NHWC":
             raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
@@ -1221,8 +1127,9 @@ class BatchNorm(PrimitiveWithInfer):
 
         y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta
 
-    where :math:`\gamma` is scale, :math:`\beta` is bias, :math:`\epsilon` is epsilon,
-    :math:`
+    where :math:`\gamma` is scale, :math:`\beta` is bias, :math:`\epsilon` is epsilon,
+    :math:`mean` is the mean of :math:`x`,
+    :math:`variance` is the variance of :math:`x`.
 
     .. warning::
         - If the operation is used for inference, and outputs "reserve_space_1" and "reserve_space_2" are available,
@@ -1236,8 +1143,8 @@ class BatchNorm(PrimitiveWithInfer):
         momentum (float): The hyper parameter to compute moving average for running_mean and running_var
             (e.g. :math:`new\_running\_mean = (1 - momentum) * running\_mean + momentum * current\_mean`).
             Momentum value must be [0, 1]. Default: 0.1.
-        data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'
-            Default: "NCHW".
+        data_format (str): The optional value for data format, is 'NHWC' or 'NCHW', and the 'NHWC' format
+            is only supported in GPU target. Default: "NCHW".
 
     Inputs:
         If `is_training` is False, inputs are Tensors.
@@ -1273,7 +1180,7 @@ class BatchNorm(PrimitiveWithInfer):
         TypeError: If dtype of `input_x`, `scale` is neither float16 nor float32.
 
     Supported Platforms:
-        ``Ascend`` ``
+        ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
         >>> input_x = Tensor(np.ones([2, 2]), mindspore.float32)
@@ -1304,8 +1211,8 @@ class BatchNorm(PrimitiveWithInfer):
         else:
             self.add_prim_attr('side_effect_mem', True)
         validator.check_value_type('is_training', is_training, (bool,), self.name)
-        validator.check_float_range(epsilon, 0, 1,
-        validator.check_float_range(momentum, 0, 1,
+        validator.check_float_range(epsilon, 0, 1, validator.INC_RIGHT, 'epsilon', self.name)
+        validator.check_float_range(momentum, 0, 1, validator.INC_BOTH, 'momentum', self.name)
         self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
         if context.get_context("device_target") != "GPU" and self.format == "NHWC":
             raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
@@ -1318,12 +1225,12 @@ class BatchNorm(PrimitiveWithInfer):
     def infer_shape(self, input_x, scale, bias, mean, variance):
         input_x_channel = input_x[-1] if self.format == "NHWC" else input_x[1]
         validator.check_equal_int(len(scale), 1, "scale rank", self.name)
-        validator.check("scale shape", scale, "bias shape", bias,
-        validator.check("scale shape[0]", scale[0], "input_x channel", input_x_channel,
+        validator.check("scale shape", scale, "bias shape", bias, validator.EQ, self.name)
+        validator.check("scale shape[0]", scale[0], "input_x channel", input_x_channel, validator.EQ, self.name)
         if not self.is_training:
             validator.check_equal_int(len(mean), 1, "mean rank", self.name)
-            validator.check("mean shape", mean, "variance shape", variance,
-            validator.check("mean shape", mean, "scale shape", scale,
+            validator.check("mean shape", mean, "variance shape", variance, validator.EQ, self.name)
+            validator.check("mean shape", mean, "scale shape", scale, validator.EQ, self.name)
         return input_x, scale, scale, scale, scale
 
     def infer_dtype(self, input_x, scale, bias, mean, variance):
@@ -1337,7 +1244,90 @@ class Conv2D(Primitive):
r"""
2D convolution layer.

-
+ Applies a 2D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
+ where :math:`N` is batch size, :math:`C` is channel number, :math:`H` is height, :math:`W` is width, :math:`X_i` is
+ the :math:`i^{th}` input value and :math:`b_i` indicates the deviation value of the :math:`i^{th}` input value.
+ For each batch of shape :math:`(C_{in}, H_{in}, W_{in})`, the formula is defined as:
+
+ .. math::
+
+ out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,
+
+ where :math:`ccor` is the cross correlation operator, :math:`C_{in}` is the input channel number, :math:`j` ranges
+ from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
+ filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
+ of kernel and it has shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`,
+ where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the
+ convolution kernel. The full kernel has shape
+ :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
+ where group is the group number to split the input in the channel dimension.
+
+ If the 'pad_mode' is set to be "pad", the output height and width will be
+ :math:`\left \lfloor{1 + \frac{H_{in} + \text{padding[0]} + \text{padding[1]} - \text{kernel_size[0]} -
+ (\text{kernel_size[0]} - 1) \times (\text{dilation[0]} - 1) }{\text{stride[0]}}} \right \rfloor` and
+ :math:`\left \lfloor{1 + \frac{W_{in} + \text{padding[2]} + \text{padding[3]} - \text{kernel_size[1]} -
+ (\text{kernel_size[1]} - 1) \times (\text{dilation[1]} - 1) }{\text{stride[1]}}} \right \rfloor` respectively.
+ Where :math:`dilation` is Spacing between kernel elements, :math:`stride` is The step length of each step,
+ :math:`padding` is zero-padding added to both sides of the input.
+
+ The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
+
+ Note:
+ On Ascend platform, :math:`group = 1` must be satisfied.
+
+ Args:
+ out_channel (int): The number of output channel :math:`C_{out}`.
+ kernel_size (Union[int, tuple[int]]): The data type is int or a tuple of 2 integers. Specifies the height
+ and width of the 2D convolution window. Single int means the value is for both the height and the width of
+ the kernel. A tuple of 2 ints means the first value is for the height and the other is for the
+ width of the kernel.
+ mode (int): Modes for different convolutions. The value is currently not used. Default: 1.
+ pad_mode (str): Specifies padding mode. The optional values are
+ "same", "valid" and "pad". Default: "valid".
+
+ - same: Adopts the way of completion. The height and width of the output will be equal to
+ the input `x` divided by stride. The padding will be evenly calculated in top and bottom,
+ left and right possiblily.
+ Otherwise, the last extra padding will be calculated from the bottom and the right side.
+ If this mode is set, `pad` must be 0.
+
+ - valid: Adopts the way of discarding. The possible largest height and width of output will be returned
+ without padding. Extra pixels will be discarded. If this mode is set, `pad` must be 0.
+
+ - pad: Implicit paddings on both sides of the input `x`. The number of `pad` will be padded to the input
+ Tensor borders. `pad` must be greater than or equal to 0.
+ pad (Union(int, tuple[int])): Implicit paddings on both sides of the input `x`. If `pad` is one integer,
+ the paddings of top, bottom, left and right are the same, equal to pad. If `pad` is a tuple
+ with four integers, the paddings of top, bottom, left and right will be equal to pad[0],
+ pad[1], pad[2], and pad[3] accordingly. Default: 0.
+ stride (Union(int, tuple[int])): The distance of kernel moving, an int number that represents
+ the height and width of movement are both strides, or a tuple of two int numbers that
+ represent height and width of movement respectively. Default: 1.
+ dilation (Union(int, tuple[int])): The data type is int or a tuple of 2 integers. Specifies the dilation rate
+ to use for dilated convolution. If set to be :math:`k > 1`, there will
+ be :math:`k - 1` pixels skipped for each sampling location. Its value must
+ be greater than or equal to 1 and bounded by the height and width of the
+ input `x`. Default: 1.
+ group (int): Splits input into groups. Default: 1.
+ data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. Default: "NCHW".
+
+ Inputs:
+ - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+ - **weight** (Tensor) - Set size of kernel is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`,
+ then the shape is :math:`(C_{out}, C_{in}, \text{kernel_size[0]}, \text{kernel_size[1]})`.
+
+ Outputs:
+ Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`.
+
+ Raises:
+ TypeError: If `kernel_size`, `stride`, `pad` or `dilation` is neither an int nor a tuple.
+ TypeError: If `out_channel` or `group` is not an int.
+ ValueError: If `kernel_size`, `stride` or `dilation` is less than 1.
+ ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
+ ValueError: If `pad` is a tuple whose length is not equal to 4.
+ ValueError: If `pad_mode` it not equal to 'pad' and `pad` is not equal to (0, 0, 0, 0).
+ ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'.

Supported Platforms:
``Ascend`` ``GPU`` ``CPU``
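For reference, the Conv2D docstring added above corresponds to the following usage pattern; the shapes shown are a sketch assuming the default pad_mode "valid" and stride 1.

    import numpy as np
    import mindspore
    from mindspore import Tensor, ops

    conv2d = ops.Conv2D(out_channel=32, kernel_size=3)
    x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
    weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)  # (C_out, C_in, k_h, k_w)
    output = conv2d(x, weight)
    print(output.shape)  # (10, 32, 30, 30): 32 - 3 + 1 = 30 in "valid" mode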
@@ -1400,16 +1390,15 @@ class Conv2D(Primitive):

class DataFormatVecPermute(Primitive):
r"""
-
+ Converts the input tensor from the `src_format` to the `dst_format` by permuting its dimensions.

Args:
- src_format (str, optional):
-
- dst_format (str, optional): An optional value for destination data format. The format can be 'NHWC' and 'NCHW'.
- Default: 'NCHW'.
+ src_format (str, optional): the source data format, which can be 'NHWC' and 'NCHW'. Default: 'NHWC'.
+ dst_format (str, optional): the target data format, which can be 'NHWC' and 'NCHW'. Default: 'NCHW'.

Inputs:
- - **input_x** (Tensor) - A Tensor of shape (4, ) or (4, 2) in source data format.
+ - **input_x** (Tensor) - A Tensor of shape :math:`(4, )` or :math:`(4, 2)` in source data format.
+ Supports int32 and int64 datatype.

Outputs:
Tensor, has the same data type and shape as the `input_x`.
@@ -1418,10 +1407,10 @@ class DataFormatVecPermute(Primitive):
TypeError: If `input_x` is not a Tensor.
TypeError: If dtype of `input_x` is neither int32 nor int64.
ValueError: If `src_format` or `dst_format` is not a str in ['NHWC', 'NCHW'].
- ValueError: If input_x shape is not (4, ) or (4, 2)
+ ValueError: If `input_x` shape is not :math:`(4, )` or :math:`(4, 2)`.

Supported Platforms:
- ``GPU`` ``CPU``
+ ``Ascend`` ``GPU`` ``CPU``

Examples:
>>> class Net(nn.Cell):
@@ -1505,8 +1494,8 @@ class DepthwiseConv2dNative(PrimitiveWithInfer):
def infer_shape(self, x_shape, w_shape, b_shape=None):
validator.check_equal_int(len(w_shape), 4, "weight rank", self.name)
validator.check_equal_int(len(x_shape), 4, "x rank", self.name)
- validator.check("x_shape[1]", x_shape[1], "w_shape[1]", w_shape[1],
- validator.check('kernel_size', self.kernel_size, 'w_shape[2:4]', tuple(w_shape[2:4]),
+ validator.check("x_shape[1]", x_shape[1], "w_shape[1]", w_shape[1], validator.EQ, self.name)
+ validator.check('kernel_size', self.kernel_size, 'w_shape[2:4]', tuple(w_shape[2:4]), validator.EQ, self.name)

kernel_size_n, _, kernel_size_h, kernel_size_w = w_shape
_, _, stride_h, stride_w = self.stride
@@ -1804,55 +1793,11 @@ class MaxPoolV1(Primitive):

class MaxPoolWithArgmax(Primitive):
r"""
-
-
- Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool outputs
- regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
- :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows:
-
- .. math::
- \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
- \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)
-
- Args:
- kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value and argmax
- value, is an int number that represents height and width of the kernel, or a tuple of
- two int numbers that represent height and width respectively. Default: 1.
- strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
- not only the height of movement but also the width of movement, or a tuple of two int numbers that
- represent height and width of movement respectively. Default: 1.
- pad_mode (str): The optional value for pad mode, is "same" or "valid".
- Default: "valid".
-
- - same: Adopts the way of completion. The height and width of the output will be the same as
- the input. The total number of padding will be calculated in horizontal and vertical
- directions and evenly distributed to top, bottom, left and right if possible.
- Otherwise, the last extra padding will be done from the bottom and the right side.
-
- - valid: Adopts the way of discarding. The possible largest height and width of output
- will be returned without padding. Extra pixels will be discarded.
-
- data_format (str) : The optional value for data format, is 'NHWC' or 'NCHW'.
- Default: 'NCHW'.
-
- Inputs:
- - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
- Data type must be float16 or float32.
-
- Outputs:
- Tuple of 2 Tensors, representing the maxpool result and where the max values are generated.
-
- - **output** (Tensor) - Maxpooling result, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
- It has the same data type as `x`.
- - **mask** (Tensor) - Max values' index represented by the mask. Data type is int32.
-
- Raises:
- TypeError: If the data type of `x` is neither float16 nor float32.
- TypeError: If `kernel_size` or `strides` is neither an int nor a tuple.
- TypeError: If `x` is not a Tensor.
+ `ops.MaxPoolWithArgmax` is deprecated from version 2.0 and will be removed in a future version,
+ use `ops.MaxPoolWithArgmaxV2` instead.

Supported Platforms:
-
+ Deprecated

Examples:
>>> x = Tensor(np.arange(1 * 3 * 3 * 4).reshape((1, 3, 3, 4)), mindspore.float32)
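The deprecation note above points to `ops.MaxPoolWithArgmaxV2`. A migration sketch might look like the following; the V2 constructor arguments (kernel_size, strides) are assumed from the replacement operator and are not shown in this diff.

    import numpy as np
    import mindspore
    from mindspore import Tensor, ops

    x = Tensor(np.arange(20 * 16 * 50 * 32).reshape((20, 16, 50, 32)), mindspore.float32)
    # old (deprecated since 2.0): ops.MaxPoolWithArgmax(kernel_size=2, strides=2, pad_mode="valid")
    maxpool_v2 = ops.MaxPoolWithArgmaxV2(kernel_size=2, strides=2)  # assumed signature
    output, argmax = maxpool_v2(x)
    print(output.shape)  # (20, 16, 25, 16)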
@@ -1867,6 +1812,7 @@ class MaxPoolWithArgmax(Primitive):
[33. 34. 35.]]]]
"""

+ @deprecated("2.0", "ops.MaxPoolWithArgmaxV2", False)
@prim_attr_register
def __init__(self, kernel_size=1, strides=1, pad_mode="valid", data_format="NCHW"):
"""Initialize MaxPoolWithArgmax."""
@@ -1891,7 +1837,7 @@ class MaxPoolWithArgmax(Primitive):
self.add_prim_attr("strides", self.strides)


- class MaxPool3D(
+ class MaxPool3D(Primitive):
r"""
Applies a 3D max pooling over an input Tensor which can be regarded as a composition of 3D planes.

@@ -1936,7 +1882,7 @@ class MaxPool3D(PrimitiveWithInfer):

Inputs:
- **x** (Tensor) - Tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`.
- Data type must be float16 or
+ Data type must be float16, float32 or float64.

Outputs:
Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})`. Has the data type of `x`.
@@ -2003,55 +1949,15 @@ class MaxPool3D(PrimitiveWithInfer):
validator.check_non_negative_int(item, 'pad_list item', self.name)
self.add_prim_attr("pad_list", self.pad_list)

- def infer_shape(self, x_shape):
- validator.check_equal_int(len(x_shape), 5, "x rank", self.name)
- batch, channel, input_d, input_h, input_w = x_shape
- self.add_prim_attr("x_shape", x_shape)
- _, _, kernel_d, kernel_h, kernel_w = self.kernel_size
- _, _, stride_d, stride_h, stride_w = self.strides
-
- if self.pad_mode == "VALID":
- out_d = math.ceil((input_d - (kernel_d - 1)) / stride_d)
- out_h = math.ceil((input_h - (kernel_h - 1)) / stride_h)
- out_w = math.ceil((input_w - (kernel_w - 1)) / stride_w)
- elif self.pad_mode == "SAME":
- out_d = math.ceil(input_d / stride_d)
- out_h = math.ceil(input_h / stride_h)
- out_w = math.ceil(input_w / stride_w)
- else:
- out_d = ((input_d + self.pad_list[0] + self.pad_list[1] -
- (kernel_d - 1) - 1) / stride_d) + 1
- out_h = ((input_h + self.pad_list[2] + self.pad_list[3] -
- (kernel_h - 1) - 1) / stride_h) + 1
- out_w = ((input_w + self.pad_list[4] + self.pad_list[5] -
- (kernel_w - 1) - 1) / stride_w) + 1
- if self.ceil_mode:
- out_d = math.ceil(out_d)
- out_h = math.ceil(out_h)
- out_w = math.ceil(out_w)
- else:
- out_d = math.floor(out_d)
- out_h = math.floor(out_h)
- out_w = math.floor(out_w)
- out_shape = [batch, channel, out_d, out_h, out_w]
-
- _check_shape('output', out_shape, self.name)
- return out_shape
-
- def infer_dtype(self, x_dtype):
- validator.check_tensor_dtype_valid("x", x_dtype, [mstype.float16, mstype.float32], self.name)
- return x_dtype
-

class MaxUnpool2D(Primitive):
r"""
-
-
- MaxPool2D is not fully invertible, since the non-maximal values are lost.
+ Calculates the partial inverse of MaxPool2D operation.

-
-
-
+ Since MaxPool2D loses non-maximal values, it is not fully invertible.
+ Therefore, MaxUnpool2D takes the output of MaxPool2D, including the indices of
+ the maximal values, and computes a partial inverse where all non-maximal values are set to zero.
+ Typically the input is of shape :math:`(N, C, H_{in}, W_{in})` ,
the output is of shape :math:`(N, C, H_{out}, W_{out})` , the operation is as follows:

.. math::
@@ -2060,6 +1966,9 @@ class MaxUnpool2D(Primitive):
W_{out} = (W{in} - 1) \times strides[1] - 2 \times pads[1] + ksize[1] \\
\end{array}

+ .. warning::
+ This is an experimental API that is subject to change or deletion.
+
Args:
ksize (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
is an int number that represents height and width of the kernel, or a tuple
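The output-size relation above, :math:`H_{out} = (H_{in} - 1) \times strides - 2 \times pads + ksize`, can be exercised on a small input; this sketch mirrors the example that appears later in the docstring.

    import numpy as np
    import mindspore
    from mindspore import Tensor, ops

    x = Tensor(np.array([[[[0, 1], [8, 9]]]]).astype(np.float32))
    argmax = Tensor(np.array([[[[0, 1], [2, 3]]]]).astype(np.int64))
    maxunpool2d = ops.MaxUnpool2D(ksize=1, strides=1, pads=0)
    output = maxunpool2d(x, argmax)
    print(output.shape)  # (1, 1, 2, 2): H_out = (2 - 1) * 1 - 2 * 0 + 1 = 2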
@@ -2090,7 +1999,7 @@ class MaxUnpool2D(Primitive):
- **x** (Tensor) - The input Tensor to invert.
Tensor of shape :math:`(N, C, H_{in}, W_{in})` or :math:`(N, H_{in}, W_{in}, C)`.
- **argmax** (Tensor) - Max values' index represented by the `argmax`.
- Tensor of shape must be same with input
+ Tensor of shape must be same with input `x`.
Values of `argmax` must belong to :math:`[0, H_{in} \times W_{in} - 1]`.
Data type must be in int32 or int64.

@@ -2110,7 +2019,7 @@ class MaxUnpool2D(Primitive):
computed by attr `ksize`, `strides` and `pads`.

Supported Platforms:
- ``GPU`` ``CPU``
+ ``Ascend`` ``GPU`` ``CPU``

Examples:
>>> x = Tensor(np.array([[[[0, 1], [8, 9]]]]).astype(np.float32))
@@ -2148,12 +2057,9 @@ class MaxUnpool2D(Primitive):

class MaxUnpool3D(Primitive):
r"""
- Computes
-
- MaxUnpool3D is not fully invertible, since the non-maximal values are lost.
+ Computes the inverse of :class:`mindspore.ops.MaxPool3D`.

- MaxUnpool3D
- values and computes a partial inverse in which all non-maximal values are set to zero.
+ MaxUnpool3D keeps the maximal value and set all position of non-maximal values to zero.
Typically the input is of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`, the output is of
shape :math:`(N, C, D_{out}, H_{out}, W_{out})`, the operation is as follows.
@@ -2164,55 +2070,63 @@ class MaxUnpool3D(Primitive):
W_{out} = (W{in} - 1) \times strides[2] - 2 \times pads[2] + ksize[2] \\
\end{array}

+ .. warning::
+ This is an experimental API that is subject to change or deletion.
+
Args:
ksize (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
is an int number that represents depth, height and width of the kernel, or a tuple
of three int numbers that represent depth, height and width respectively.
- strides (Union[int, tuple[int]]): The distance of kernel moving
-
-
- If
-
-
-
-
- If
- If
-
+ strides (Union[int, tuple[int]], optional): The distance of kernel moving. Default: 0.
+
+ - If it is an int number, the depth, height and width of movement are all equal to `strides`.
+ - If it is a tuple of three int numbers, they represent depth, height and width of movement respectively.
+ - If strides is 0 or (0, 0, 0), then `strides` equal to `ksize`.
+
+ pads (Union[int, tuple[int]], optional): The pad value to be filled. Default: 0.
+
+ - If `pads` is an integer, the paddings of depth, height and width are the same, equal to pads.
+ - If `pads` is a tuple of three integers, the padding of depth, height and width equal to pads[0],
+ pads[1] and pads[2] correspondingly.
+
+ output_shape (tuple[int], optional) : The target output size. Default: ().
+ If :math:`output\_shape == ()`, then the shape of output computed by kszie, strides and pads shown above.
+ If :math:`output\_shape != ()`, then output_shape format must be :math:`(N, C, D, H, W)` or
+ :math:`(N, D, H, W, C)` and output_shape must be in range
:math:`[(N, C, D_{out} - strides[0], H_{out} - strides[1], W_{out} - strides[2]),
(N, C, D_{out} + strides[0], H_{out} + strides[1], W_{out} + strides[2])]`.
- data_format (str) : The optional value for data format. Currently
+ data_format (str, optional) : The optional value for data format. Currently
+ support 'NCDHW' and 'NDHWC'. Default: 'NCDHW'.

Inputs:
- **x** (Tensor) - The input Tensor to invert.
Tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(N, D_{in}, H_{in}, W_{in}, C)`.
- - **argmax** (Tensor) - Max values' index
-
-
- Data type must be in int32 or int64.
+ - **argmax** (Tensor) - Max values' index. Tensor that has the same shape as `x`.
+ Values of `argmax` must be in range :math:`[0, D_{in} \times H_{in} \times W_{in} - 1]`.
+ Data type must be int32 or int64.

Outputs:
Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(N, D_{out}, H_{out}, W_{out}, C)`.
Has the same data type with `x`.

Raises:
- TypeError: If data type of `x` or `argmax` is
+ TypeError: If data type of `x` or `argmax` is Number.
TypeError: If `ksize`, `strides` or `pads` is neither int nor tuple.
- ValueError: If numbers in `strides`
+ ValueError: If numbers in `strides` or `ksize` is negative.
ValueError: If numbers in `pads` is negative.
ValueError: If `ksize`, `strides` or `pads` is a tuple whose length is not equal to 3.
ValueError: If `data_format` is not a str or is neither `NCDHW` nor `NDHWC`.
ValueError: If `output_shape` whose length is neither 0 or 5.
- ValueError: If `output_shape` is not close to output size
+ ValueError: If `output_shape` is not close to output size range
computed by attr `ksize, strides, pads`.

Supported Platforms:
- ``GPU`` ``CPU``
+ ``Ascend`` ``GPU`` ``CPU``

Examples:
>>> x = Tensor(np.array([[[[[0, 1], [8, 9]]]]]).astype(np.float32))
>>> argmax = Tensor(np.array([[[[[0, 1], [2, 3]]]]]).astype(np.int64))
- >>> maxunpool3d =
+ >>> maxunpool3d = ops.MaxUnpool3D(ksize=1, strides=1, pads=0)
>>> output = maxunpool3d(x, argmax)
>>> print(output.asnumpy())
[[[[[0. 1.]
@@ -2242,7 +2156,7 @@ class MaxUnpool3D(Primitive):
self.output_shape = output_shape


- class AvgPool(
+ class AvgPool(Primitive):
r"""
Average pooling operation.

@@ -2307,7 +2221,23 @@ class AvgPool(_Pool):
@prim_attr_register
def __init__(self, kernel_size=1, strides=1, pad_mode="valid", data_format="NCHW"):
"""Initialize AvgPool."""
-
+ self.init_prim_io_names(inputs=['x'], outputs=['output'])
+ validator.check_value_type('kernel_size', kernel_size, [int, tuple], self.name)
+ validator.check_value_type('strides', strides, [int, tuple], self.name)
+ validator.check_value_type('pad_mode', pad_mode, [str], self.name)
+ self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.name)
+ self.add_prim_attr("pad_mode", self.pad_mode)
+ self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
+ if context.get_context("device_target") != "GPU" and self.format == "NHWC":
+ raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
+ f"but got the 'data_format' is {self.format} and "
+ f"the platform is {context.get_context('device_target')}.")
+ self.add_prim_attr('data_format', self.format)
+ self.kernel_size = _check_positive_int_or_tuple(
+ "kernel_size", kernel_size, self.name, allow_four=False, ret_four=True)
+ self.add_prim_attr("kernel_size", self.kernel_size)
+ self.strides = _check_positive_int_or_tuple("strides", strides, self.name, allow_four=False, ret_four=True)
+ self.add_prim_attr("strides", self.strides)


class AvgPoolV1(Primitive):
@@ -2489,6 +2419,22 @@ class MaxPool3DWithArgmax(Primitive):
\max_{l=0, \ldots, d_{ker}-1} \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
\text{input}(N_i, C_j, s_0 \times d + l, s_1 \times h + m, s_2 \times w + n)

+ The output is a Tensor with shape :math:`(N_{out}, C_{out}, D_{out}, H_{out}, W_{out})` and its depth, height and
+ width are:
+
+ .. math::
+ \begin{array}{ll} \\
+ D_{out} = \frac{D_{in} + 2 \times \text{pads}[0] - \text{dilation}[0] \times (\text{ksize}[0] - 1) - 1}
+ {\text{stride}[0]} + 1 \\
+ H_{out} = \frac{H_{in} + 2 \times \text{pads}[1] - \text{dilation}[1] \times (\text{ksize}[1] - 1) - 1}
+ {\text{stride}[1]} + 1 \\
+ W_{out} = \frac{W_{in} + 2 \times \text{pads}[2] - \text{dilation}[2] \times (\text{ksize}[2] - 1) - 1}
+ {\text{stride}[2]} + 1 \\
+ \end{array}
+
+ .. warning::
+ This is an experimental API that is subject to change or deletion.
+
Args:
ksize (Union[int, tuple[int]]): The size of kernel used to take the maximum value and arg
value, is an int number that represents depth, height and width of the kernel, or a tuple of
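The added output-size formula can be evaluated directly; this small helper is written only to illustrate the arithmetic, not the operator itself.

    import math

    def pooled_dim(in_dim, ksize, stride, pad=0, dilation=1, ceil_mode=False):
        # (in + 2*pads - dilation*(ksize - 1) - 1) / stride + 1, per the formula above
        out = (in_dim + 2 * pad - dilation * (ksize - 1) - 1) / stride + 1
        return math.ceil(out) if ceil_mode else math.floor(out)

    print(pooled_dim(2, ksize=2, stride=1))  # 1, for an input dimension of size 2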
@@ -2524,7 +2470,7 @@ class MaxPool3DWithArgmax(Primitive):
ValueError: If `argmax_type` is not mindspore.int64 or mindspore.int32.

Supported Platforms:
- ``GPU``
+ ``Ascend`` ``GPU`` ``CPU``

Examples:
>>> x = Tensor(np.arange(2 * 1 * 2 * 2 * 2).reshape((2, 1, 2, 2, 2)), mindspore.float32)
@@ -2586,8 +2532,8 @@ class Conv2DTranspose(Conv2DBackpropInput):
dilation (Union[int, tuple[int]]): Specifies the dilation rate to be used for the dilated convolution.
Default: 1.
group (int): Splits input into groups. Default: 1.
- data_format (str): The format of input and output data. It should be 'NHWC' or 'NCHW'
-
+ data_format (str): The format of input and output data. It should be 'NHWC' or 'NCHW'.
+ Default is 'NCHW'.

Inputs:
- **dout** (Tensor) - the gradients with respect to the output of the convolution.
@@ -2704,7 +2650,8 @@ class NLLLoss(Primitive):

Inputs:
- **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type only supports float32 or float16.
- - **labels** (Tensor) - Ground truth labels, with shape :math:`(N,)
+ - **labels** (Tensor) - Ground truth labels, with shape :math:`(N,)`, where each value belong to
+ :math:`[0, C-1]`. Data type only supports int32 or int64.
- **weight** (Tensor) - The rescaling weight to each class, with shape :math:`(C,)` and data type only
supports float32 or float16.

@@ -2716,13 +2663,15 @@ class NLLLoss(Primitive):
- **total_weight** (Tensor) - The `total_weight` is a scalar. The data type is the same with `weight's`.

Raises:
- TypeError: If dtype of `logits` or `weight` is neither float16 nor float32
+ TypeError: If dtype of `logits` or `weight` is neither float16 nor float32.
+ TypeError: If dtype of `labels` is neither int32 nor int64.
ValueError: If `logits` is not a one or two dimension tensor, `labels` and `weight` are not
one dimension tensors.
When `logits` is a two dimension tensor, the first dimension of `logits` is not equal to `labels`,
and second dimension of `logits` is not equal to `weight`.
When `logits` is a one dimension tensor, the dimensions of `logits`, `labels`
and `weight` should be equal to each other.
+ ValueError: If the value of `labels` exceed :math:`[0, C-1]`, where :math:`C` is the number of classes.

Supported Platforms:
``Ascend`` ``GPU`` ``CPU``
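A minimal usage sketch for the documented inputs (logits of shape (N, C), labels of shape (N,), weight of shape (C,)); the `reduction` keyword is assumed to default to 'mean'.

    import numpy as np
    import mindspore
    from mindspore import Tensor, ops

    logits = Tensor(np.array([[0.1, 0.9], [0.8, 0.2]]), mindspore.float32)
    labels = Tensor(np.array([1, 0]), mindspore.int32)
    weight = Tensor(np.array([0.5, 0.5]), mindspore.float32)
    nll_loss = ops.NLLLoss(reduction="mean")
    loss, total_weight = nll_loss(logits, labels, weight)
    print(loss.shape, total_weight.shape)  # both scalars when reduction is 'mean'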
@@ -2826,7 +2775,7 @@ class SparseSoftmaxCrossEntropyWithLogits(Primitive):
TypeError: If `is_grad` is not a bool.
TypeError: If dtype of `logits` is neither float16 nor float32.
TypeError: If dtype of `labels` is neither int32 nor int64.
- ValueError: If logits.shape[0] != labels.shape[0]
+ ValueError: If :math:`logits.shape[0] != labels.shape[0]`.

Supported Platforms:
``GPU`` ``CPU``
@@ -2922,14 +2871,19 @@ class ApplyMomentum(Primitive):
gradient_scale (float): The scale of the gradient. Default: 1.0.

Inputs:
- - **variable** (Parameter) - Weights to be updated. Data type must be float
+ - **variable** (Parameter) - Weights to be updated. Data type must be float64, int64, float, float16,
+ int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128.
- **accumulation** (Parameter) - Accumulated gradient value by moment weight,
has the same data type with `variable`.
- - **learning_rate** (Union[Number, Tensor]) - The learning rate value, must be a
-
+ - **learning_rate** (Union[Number, Tensor]) - The learning rate value, must be a float64, int64, float,
+ float16, int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128 number or
+ a scalar tensor with float64, int64, float, float16, int16, int32, int8, uint16, uint32, uint64, uint8,
+ complex64, complex128 data type.
- **gradient** (Tensor) - Gradient, has the same data type as `variable`.
- - **momentum** (Union[Number, Tensor]) - Momentum, must be a float
-
+ - **momentum** (Union[Number, Tensor]) - Momentum, must be a float64, int64, float, float16, int16, int32,
+ int8, uint16, uint32, uint64, uint8, complex64, complex128 number or
+ a scalar tensor with float64, int64, float, float16, int16, int32, int8, uint16, uint32, uint64, uint8,
+ complex64, complex128 data type.

Outputs:
Tensor, parameters to be updated.
@@ -3003,21 +2957,47 @@ class SmoothL1Loss(Primitive):
def __init__(self, beta=1.0, reduction='none'):
"""Initialize SmoothL1Loss."""
validator.check_value_type('beta', beta, [float], self.name)
- validator.check('beta', beta, '', 0,
+ validator.check('beta', beta, '', 0, validator.GT, self.name)
validator.check_string(
reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
+ self.add_prim_attr('sigma', self.beta)
self.init_prim_io_names(inputs=['prediction', 'target'], outputs=['output'])


class MultiMarginLoss(Primitive):
r"""
- Creates a
-
+ Creates a loss function that minimizes the hinge loss
+ for multi-class classification tasks.
+ The loss is calculated by comparing the input and output of the function.
+
+ .. warning::
+ This is an experimental API that is subject to change or deletion.

Refer to :func:`mindspore.ops.multi_margin_loss` for more details.

+ Args:
+ p (int, optional): The norm degree for pairwise distance. Should be 1 or 2. Default: 1.
+ margin (int, optional): A parameter to change pairwise distance. Default: 1.0.
+ reduction (str, optional): Apply specific reduction method to the output: 'none', 'mean',
+ 'sum'. Default: 'mean'.
+
+ - 'none': no reduction will be applied.
+ - 'mean': the sum of the output will be divided by the number of elements in the output.
+ - 'sum': the output will be summed.
+
+ Inputs:
+ - **inputs** (Tensor) - Input , with shape :math:`(N, C)`. Data type only support float32, float16 or float64.
+ - **target** (Tensor) - Ground truth labels, with shape :math:`(N,)`. Data type only support int64. The
+ value of target should be non-negative, less than C.
+ - **weight** (Tensor) - The rescaling weight to each class with shape :math:`(C,)`. Data type only
+ support float16, float32 or float64.
+
+ Outputs:
+ Tensor, When `reduction` is 'none', the shape is :math:`(N,)`.
+ Otherwise, it is a scalar. Has the same data type with `inputs`.
+
Supported Platforms:
- ``CPU``
+ ``Ascend`` ``GPU`` ``CPU``

Examples:
>>> x = Tensor(np.ones(shape=[3, 3]), mindspore.float32)
@@ -3033,7 +3013,7 @@ class MultiMarginLoss(Primitive):
def __init__(self, p=1, margin=1.0, reduction="mean"):
"""Initialize MultiMarginLoss"""
self.p = validator.check_value_type('p', p, [int], self.name)
- validator.check_int(p, {1, 2},
+ validator.check_int(p, {1, 2}, validator.IN, 'p', self.name)
self.margin = validator.check_value_type('margin', margin, [float], self.name)
self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
self.init_prim_io_names(inputs=['x', 'target', 'weight'], outputs=['y'])
@@ -3217,10 +3197,13 @@ class RNNTLoss(PrimitiveWithInfer):
validator.check_equal_int(len(labels_shape), 2, 'labels_rank', self.name)
validator.check_equal_int(len(input_length_shape), 1, 'input_length_rank', self.name)
validator.check_equal_int(len(label_length_shape), 1, 'label_length_rank', self.name)
- validator.check('labels shape[0]', labels_shape[0], 'acts shape[0]', acts_shape[0],
- validator.check('labels shape[1]', labels_shape[1], 'acts shape[2]-1',
-
- validator.check('
+ validator.check('labels shape[0]', labels_shape[0], 'acts shape[0]', acts_shape[0], validator.EQ, self.name)
+ validator.check('labels shape[1]', labels_shape[1], 'acts shape[2]-1',
+ acts_shape[2] - 1, validator.EQ, self.name)
+ validator.check('input_length size', input_length_shape[0], 'acts shape[0]',
+ acts_shape[0], validator.EQ, self.name)
+ validator.check('label_length size', label_length_shape[0], 'acts shape[0]',
+ acts_shape[0], validator.EQ, self.name)
costs_shape = (acts_shape[0],)
return costs_shape, acts_shape

@@ -3301,13 +3284,10 @@ class SGD(PrimitiveWithCheck):

def check_shape(self, parameters_shape, gradient_shape, learning_rate_shape,
accum_shape, momentum_shape, stat_shape):
- validator.
- validator.check_int(len(
- validator.check_int(len(
- validator.
- validator.check_int(len(momentum_shape), 0, Rel.GE, f'momentum rank', self.name)
- validator.check_int(len(stat_shape), 0, Rel.GE, f'stat rank', self.name)
- validator.check("gradient shape", gradient_shape, "stat shape", stat_shape, Rel.EQ, self.name)
+ validator.check_int(len(gradient_shape), 0, validator.GE, f'gradient rank', self.name)
+ validator.check_int(len(learning_rate_shape), 0, validator.GE, f'learning rate rank', self.name)
+ validator.check_int(len(momentum_shape), 0, validator.GE, f'momentum rank', self.name)
+ validator.check_int(len(stat_shape), 0, validator.GE, f'stat rank', self.name)

def check_dtype(self, parameters_dtype, gradient_dtype, learning_rate_dtype,
accum_dtype, momentum_dtype, stat_dtype):
@@ -3348,7 +3328,7 @@ class ApplyRMSProp(PrimitiveWithInfer):
from being updated. Default: False.

Inputs:
- - **var** (
+ - **var** (Parameter) - Weights to be updated.
- **mean_square** (Tensor) - Mean square gradients, must be the same type as `var`.
- **moment** (Tensor) - Delta of `var`, must be the same type as `var`.
- **learning_rate** (Union[Number, Tensor]) - Learning rate. Must be a float number or
@@ -3442,7 +3422,7 @@ class ApplyCenteredRMSProp(Primitive):
from being updated. Default: False.

Inputs:
- - **var** (
+ - **var** (Parameter) - Weights to be updated.
- **mean_gradient** (Tensor) - Mean gradients, must be the same type as `var`.
- **mean_square** (Tensor) - Mean square gradients, must be the same type as `var`.
- **moment** (Tensor) - Delta of `var`, must be the same type as `var`.
@@ -3519,9 +3499,9 @@ class LayerNorm(Primitive):
- **input_x** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
The input of LayerNorm.
- **gamma** (Tensor) - Tensor of shape :math:`(P_0, \ldots, P_\text{begin_params_axis})`.
- The learnable parameter
+ The learnable parameter :math:`\gamma` as the scale on norm.
- **beta** (Tensor) - Tensor of shape :math:`(P_0, \ldots, P_\text{begin_params_axis})`.
- The learnable parameter
+ The learnable parameter :math:`\beta` as the scale on norm.

Outputs:
tuple[Tensor], tuple of 3 tensors, the normalized input and the updated parameters.
@@ -3576,13 +3556,17 @@ class L2Normalize(Primitive):
where :math:`\epsilon` is epsilon and :math:`\sum_{i}^{}\left | x_i \right | ^2` calculate the sum of squares of
the input `x` along the dimension `axis`.

+ Note:
+ On Ascend, input data type of float64 is currently not supported.
+
Args:
axis (Union[list(int), tuple(int), int]): Specify the axis for calculating the L2 norm. Default: 0.
epsilon (float): A small value added for numerical stability. Default: 1e-4.

Inputs:
- - **x** (Tensor) - Input to compute the normalization. Tensor of shape :math:`(N,
-
+ - **x** (Tensor) - Input to compute the normalization. Tensor of shape :math:`(N, *)`,
+ where :math:`*` means any number of additional dimensions.
+ Data type must be float16, float32 or float64.

Outputs:
Tensor, with the same type and shape as the `x`.
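The formula this docstring refers to is easy to state in NumPy; the following is a sketch of the math only, not the primitive.

    import numpy as np

    def l2_normalize(x, axis=0, epsilon=1e-4):
        # x / sqrt(max(sum(x**2, axis), epsilon)), per the formula above
        denom = np.sqrt(np.maximum(np.sum(np.square(x), axis=axis, keepdims=True), epsilon))
        return x / denom

    print(l2_normalize(np.array([[3.0, 0.0], [4.0, 1.0]], dtype=np.float32)))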
@@ -3591,7 +3575,7 @@ class L2Normalize(Primitive):
TypeError: If `axis` is not one of the following: list, tuple or int.
TypeError: If `epsilon` is not a float.
TypeError: If `x` is not a Tensor.
- TypeError: If dtype of `x` is
+ TypeError: If dtype of `x` is not in [float16, float32, float64].
ValueError: If dimension of `x` is not greater than 0.

Supported Platforms:
@@ -3653,57 +3637,14 @@ class DropoutDoMask(Primitive):

class ResizeBilinear(PrimitiveWithInfer):
r"""
-
-
- The resizing only affects the lower two dimensions which represent the height and width. The input images
- can be represented by different data types, but the data types of output images are always float32.
-
- For general resize, refer to :func:`mindspore.ops.interpolate` for more details.
-
- .. warning::
- This interface does not support dynamic shape and is subject to change or deletion,
- use :func:`mindspore.ops.interpolate` instead.
-
- Args:
- size (Union[tuple[int], list[int]]): A tuple or list of 2 int elements :math:`(new\_height, new\_width)`,
- the new size of the images.
- align_corners (bool): If true, rescale input by :math:`(new\_height - 1) / (height - 1)`,
- which exactly aligns the 4 corners of images and resized images. If false,
- rescale by :math:`new\_height / height`. Default: False.
- half_pixel_centers (bool): Whether half pixel center. If set to True, `align_corners` should be False.
- Default: False.
+ This API is deprecated, please use the :class:`mindspore.ops.ResizeBilinearV2` instead.
+ For general resizing with other interpolation methods, refer to :func:`mindspore.ops.interpolate` for more details.

-
-
- :math:`(batch, channels, height, width)`, with data type of float32 or float16.
-
- Outputs:
- Tensor, resized image. 4-D with shape :math:`(batch, channels, new\_height, new\_width)`,
- with the same data type as input `x`.
-
- Raises:
- TypeError: If `size` is neither a tuple nor list.
- TypeError: If `align_corners` is not a bool.
- TypeError: If `half_pixel_centers` is not a bool.
- TypeError: If `align_corners` and `half_pixel_centers` are all True.
- TypeError: If `half_pixel_centers` is True and device_target not Ascend.
- TypeError: If dtype of `x` is neither float16 nor float32.
- TypeError: If `x` is not a Tensor.
- ValueError: If length of shape of `x` is not equal to 4.
+ Note:
+ Dynamic shape feature is not supported for now.

Supported Platforms:
- ``Ascend`` ``
-
- Examples:
- >>> x = Tensor([[[[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]]], mindspore.float32)
- >>> resize_bilinear = ops.ResizeBilinear((5, 5))
- >>> output = resize_bilinear(x)
- >>> print(output)
- [[[[1. 2. 3. 4. 5.]
- [1. 2. 3. 4. 5.]
- [1. 2. 3. 4. 5.]
- [1. 2. 3. 4. 5.]
- [1. 2. 3. 4. 5.]]]]
+ ``Ascend`` ``GPU`` ``CPU``
"""

@prim_attr_register
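Migration from the deprecated primitive is sketched below; the ResizeBilinearV2 call convention (size passed as an input rather than a constructor attribute) is an assumption, not something shown in this diff.

    import mindspore
    from mindspore import Tensor, ops

    x = Tensor([[[[1., 2., 3., 4., 5.], [1., 2., 3., 4., 5.]]]], mindspore.float32)
    # deprecated: y = ops.ResizeBilinear((5, 5))(x)
    resize = ops.ResizeBilinearV2()  # assumed replacement operator
    y = resize(x, (5, 5))
    print(y.shape)  # (1, 1, 5, 5)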
@@ -3723,7 +3664,7 @@ class ResizeBilinear(PrimitiveWithInfer):
validator.check_positive_int(value, f'{i}th value of size', self.name)

def infer_shape(self, input_shape):
- validator.check("dimension of input", len(input_shape), "", 4,
+ validator.check("dimension of input", len(input_shape), "", 4, validator.EQ, self.name)
input_shape = list(input_shape)
batch, channel, _, _ = input_shape
out_shape = [batch, channel]
@@ -3780,22 +3721,24 @@ class UpsampleTrilinear3D(Primitive):
ValueError: If size of `output_size` is not equal 3 when `output_size` is specified.

Supported Platforms:
-
+

Examples:
- >>>
- >>>
+ >>> net = ops.UpsampleTrilinear3D(output_size=[4, 64, 48])
+ >>> in_x = Tensor(input_data=np.random.randn(2, 3, 4, 512, 256))
+ >>> out = net(in_x)
>>> print(out.shape)
(2, 3, 4, 64, 48)
-
- >>>
+ >>>
+ >>> net = ops.UpsampleTrilinear3D(output_size=[2, 4, 4])
>>> in_x = Tensor(np.arange(1, 5, dtype=np.float32).reshape((1, 1, 1, 2, 2)))
- >>> out =
+ >>> out = net(in_x)
>>> print(out)
[[[[[1. 1.25 1.75 2. ]
[1.5 1.75 2.25 2.5 ]
[2.5 2.75 3.25 3.5 ]
[3. 3.25 3.75 4. ]]
+
[[1. 1.25 1.75 2. ]
[1.5 1.75 2.25 2.5 ]
[2.5 2.75 3.25 3.5 ]
@@ -4036,7 +3979,7 @@ class GetNext(Primitive):
"""Initialize GetNext."""
validator.check_value_type("types", types, [list, tuple], self.name)
validator.check_value_type("shapes", shapes, [list, tuple], self.name)
- validator.check("types length", len(types), "shapes length", len(shapes),
+ validator.check("types length", len(types), "shapes length", len(shapes), validator.EQ, self.name)
validator.check_value_type("output_num", output_num, [int], self.name)


@@ -4076,8 +4019,8 @@ class PReLU(PrimitiveWithInfer):
self.init_prim_io_names(inputs=['x', 'weight'], outputs=['output'])


- class LSTM(
- """
+ class LSTM(Primitive):
+ r"""
Performs the Long Short-Term Memory (LSTM) on the input.

For detailsed information, please refer to :class:`mindspore.nn.LSTM`.
@@ -4092,20 +4035,20 @@ class LSTM(PrimitiveWithInfer):
LSTM layer except the last layer. The range of dropout is [0.0, 1.0].

Inputs:
- - **input** (Tensor) - Tensor of shape (
- (
- - **h** (
- - **c** (
+ - **input** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, input\_size)` or
+ :math:`(batch\_size, seq\_len, input\_size)`.
+ - **h** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
+ - **c** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
- **w** (Tensor) - A weight Tensor.

Outputs:
Tuple, a tuple contains (`output`, `h_n`, `c_n`, `reserve`, `state`).

- - **output** (Tensor) - Tensor of shape (
- - **h_n** (Tensor) - Tensor of shape (
- - **c_n** (Tensor) - Tensor of shape (
- - **reserve** (Tensor) - Tensor of shape (r, 1)
- - **state** (Tensor) - Random number generator state and its shape is (s, 1)
+ - **output** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, num\_directions * hidden\_size)`.
+ - **h_n** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
+ - **c_n** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
+ - **reserve** (Tensor) - Tensor of shape :math:`(r, 1)`.
+ - **state** (Tensor) - Random number generator state and its shape is :math:`(s, 1)`.

Raises:
TypeError: If `input_size`, `hidden_size` or `num_layers` is not an int.
@@ -4151,37 +4094,13 @@ class LSTM(PrimitiveWithInfer):
self.has_bias = validator.check_value_type("has_bias", has_bias, (bool,), self.name)
self.bidirectional = validator.check_value_type("bidirectional", bidirectional, (bool,), self.name)
self.dropout = validator.check_value_type("dropout", dropout, [float], self.name)
- self.dropout = validator.check_float_range(dropout, 0, 1,
+ self.dropout = validator.check_float_range(dropout, 0, 1, validator.INC_BOTH, 'dropout', self.name)

if bidirectional:
self.num_directions = 2
else:
self.num_directions = 1

- def infer_shape(self, x_shape, h_shape, c_shape, w_shape):
- validator.check_equal_int(len(x_shape), 3, "x rank", self.name)
- validator.check_equal_int(x_shape[2], self.input_size, "x[2]", self.name)
-
- # h and c should be same shape
- validator.check_equal_int(len(h_shape), 3, "h rank", self.name)
- validator.check("h_shape", h_shape, "c_shape", c_shape, Rel.EQ, self.name)
-
- validator.check_int(h_shape[0], self.num_layers * self.num_directions, Rel.EQ, "h[0]", self.name)
- validator.check_equal_int(h_shape[1], x_shape[1], "h[1]", self.name)
- validator.check_int(h_shape[2], self.hidden_size, Rel.EQ, "h[2]", self.name)
-
- y_shape = (x_shape[0], x_shape[1], self.hidden_size * self.num_directions)
-
- # set arbitrary shape for reserved space
- reserved_shape = (1, 1)
- state_shape = (1, 1)
- return y_shape, h_shape, c_shape, reserved_shape, state_shape
-
- def infer_dtype(self, x_dtype, h_dtype, c_dtype, w_dtype):
- args = {'x': x_dtype, 'h': h_dtype, 'c': c_dtype, 'w': w_dtype}
- validator.check_tensors_dtypes_same_and_valid(args, (mstype.float32, mstype.float16), self.name)
- return x_dtype, x_dtype, x_dtype, x_dtype, x_dtype
-

class SigmoidCrossEntropyWithLogits(Primitive):
r"""
@@ -4200,7 +4119,7 @@ class SigmoidCrossEntropyWithLogits(Primitive):
\end{array}

Inputs:
- - **logits** (Tensor) - Input logits. Tensor of shape :math:`(N, *)` where :math:`*` means
+ - **logits** (Tensor) - Input logits. Tensor of shape :math:`(N, *)` where :math:`*` means any number
of additional dimensions.
- **label** (Tensor) - Ground truth label. With the same shape and type as `logits`.

@@ -4240,7 +4159,7 @@ class BCEWithLogitsLoss(PrimitiveWithInfer):

\begin{array}{ll} \\
p_{ij} = sigmoid(X_{ij}) = \frac{1}{1 + e^{-X_{ij}}} \\
- L_{ij} = -[Y_{ij}
+ L_{ij} = -[Y_{ij}log(p_{ij}) + (1 - Y_{ij})log(1 - p_{ij})]
\end{array}

:math:`i` indicates the :math:`i^{th}` sample, :math:`j` indicates the category. Then,
@@ -4258,8 +4177,8 @@ class BCEWithLogitsLoss(PrimitiveWithInfer):
and the third method is to calculate the sum of all losses.

This operator will multiply the output by the corresponding weight.
- The tensor weight assigns different weights to each piece of data in the batch,
- and the tensor pos_weight adds corresponding weights to the positive examples of each category.
+ The tensor `weight` assigns different weights to each piece of data in the batch,
+ and the tensor `pos_weight` adds corresponding weights to the positive examples of each category.

In addition, it can trade off recall and precision by adding weights to positive examples.
In the case of multi-label classification the loss can be described as:
@@ -4271,8 +4190,8 @@ class BCEWithLogitsLoss(PrimitiveWithInfer):
\end{array}

where c is the class number (c>1 for multi-label binary classification, c=1 for single-label binary classification),
- n is the number of the sample in the batch and :math:`
- :math:`
+ n is the number of the sample in the batch and :math:`P_c` is the weight of the positive answer for the class c.
+ :math:`P_c>1` increases the recall, :math:`P_c<1` increases the precision.

Args:
reduction (str): Type of reduction to be applied to loss. The optional values are 'mean', 'sum', and 'none',
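The weighted loss described here (per-sample `weight`, per-class positive weight :math:`P_c`) reduces to the following NumPy arithmetic for the 'mean' reduction; a sketch of the formula only, not the operator.

    import numpy as np

    def bce_with_logits(logits, labels, weight, pos_weight):
        # L_ij = -[P_c * Y_ij * log(p_ij) + (1 - Y_ij) * log(1 - p_ij)], with p = sigmoid(logits)
        p = 1.0 / (1.0 + np.exp(-logits))
        loss = -(pos_weight * labels * np.log(p) + (1 - labels) * np.log(1 - p))
        return (weight * loss).mean()

    print(bce_with_logits(np.array([[0.5, -0.5]]), np.array([[1.0, 0.0]]),
                          weight=1.0, pos_weight=1.0))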
@@ -4335,8 +4254,8 @@ class Pad(Primitive):
|
|
|
4335
4254
|
be extended behind the input tensor in the `D` th dimension.
|
|
4336
4255
|
|
|
4337
4256
|
Inputs:
|
|
4338
|
-
- **input_x** (Tensor) - Tensor
|
|
4339
|
-
additional dimensions.
|
|
4257
|
+
- **input_x** (Tensor) - Tensor to be padded. It has shape :math:`(N, *)`, where :math:`*` means
|
|
4258
|
+
any number of additional dimensions.
|
|
4340
4259
|
|
|
4341
4260
|
Outputs:
|
|
4342
4261
|
Tensor, the tensor after padding.
|
|
@@ -4372,21 +4291,34 @@ class Pad(Primitive):
|
|
|
4372
4291
|
|
|
4373
4292
|
class PadV3(Primitive):
|
|
4374
4293
|
"""
|
|
4375
|
-
Pads the input
|
|
4294
|
+
Pads the input Tensor according to the `paddings`, `mode` and `paddings_contiguous`.
|
|
4376
4295
|
|
|
4377
4296
|
Args:
|
|
4378
4297
|
mode (str, optional): An optional string indicates padding mode,
|
|
4379
|
-
support "constant", "reflect", "edge". Default: "constant".
|
|
4298
|
+
support "constant", "reflect", "edge", "circular". Default: "constant".
|
|
4299
|
+
The effects of various padding modes are as follows:
|
|
4300
|
+
|
|
4301
|
+
- "constant": Pads the input Tensor with value specified by `constant_value`.
|
|
4302
|
+
- "reflect": Pads the input Tensor by reflecting the values of the pixels at the
|
|
4303
|
+
boundary of the Tensor.
|
|
4304
|
+
- "edge": Pads the input Tensor with the values of the pixels on the border of the Tensor.
|
|
4305
|
+
- "circular": Circular padding mode. In this mode, the pixels from one edge of the image
|
|
4306
|
+
are wrapped around to the opposite edge, such that the pixel on the right edge of the
|
|
4307
|
+
image is replaced with the pixel on the left edge, and the pixel on the bottom edge
|
|
4308
|
+
is replaced with the pixel on the top edge.
|
|
4309
|
+
|
|
4380
4310
|
paddings_contiguous (bool, optional): An optional bool value that indicates whether `paddings` is arranged contiguously.
|
|
4381
4311
|
If true, paddings is arranged as [begin0, end0, begin1, end1, ...]
|
|
4382
4312
|
If false, paddings is arranged as [begin0, begin1, ..., end1, end2, ...]
|
|
4383
4313
|
Default: True.
|
|
4384
4314
|
|
|
4385
4315
|
Inputs:
|
|
4386
|
-
- **x** (Tensor) - Tensor
|
|
4387
|
-
additional dimensions.
|
|
4388
|
-
- **paddings** (Tensor) -
|
|
4389
|
-
|
|
4316
|
+
- **x** (Tensor) - Tensor to be padded. It has shape :math:`(N, *)`, where :math:`*` means
|
|
4317
|
+
any number of additional dimensions.
|
|
4318
|
+
- **paddings** (Tensor) - Specifies the number of zeros to be padded before and after each
|
|
4319
|
+
dimension of the input Tensor `x`. It's a 1D Tensor of type int32 or int64.
|
|
4320
|
+
- **constant_value** (Tensor, optional) - Padding value to use in 'constant' mode,
|
|
4321
|
+
if not specified, 0 is used instead. It has the same type as `x`.
|
|
4390
4322
|
|
|
4391
4323
|
Outputs:
|
|
4392
4324
|
Tensor, the tensor after padding.
|
|
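To make the `paddings` layout concrete, here is a hedged sketch of "constant" mode with a 1-D `paddings` tensor and an explicit `constant_value` (shapes are illustrative; padding is applied to the trailing dimension as described above):

```python
import numpy as np
import mindspore
from mindspore import Tensor, ops

# Sketch: with paddings_contiguous=True, paddings reads as
# [begin0, end0, begin1, end1, ...], applied to the trailing dimensions.
x = Tensor(np.arange(6).reshape(1, 2, 3), mindspore.float32)
paddings = Tensor(np.array([1, 1]), mindspore.int32)   # one element padded on each side of the last dim
value = Tensor(0.5, mindspore.float32)                 # used instead of the default 0
pad_op = ops.PadV3(mode="constant", paddings_contiguous=True)
y = pad_op(x, paddings, value)
print(y.shape)  # expected (1, 2, 5)
```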
@@ -4397,19 +4329,20 @@ class PadV3(Primitive):
|
|
|
4397
4329
|
ValueError: If `mode` is not a str or not in support modes.
|
|
4398
4330
|
ValueError: If `mode` is "constant", the element's number of `paddings` not be even.
|
|
4399
4331
|
ValueError: If `mode` is "constant", the element's number of `paddings` large than input dim * 2.
|
|
4400
|
-
ValueError: If `mode` is "edge" or "
|
|
4401
|
-
ValueError: If `mode` is "edge" or "
|
|
4402
|
-
the element's number of `paddings` is 2.
|
|
4403
|
-
ValueError: If `mode` is "edge" or "
|
|
4404
|
-
the element's number of `paddings` is 4.
|
|
4405
|
-
ValueError: If `mode` is "
|
|
4406
|
-
ValueError: If `mode` is "edge", x dims
|
|
4332
|
+
ValueError: If `mode` is "edge" "reflect" or "circular", the element's number of `paddings` is not 2, 4 or 6.
|
|
4333
|
+
ValueError: If `mode` is "edge" "reflect" or "circular", `x` dims equals 3,
|
|
4334
|
+
the element's number of `paddings` is not 2.
|
|
4335
|
+
ValueError: If `mode` is "edge" "reflect" or "circular", `x` dims equals 4,
|
|
4336
|
+
the element's number of `paddings` is not 4.
|
|
4337
|
+
ValueError: If `mode` is "circular", `x` dims equals 5, the element's number of `paddings` is not 6.
|
|
4338
|
+
ValueError: If `mode` is "edge", "reflect" or "circular", `x` dims smaller than 3.
|
|
4339
|
+
ValueError: If `mode` is "edge" or "circular", x dims bigger than 5.
|
|
4407
4340
|
ValueError: If `mode` is "reflect", x dims bigger than 4.
|
|
4408
4341
|
ValueError: If `mode` is "reflect", padding size bigger than the corresponding `x` dimension.
|
|
4409
4342
|
ValueError: After padding, output's shape number is not greater than 0.
|
|
4410
4343
|
|
|
4411
4344
|
Supported Platforms:
|
|
4412
|
-
``GPU`` ``CPU``
|
|
4345
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
4413
4346
|
|
|
4414
4347
|
Examples:
|
|
4415
4348
|
>>> # case1: mode="reflect", paddings_contiguous=True
|
|
@@ -4447,7 +4380,7 @@ class PadV3(Primitive):
|
|
|
4447
4380
|
def __init__(self, mode='constant', paddings_contiguous=True):
|
|
4448
4381
|
"""Initialize PadV3"""
|
|
4449
4382
|
self.init_prim_io_names(inputs=['x', 'paddings', 'constant_value'], outputs=['y'])
|
|
4450
|
-
validator.check_string(mode, ['constant', 'reflect', 'edge'], 'mode', self.name)
|
|
4383
|
+
validator.check_string(mode, ['constant', 'reflect', 'edge', 'circular'], 'mode', self.name)
|
|
4451
4384
|
validator.check_bool(paddings_contiguous, "paddings_contiguous", self.name)
|
|
4452
4385
|
self.mode = mode
|
|
4453
4386
|
self.paddings_contiguous = paddings_contiguous
|
|
@@ -4465,7 +4398,7 @@ class MirrorPad(Primitive):
|
|
|
4465
4398
|
- **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
|
|
4466
4399
|
additional dimensions.
|
|
4467
4400
|
- **paddings** (Tensor) - Paddings requires constant tensor. The value of `paddings` is a
|
|
4468
|
-
matrix(list), and its shape is (N, 2)
|
|
4401
|
+
matrix(list), and its shape is :math:`(N, 2)`. N is the rank of input data. All elements of paddings
|
|
4469
4402
|
are int type. For the input in the `D` th dimension, paddings[D, 0] indicates how many sizes
|
|
4470
4403
|
to be extended ahead of the input tensor in the `D` th dimension, and paddings[D, 1]
|
|
4471
4404
|
indicates how many sizes to be extended behind the input tensor in the `D` th dimension. Both
|
|
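A short hedged sketch of the :math:`(N, 2)` `paddings` layout just described (REFLECT mode; only the shape arithmetic is of interest here):

```python
import numpy as np
import mindspore
from mindspore import Tensor, ops

# Sketch: row D of `paddings` holds (pad_before, pad_after) for dimension D,
# so the padded shape is x.shape[D] + paddings[D, 0] + paddings[D, 1].
x = Tensor(np.arange(9).reshape(3, 3), mindspore.float32)
paddings = Tensor(np.array([[1, 1], [2, 2]]), mindspore.int32)
pad = ops.MirrorPad(mode="REFLECT")
y = pad(x, paddings)
print(y.shape)  # expected (5, 7)
```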
@@ -4492,7 +4425,7 @@ class MirrorPad(Primitive):
|
|
|
4492
4425
|
ValueError: If paddings.size is not equal to 2 * rank of input_x.
|
|
4493
4426
|
|
|
4494
4427
|
Supported Platforms:
|
|
4495
|
-
``GPU`` ``CPU``
|
|
4428
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
4496
4429
|
|
|
4497
4430
|
Examples:
|
|
4498
4431
|
>>> from mindspore import Tensor, nn, ops
|
|
@@ -4531,7 +4464,6 @@ class MirrorPad(Primitive):
|
|
|
4531
4464
|
self.init_prim_io_names(inputs=['x', 'paddings'], outputs=['y'])
|
|
4532
4465
|
validator.check_string(mode, ['REFLECT', 'SYMMETRIC'], 'mode', self.name)
|
|
4533
4466
|
self.mode = mode
|
|
4534
|
-
self.set_const_input_indexes([1])
|
|
4535
4467
|
|
|
4536
4468
|
|
|
4537
4469
|
class ComputeAccidentalHits(PrimitiveWithCheck):
|
|
@@ -4587,13 +4519,14 @@ class ComputeAccidentalHits(PrimitiveWithCheck):
|
|
|
4587
4519
|
self.init_prim_io_names(inputs=['true_classes', 'sampled_candidates'],
|
|
4588
4520
|
outputs=['indices', 'ids', 'weights'])
|
|
4589
4521
|
validator.check_value_type("num_true", num_true, [int], self.name)
|
|
4590
|
-
validator.check_number("num_true", num_true, 1,
|
|
4522
|
+
validator.check_number("num_true", num_true, 1, validator.GE, self.name)
|
|
4591
4523
|
self.num_true = num_true
|
|
4592
4524
|
|
|
4593
4525
|
def check_shape(self, true_classes_shape, sampled_candidates_shape):
|
|
4594
|
-
validator.check_int(len(true_classes_shape), 2,
|
|
4595
|
-
validator.check_int(len(sampled_candidates_shape), 1,
|
|
4596
|
-
validator.check("true_classes shape[1]", true_classes_shape[1], "num_true",
|
|
4526
|
+
validator.check_int(len(true_classes_shape), 2, validator.EQ, 'dim of true_classes', self.name)
|
|
4527
|
+
validator.check_int(len(sampled_candidates_shape), 1, validator.EQ, 'dim of sampled_candidates', self.name)
|
|
4528
|
+
validator.check("true_classes shape[1]", true_classes_shape[1], "num_true",
|
|
4529
|
+
self.num_true, validator.EQ, self.name)
|
|
4597
4530
|
|
|
4598
4531
|
indices_len = -1
|
|
4599
4532
|
return (indices_len,), (indices_len,), (indices_len,)
|
|
@@ -4665,7 +4598,7 @@ class ROIAlign(Primitive):
|
|
|
4665
4598
|
validator.check_value_type("spatial_scale", spatial_scale, [float], self.name)
|
|
4666
4599
|
validator.check_value_type("sample_num", sample_num, [int], self.name)
|
|
4667
4600
|
validator.check_value_type("roi_end_mode", roi_end_mode, [int], self.name)
|
|
4668
|
-
validator.check_int_range(roi_end_mode, 0, 1,
|
|
4601
|
+
validator.check_int_range(roi_end_mode, 0, 1, validator.INC_BOTH, "roi_end_mode", self.name)
|
|
4669
4602
|
self.pooled_height = pooled_height
|
|
4670
4603
|
self.pooled_width = pooled_width
|
|
4671
4604
|
self.spatial_scale = spatial_scale
|
|
@@ -4707,22 +4640,19 @@ class Adam(Primitive):
|
|
|
4707
4640
|
If false, update the gradients without using NAG. Default: False.
|
|
4708
4641
|
|
|
4709
4642
|
Inputs:
|
|
4710
|
-
- **var** (
|
|
4643
|
+
- **var** (Parameter) - Weights to be updated. The shape is :math:`(N, *)` where :math:`*` means,
|
|
4711
4644
|
any number of additional dimensions. The data type can be float16 or float32.
|
|
4712
|
-
- **m** (
|
|
4713
|
-
the shape
|
|
4714
|
-
- **v** (
|
|
4715
|
-
the shape
|
|
4716
|
-
- **beta1_power** (float) - :math:`beta_1^t(\beta_1^{t})` in the updating formula
|
|
4717
|
-
|
|
4718
|
-
- **
|
|
4719
|
-
|
|
4720
|
-
|
|
4721
|
-
|
|
4722
|
-
|
|
4723
|
-
the data type value should be the same as `var`. The paper suggested value is :math:`0.9`.
|
|
4724
|
-
- **beta2** (float) - The exponential decay rate for the 2nd moment estimations,
|
|
4725
|
-
the data type value should be the same as `var`. The paper suggested value is :math:`0.999`.
|
|
4645
|
+
- **m** (Parameter) - The 1st moment vector in the updating formula,
|
|
4646
|
+
the shape should be the same as `var`.
|
|
4647
|
+
- **v** (Parameter) - the 2nd moment vector in the updating formula,
|
|
4648
|
+
the shape should be the same as `var`.
|
|
4649
|
+
- **beta1_power** (float) - :math:`beta_1^t(\beta_1^{t})` in the updating formula.
|
|
4650
|
+
- **beta2_power** (float) - :math:`beta_2^t(\beta_2^{t})` in the updating formula.
|
|
4651
|
+
- **lr** (float) - :math:`l` in the updating formula. The paper suggested value is :math:`10^{-8}`.
|
|
4652
|
+
- **beta1** (float) - The exponential decay rate for the 1st moment estimations.
|
|
4653
|
+
The paper suggested value is :math:`0.9`.
|
|
4654
|
+
- **beta2** (float) - The exponential decay rate for the 2nd moment estimations.
|
|
4655
|
+
The paper suggested value is :math:`0.999`.
|
|
4726
4656
|
- **epsilon** (float) - Term added to the denominator to improve numerical stability.
|
|
4727
4657
|
- **gradient** (Tensor) - Gradient, has the same shape and data type as `var`.
|
|
4728
4658
|
|
|
@@ -4735,7 +4665,7 @@ class Adam(Primitive):
|
|
|
4735
4665
|
|
|
4736
4666
|
Raises:
|
|
4737
4667
|
TypeError: If neither `use_locking` nor `use_nesterov` is a bool.
|
|
4738
|
-
TypeError: If `var`, `m` or `v` is not a
|
|
4668
|
+
TypeError: If `var`, `m` or `v` is not a Parameter.
|
|
4739
4669
|
TypeError: If `beta1_power`, `beta2_power1`, `lr`, `beta1`, `beta2`, `epsilon` or `gradient` is not a Tensor.
|
|
4740
4670
|
|
|
4741
4671
|
Supported Platforms:
|
|
@@ -4761,6 +4691,18 @@ class Adam(Primitive):
|
|
|
4761
4691
|
[[0.9996838 0.9996838]
|
|
4762
4692
|
[0.9996838 0.9996838]]
|
|
4763
4693
|
"""
|
|
4694
|
+
__mindspore_signature__ = (
|
|
4695
|
+
sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
|
|
4696
|
+
sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T1),
|
|
4697
|
+
sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T2),
|
|
4698
|
+
sig.make_sig('beta1_power', dtype=sig.sig_dtype.T3),
|
|
4699
|
+
sig.make_sig('beta2_power', dtype=sig.sig_dtype.T4),
|
|
4700
|
+
sig.make_sig('lr', dtype=sig.sig_dtype.T5),
|
|
4701
|
+
sig.make_sig('beta1', dtype=sig.sig_dtype.T6),
|
|
4702
|
+
sig.make_sig('beta2', dtype=sig.sig_dtype.T7),
|
|
4703
|
+
sig.make_sig('epsilon', dtype=sig.sig_dtype.T8),
|
|
4704
|
+
sig.make_sig('gradient', dtype=sig.sig_dtype.T)
|
|
4705
|
+
)
|
|
4764
4706
|
|
|
4765
4707
|
@prim_attr_register
|
|
4766
4708
|
def __init__(self, use_locking=False, use_nesterov=False):
|
|
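The `__mindspore_signature__` added above declares that `var`, `m` and `v` are written in place (RW_WRITE) and groups the remaining inputs into dtype slots for implicit type conversion. A hedged sketch of a call that follows the declared order (var, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient):

```python
import numpy as np
from mindspore import Tensor, Parameter, ops

# Sketch only: var/m/v are Parameters and are updated in place by the primitive.
adam = ops.Adam(use_locking=False, use_nesterov=False)
var = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="var")
m = Parameter(Tensor(np.zeros([2, 2]).astype(np.float32)), name="m")
v = Parameter(Tensor(np.zeros([2, 2]).astype(np.float32)), name="v")
grad = Tensor(np.ones([2, 2]).astype(np.float32))
out = adam(var, m, v, 0.9, 0.999, 0.001, 0.9, 0.999, 1e-8, grad)
```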
@@ -5323,7 +5265,7 @@ class FusedSparseFtrl(Primitive):
|
|
|
5323
5265
|
self.lr = validator.check_positive_float(lr, "lr", self.name)
|
|
5324
5266
|
self.l1 = validator.check_non_negative_float(l1, "l1", self.name)
|
|
5325
5267
|
self.l2 = validator.check_non_negative_float(l2, "l2", self.name)
|
|
5326
|
-
self.lr_power = validator.check_number("lr_power", lr_power, 0,
|
|
5268
|
+
self.lr_power = validator.check_number("lr_power", lr_power, 0, validator.LE, self.name)
|
|
5327
5269
|
self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
|
|
5328
5270
|
|
|
5329
5271
|
|
|
@@ -5498,7 +5440,7 @@ class KLDivLoss(Primitive):
|
|
|
5498
5440
|
elif device_target == "GPU":
|
|
5499
5441
|
support_mode = ['none', 'mean', 'sum']
|
|
5500
5442
|
elif device_target == "Ascend":
|
|
5501
|
-
support_mode = ['none', 'batchmean', 'sum']
|
|
5443
|
+
support_mode = ['none', 'batchmean', 'sum', 'mean']
|
|
5502
5444
|
else:
|
|
5503
5445
|
raise ValueError(f"'{self.name}' unknown device target: '{device_target}'")
|
|
5504
5446
|
|
|
@@ -5518,7 +5460,7 @@ class BinaryCrossEntropy(Primitive):
|
|
|
5518
5460
|
|
|
5519
5461
|
In which, :math:`L` indicates the loss of all batch_sizes, :math:`l` indicates the loss of one batch_size,
|
|
5520
5462
|
and n indicates one batch_size in the 1-N range, :math:`w_n` indicates the
|
|
5521
|
-
weight of
|
|
5463
|
+
weight of :math:`n`-th batch of binary cross entropy. Then,
|
|
5522
5464
|
|
|
5523
5465
|
.. math::
|
|
5524
5466
|
\ell(x, y) = \begin{cases}
|
|
@@ -5528,8 +5470,7 @@ class BinaryCrossEntropy(Primitive):
|
|
|
5528
5470
|
\end{cases}
|
|
5529
5471
|
|
|
5530
5472
|
.. warning::
|
|
5531
|
-
- The value of
|
|
5532
|
-
- The value of "y" must be "0" or "1".
|
|
5473
|
+
- The value of :math:`x` must range from 0 to 1.
|
|
5533
5474
|
|
|
5534
5475
|
Args:
|
|
5535
5476
|
reduction (str): Specifies the reduction to be applied to the output.
|
|
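A hedged usage sketch of the formula above (the primitive expects probabilities already in [0, 1], per the warning, and `weight` is the per-element w_n):

```python
import numpy as np
import mindspore
from mindspore import Tensor, ops

# Sketch: inputs are probabilities, not raw logits.
bce = ops.BinaryCrossEntropy(reduction='mean')
probs = Tensor(np.array([0.2, 0.7, 0.1]), mindspore.float32)
labels = Tensor(np.array([0., 1., 0.]), mindspore.float32)
weight = Tensor(np.array([1., 2., 2.]), mindspore.float32)
output = bce(probs, labels, weight)
print(output)  # scalar loss with reduction='mean'
```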
@@ -5741,7 +5682,7 @@ class ApplyAdadelta(Primitive):
|
|
|
5741
5682
|
is not supported.
|
|
5742
5683
|
|
|
5743
5684
|
Supported Platforms:
|
|
5744
|
-
``Ascend`` ``
|
|
5685
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
5745
5686
|
|
|
5746
5687
|
Examples:
|
|
5747
5688
|
>>> import numpy as np
|
|
@@ -5818,10 +5759,10 @@ class ApplyAdagrad(Primitive):
|
|
|
5818
5759
|
update_slots (bool): If `True`, `accum` will be updated. Default: True.
|
|
5819
5760
|
|
|
5820
5761
|
Inputs:
|
|
5821
|
-
- **var** (Parameter) - Variable to be updated. With
|
|
5762
|
+
- **var** (Parameter) - Variable to be updated. With float or complex data type.
|
|
5822
5763
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5823
5764
|
- **accum** (Parameter) - Accumulation to be updated. The shape and data type must be the same as `var`.
|
|
5824
|
-
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. With
|
|
5765
|
+
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. With float or complex data type.
|
|
5825
5766
|
- **grad** (Tensor) - A tensor for gradient. The shape and data type must be the same as `var`.
|
|
5826
5767
|
|
|
5827
5768
|
Outputs:
|
|
@@ -5831,12 +5772,12 @@ class ApplyAdagrad(Primitive):
|
|
|
5831
5772
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
5832
5773
|
|
|
5833
5774
|
Raises:
|
|
5834
|
-
TypeError: If dtype of `var`, `accum`, `lr` or `grad` is neither
|
|
5775
|
+
TypeError: If dtype of `var`, `accum`, `lr` or `grad` is neither float nor complex.
|
|
5835
5776
|
TypeError: If `lr` is neither a Number nor a Tensor.
|
|
5836
5777
|
RuntimeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported.
|
|
5837
5778
|
|
|
5838
5779
|
Supported Platforms:
|
|
5839
|
-
``Ascend`` ``
|
|
5780
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
5840
5781
|
|
|
5841
5782
|
Examples:
|
|
5842
5783
|
>>> class Net(nn.Cell):
|
|
@@ -5895,7 +5836,7 @@ class ApplyAdagradV2(Primitive):
|
|
|
5895
5836
|
the relatively highest priority data type.
|
|
5896
5837
|
|
|
5897
5838
|
Note:
|
|
5898
|
-
The difference is that `ApplyAdagradV2` has one more small constant value than `ApplyAdagrad`.
|
|
5839
|
+
The difference is that `ApplyAdagradV2` has one more small constant value :math:`\epsilon` than `ApplyAdagrad`.
|
|
5899
5840
|
|
|
5900
5841
|
Args:
|
|
5901
5842
|
epsilon (float): A small value added for numerical stability.
|
|
@@ -6016,7 +5957,7 @@ class SparseApplyAdagradV2(Primitive):
|
|
|
6016
5957
|
- **grad** (Tensor) - Gradients has the same data type as `var` and
|
|
6017
5958
|
:math:`grad.shape[1:] = var.shape[1:]` if var.shape > 1.
|
|
6018
5959
|
- **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
|
|
6019
|
-
The type must be int32 and indices.shape[0] = grad.shape[0]
|
|
5960
|
+
The type must be int32 and :math:`indices.shape[0] = grad.shape[0]`.
|
|
6020
5961
|
|
|
6021
5962
|
Outputs:
|
|
6022
5963
|
Tuple of 2 tensors, the updated parameters.
|
|
@@ -6032,7 +5973,7 @@ class SparseApplyAdagradV2(Primitive):
|
|
|
6032
5973
|
RuntimeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported.
|
|
6033
5974
|
|
|
6034
5975
|
Supported Platforms:
|
|
6035
|
-
``Ascend`` ``
|
|
5976
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
6036
5977
|
|
|
6037
5978
|
Examples:
|
|
6038
5979
|
>>> class Net(nn.Cell):
|
|
@@ -6201,10 +6142,10 @@ class SparseApplyProximalAdagrad(Primitive):
|
|
|
6201
6142
|
- **l2** (Union[Number, Tensor]) - l2 regularization strength, must be a float number or
|
|
6202
6143
|
a scalar tensor with float16 or float32 data type. It must be non-negative.
|
|
6203
6144
|
- **grad** (Tensor) - A tensor of the same type as `var` and
|
|
6204
|
-
grad.shape[1:] = var.shape[1:] if var.shape > 1.
|
|
6145
|
+
:math:`grad.shape[1:] = var.shape[1:]` if var.shape > 1.
|
|
6205
6146
|
- **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
|
|
6206
6147
|
If there are duplicates in `indices`, the behavior is undefined. Must be one of the
|
|
6207
|
-
following types: int32, int64 and indices.shape[0] = grad.shape[0]
|
|
6148
|
+
following types: int32, int64 and :math:`indices.shape[0] = grad.shape[0]`.
|
|
6208
6149
|
|
|
6209
6150
|
Outputs:
|
|
6210
6151
|
Tuple of 2 tensors, the updated parameters.
|
|
@@ -6220,7 +6161,7 @@ class SparseApplyProximalAdagrad(Primitive):
|
|
|
6220
6161
|
RuntimeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported.
|
|
6221
6162
|
|
|
6222
6163
|
Supported Platforms:
|
|
6223
|
-
``Ascend`` ``GPU``
|
|
6164
|
+
``Ascend`` ``GPU``
|
|
6224
6165
|
|
|
6225
6166
|
Examples:
|
|
6226
6167
|
>>> class Net(nn.Cell):
|
|
@@ -6669,6 +6610,10 @@ class ApplyFtrl(Primitive):
|
|
|
6669
6610
|
|
|
6670
6611
|
For more details, please refer to :class:`mindspore.nn.FTRL`.
|
|
6671
6612
|
|
|
6613
|
+
Note:
|
|
6614
|
+
Currently, only positive numbers are supported on the Ascend platform,
|
|
6615
|
+
and the calculation results for other scenarios are not defined.
|
|
6616
|
+
|
|
6672
6617
|
Args:
|
|
6673
6618
|
use_locking (bool): Use locks for updating operation if true . Default: False.
|
|
6674
6619
|
|
|
@@ -6697,6 +6642,9 @@ class ApplyFtrl(Primitive):
|
|
|
6697
6642
|
TypeError: If dtype of `var`, `grad`, `lr`, `l1`, `l2` or `lr_power` is neither float16 nor float32.
|
|
6698
6643
|
TypeError: If `lr`, `l1`, `l2` or `lr_power` is neither a Number nor a Tensor.
|
|
6699
6644
|
TypeError: If `grad` is not a Tensor.
|
|
6645
|
+
RuntimeError: If the parameter types of `var`, `accum` and `linear` are inconsistent.
|
|
6646
|
+
RuntimeError: If the parameter types of `grad`, `lr`, `l1`, `l2`, `lr_power` are inconsistent with `var`
|
|
6647
|
+
and the precision is greater than `var`.
|
|
6700
6648
|
|
|
6701
6649
|
Supported Platforms:
|
|
6702
6650
|
``Ascend`` ``GPU`` ``CPU``
|
|
@@ -6730,6 +6678,17 @@ class ApplyFtrl(Primitive):
|
|
|
6730
6678
|
[ 0.00066425 0.15075898]]
|
|
6731
6679
|
"""
|
|
6732
6680
|
|
|
6681
|
+
__mindspore_signature__ = (
|
|
6682
|
+
sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
|
|
6683
|
+
sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
|
|
6684
|
+
sig.make_sig('linear', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
|
|
6685
|
+
sig.make_sig('grad', dtype=sig.sig_dtype.T),
|
|
6686
|
+
sig.make_sig('lr', dtype=sig.sig_dtype.T),
|
|
6687
|
+
sig.make_sig('l1', dtype=sig.sig_dtype.T),
|
|
6688
|
+
sig.make_sig('l2', dtype=sig.sig_dtype.T),
|
|
6689
|
+
sig.make_sig('lr_power', dtype=sig.sig_dtype.T)
|
|
6690
|
+
)
|
|
6691
|
+
|
|
6733
6692
|
@prim_attr_register
|
|
6734
6693
|
def __init__(self, use_locking=False):
|
|
6735
6694
|
"""Initialize ApplyFtrl."""
|
|
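A hedged sketch of a call in the order declared by the signature just added (var, accum, linear, grad, lr, l1, l2, lr_power); positive values are used to respect the Ascend note above:

```python
import numpy as np
from mindspore import Tensor, Parameter, ops

# Sketch only: var/accum/linear are Parameters updated in place.
apply_ftrl = ops.ApplyFtrl(use_locking=False)
var = Parameter(Tensor(np.random.rand(2, 2).astype(np.float32)), name="var")
accum = Parameter(Tensor(np.random.rand(2, 2).astype(np.float32)), name="accum")
linear = Parameter(Tensor(np.random.rand(2, 2).astype(np.float32)), name="linear")
grad = Tensor(np.random.rand(2, 2).astype(np.float32))
out = apply_ftrl(var, accum, linear, grad, 0.001, 0.0, 0.0, -0.5)
```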
@@ -6761,10 +6720,11 @@ class SparseApplyFtrl(Primitive):
|
|
|
6761
6720
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
6762
6721
|
- **accum** (Parameter) - The accumulation to be updated, must be same data type and shape as `var`.
|
|
6763
6722
|
- **linear** (Parameter) - The linear coefficient to be updated, must be the same data type and shape as `var`.
|
|
6764
|
-
- **grad** (Tensor) - A tensor of the same type as `var` and grad.shape[1:] = var.shape[1:]
|
|
6723
|
+
- **grad** (Tensor) - A tensor of the same type as `var` and :math:`grad.shape[1:] = var.shape[1:]`
|
|
6724
|
+
if var.shape > 1.
|
|
6765
6725
|
- **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
|
|
6766
6726
|
If there are duplicates in `indices`, the behavior is undefined.
|
|
6767
|
-
The type must be int32 or int64 and indices.shape[0] = grad.shape[0]
|
|
6727
|
+
The type must be int32 or int64 and :math:`indices.shape[0] = grad.shape[0]`.
|
|
6768
6728
|
|
|
6769
6729
|
Outputs:
|
|
6770
6730
|
- **var** (Tensor) - Tensor, has the same shape and data type as `var`.
|
|
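The two shape constraints above in a small hedged sketch (`grad.shape[1:] == var.shape[1:]`, `indices.shape[0] == grad.shape[0]`):

```python
import numpy as np
import mindspore
from mindspore import Tensor, Parameter, ops

# Sketch: only the rows of var/accum/linear selected by `indices` are updated.
sparse_ftrl = ops.SparseApplyFtrl(lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
var = Parameter(Tensor(np.ones([3, 2]).astype(np.float32)), name="var")
accum = Parameter(Tensor(np.ones([3, 2]).astype(np.float32)), name="accum")
linear = Parameter(Tensor(np.ones([3, 2]).astype(np.float32)), name="linear")
grad = Tensor(np.ones([2, 2]).astype(np.float32))      # grad.shape[1:] == var.shape[1:]
indices = Tensor(np.array([0, 2]), mindspore.int32)    # indices.shape[0] == grad.shape[0]
out = sparse_ftrl(var, accum, linear, grad, indices)
```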
@@ -6823,83 +6783,19 @@ class SparseApplyFtrl(Primitive):
|
|
|
6823
6783
|
self.lr = validator.check_positive_float(lr, "lr", self.name)
|
|
6824
6784
|
self.l1 = validator.check_non_negative_float(l1, "l1", self.name)
|
|
6825
6785
|
self.l2 = validator.check_non_negative_float(l2, "l2", self.name)
|
|
6826
|
-
self.lr_power = validator.check_number("lr_power", lr_power, 0,
|
|
6786
|
+
self.lr_power = validator.check_number("lr_power", lr_power, 0, validator.LE, self.name)
|
|
6827
6787
|
self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
|
|
6828
6788
|
self.init_prim_io_names(inputs=['var', 'accum', 'linear', 'grad', 'indices'],
|
|
6829
|
-
outputs=['var', 'accum', 'linear'])
|
|
6830
|
-
self.add_prim_attr('side_effect_mem', True)
|
|
6831
|
-
|
|
6832
|
-
|
|
6833
|
-
class SparseApplyFtrlV2(PrimitiveWithInfer):
|
|
6834
|
-
"""
|
|
6835
|
-
Updates relevant entries according to the FTRL-proximal scheme. This class has one more attribute, named
|
|
6836
|
-
l2_shrinkage, than class SparseApplyFtrl.
|
|
6837
|
-
|
|
6838
|
-
All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
|
|
6839
|
-
If they have different data types, the lower priority data type will be converted to
|
|
6840
|
-
the relatively highest priority data type.
|
|
6841
|
-
|
|
6842
|
-
|
|
6843
|
-
Args:
|
|
6844
|
-
lr (float): The learning rate value, must be positive.
|
|
6845
|
-
l1 (float): l1 regularization strength, must be greater than or equal to zero.
|
|
6846
|
-
l2 (float): l2 regularization strength, must be greater than or equal to zero.
|
|
6847
|
-
l2_shrinkage (float): L2 shrinkage regularization.
|
|
6848
|
-
lr_power (float): Learning rate power controls how the learning rate decreases during training,
|
|
6849
|
-
must be less than or equal to zero. Use fixed learning rate if `lr_power` is zero.
|
|
6850
|
-
use_locking (bool, optional): If `True`, the var and accumulation tensors will be protected from being updated.
|
|
6851
|
-
Default: False.
|
|
6852
|
-
|
|
6853
|
-
Inputs:
|
|
6854
|
-
- **var** (Parameter) - The variable to be updated. The data type must be float16 or float32.
|
|
6855
|
-
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
6856
|
-
- **accum** (Parameter) - The accumulation to be updated, must be same data type and shape as `var`.
|
|
6857
|
-
- **linear** (Parameter) - the linear coefficient to be updated, must be same data type and shape as `var`.
|
|
6858
|
-
- **grad** (Tensor) - A tensor of the same type as `var` and
|
|
6859
|
-
:math:`grad.shape[1:] = var.shape[1:]` if var.shape > 1.
|
|
6860
|
-
- **indices** (Tensor) - A vector of indices in the first dimension of `var` and `accum`.
|
|
6861
|
-
The type must be int32 and indices.shape[0] = grad.shape[0].
|
|
6862
|
-
|
|
6863
|
-
Outputs:
|
|
6864
|
-
Tuple of 3 Tensor, the updated parameters.
|
|
6789
|
+
outputs=['var', 'accum', 'linear'])
|
|
6790
|
+
self.add_prim_attr('side_effect_mem', True)
|
|
6865
6791
|
|
|
6866
|
-
- **var** (Tensor) - Tensor, has the same shape and data type as `var`.
|
|
6867
|
-
- **accum** (Tensor) - Tensor, has the same shape and data type as `accum`.
|
|
6868
|
-
- **linear** (Tensor) - Tensor, has the same shape and data type as `linear`.
|
|
6869
6792
|
|
|
6870
|
-
|
|
6871
|
-
|
|
6872
|
-
|
|
6873
|
-
TypeError: If dtype of `var`, `accum`, `linear` or `grad` is neither float16 nor float32.
|
|
6874
|
-
TypeError: If dtype of `indices` is not int32.
|
|
6875
|
-
RuntimeError: If the data type of all of inputs except `indices` conversion of Parameter is not supported.
|
|
6793
|
+
class SparseApplyFtrlV2(PrimitiveWithInfer):
|
|
6794
|
+
"""
|
|
6795
|
+
The SparseApplyFtrlV2 interface is deprecated, please use :class:`mindspore.ops.SparseApplyFtrl` instead.
|
|
6876
6796
|
|
|
6877
6797
|
Supported Platforms:
|
|
6878
|
-
|
|
6879
|
-
|
|
6880
|
-
Examples:
|
|
6881
|
-
>>> class SparseApplyFtrlV2Net(nn.Cell):
|
|
6882
|
-
... def __init__(self):
|
|
6883
|
-
... super(SparseApplyFtrlV2Net, self).__init__()
|
|
6884
|
-
... self.sparse_apply_ftrl_v2 = ops.SparseApplyFtrlV2(lr=0.01, l1=0.0, l2=0.0,
|
|
6885
|
-
... l2_shrinkage=0.0, lr_power=-0.5)
|
|
6886
|
-
... self.var = Parameter(Tensor(np.array([[0.2, 0.3]]).astype(np.float32)), name="var")
|
|
6887
|
-
... self.accum = Parameter(Tensor(np.array([[0.5, 0.9]]).astype(np.float32)), name="accum")
|
|
6888
|
-
... self.linear = Parameter(Tensor(np.array([[0.7, 0.5]]).astype(np.float32)), name="linear")
|
|
6889
|
-
...
|
|
6890
|
-
... def construct(self, grad, indices):
|
|
6891
|
-
... out = self.sparse_apply_ftrl_v2(self.var, self.accum, self.linear, grad, indices)
|
|
6892
|
-
... return out
|
|
6893
|
-
...
|
|
6894
|
-
>>> net = SparseApplyFtrlV2Net()
|
|
6895
|
-
>>> grad = Tensor(np.array([[0.8, 0.5]]).astype(np.float32))
|
|
6896
|
-
>>> indices = Tensor(np.ones([1]), mindspore.int32)
|
|
6897
|
-
>>> output = net(grad, indices)
|
|
6898
|
-
>>> print(output)
|
|
6899
|
-
(Tensor(shape=[1, 2], dtype=Float32, value=
|
|
6900
|
-
[[ 2.00000003e-01, 3.00000012e-01]]), Tensor(shape=[1, 2], dtype=Float32, value=
|
|
6901
|
-
[[ 5.00000000e-01, 8.99999976e-01]]), Tensor(shape=[1, 2], dtype=Float32, value=
|
|
6902
|
-
[[ 6.99999988e-01, 5.00000000e-01]]))
|
|
6798
|
+
Deprecated
|
|
6903
6799
|
"""
|
|
6904
6800
|
|
|
6905
6801
|
__mindspore_signature__ = (
|
|
@@ -6910,6 +6806,7 @@ class SparseApplyFtrlV2(PrimitiveWithInfer):
|
|
|
6910
6806
|
sig.make_sig('indices', dtype=sig.sig_dtype.T1)
|
|
6911
6807
|
)
|
|
6912
6808
|
|
|
6809
|
+
@deprecated("2.1", "ops.SparseApplyFtrl", False)
|
|
6913
6810
|
@prim_attr_register
|
|
6914
6811
|
def __init__(self, lr, l1, l2, l2_shrinkage, lr_power, use_locking=False):
|
|
6915
6812
|
"""Initialize SparseApplyFtrlV2."""
|
|
@@ -6920,18 +6817,18 @@ class SparseApplyFtrlV2(PrimitiveWithInfer):
|
|
|
6920
6817
|
self.lr = validator.check_positive_float(lr, "lr", self.name)
|
|
6921
6818
|
self.l1 = validator.check_non_negative_float(l1, "l1", self.name)
|
|
6922
6819
|
self.l2 = validator.check_non_negative_float(l2, "l2", self.name)
|
|
6923
|
-
self.lr_power = validator.check_number("lr_power", lr_power, 0,
|
|
6820
|
+
self.lr_power = validator.check_number("lr_power", lr_power, 0, validator.LE, self.name)
|
|
6924
6821
|
self.l2_shrinkage = validator.check_value_type("l2_shrinkage", l2_shrinkage, [float], self.name)
|
|
6925
6822
|
self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
|
|
6926
6823
|
self.add_prim_attr('side_effect_mem', True)
|
|
6927
6824
|
|
|
6928
6825
|
def infer_shape(self, var_shape, accum_shape, linear_shape, grad_shape, indices_shape):
|
|
6929
|
-
validator.check('var shape', var_shape, 'accum shape', accum_shape,
|
|
6930
|
-
validator.check('var shape', var_shape, 'linear shape', linear_shape,
|
|
6826
|
+
validator.check('var shape', var_shape, 'accum shape', accum_shape, validator.EQ, self.name)
|
|
6827
|
+
validator.check('var shape', var_shape, 'linear shape', linear_shape, validator.EQ, self.name)
|
|
6931
6828
|
if len(var_shape) > 1:
|
|
6932
|
-
validator.check('var_shape[1:]', var_shape[1:], 'grad_shape[1:]', grad_shape[1:],
|
|
6933
|
-
validator.check_int(len(indices_shape), 1,
|
|
6934
|
-
validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0],
|
|
6829
|
+
validator.check('var_shape[1:]', var_shape[1:], 'grad_shape[1:]', grad_shape[1:], validator.EQ, self.name)
|
|
6830
|
+
validator.check_int(len(indices_shape), 1, validator.EQ, "indices rank", self.name)
|
|
6831
|
+
validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0], validator.EQ, self.name)
|
|
6935
6832
|
return var_shape, accum_shape, linear_shape
|
|
6936
6833
|
|
|
6937
6834
|
def infer_dtype(self, var_dtype, accum_dtype, linear_dtype, grad_dtype, indices_dtype):
|
|
@@ -6954,11 +6851,11 @@ class Dropout(PrimitiveWithCheck):
|
|
|
6954
6851
|
``Ascend`` ``GPU`` ``CPU``
|
|
6955
6852
|
|
|
6956
6853
|
Examples:
|
|
6957
|
-
>>> dropout = ops.
|
|
6854
|
+
>>> dropout = ops.Dropout(keep_prob=0.5)
|
|
6958
6855
|
>>> x = Tensor(np.ones([1, 2, 3, 4, 5]), mindspore.float32)
|
|
6959
6856
|
>>> output, mask = dropout(x)
|
|
6960
|
-
>>> print(output.shape)
|
|
6961
|
-
(1, 2, 3, 4, 5)
|
|
6857
|
+
>>> print(output.shape, mask.shape, mask.dtype)
|
|
6858
|
+
(1, 2, 3, 4, 5) (16,) UInt8
|
|
6962
6859
|
"""
|
|
6963
6860
|
|
|
6964
6861
|
@prim_attr_register
|
|
@@ -6966,10 +6863,10 @@ class Dropout(PrimitiveWithCheck):
|
|
|
6966
6863
|
"""Initialize Dropout."""
|
|
6967
6864
|
self.seed0 = validator.check_value_type("Seed0", Seed0, [int], self.name)
|
|
6968
6865
|
self.seed1 = validator.check_value_type("Seed1", Seed1, [int], self.name)
|
|
6969
|
-
self.keep_prob = validator.check_float_range(keep_prob, 0, 1,
|
|
6866
|
+
self.keep_prob = validator.check_float_range(keep_prob, 0, 1, validator.INC_RIGHT, "keep_prob", self.name)
|
|
6970
6867
|
|
|
6971
6868
|
def check_shape(self, x_shape):
|
|
6972
|
-
validator.check_int(len(x_shape), 1,
|
|
6869
|
+
validator.check_int(len(x_shape), 1, validator.GE, "x_shape", self.name)
|
|
6973
6870
|
|
|
6974
6871
|
def check_dtype(self, x_dtype):
|
|
6975
6872
|
valid_dtypes = (mstype.float16, mstype.float32, mstype.float64)
|
|
@@ -6987,7 +6884,25 @@ class Dropout2D(PrimitiveWithInfer):
|
|
|
6987
6884
|
Note:
|
|
6988
6885
|
The keep probability :math:`keep\_prob` is equal to :math:`1 - p` in :func:`mindspore.ops.dropout2d`.
|
|
6989
6886
|
|
|
6990
|
-
|
|
6887
|
+
Args:
|
|
6888
|
+
keep_prob (float, optional): The keep probability of a channel, between 0 and 1, e.g. `keep_prob` = 0.8,
|
|
6889
|
+
means dropping out 20% of channels. Default: 0.5.
|
|
6890
|
+
|
|
6891
|
+
Inputs:
|
|
6892
|
+
- **x** (Tensor) - A 4-D tensor with shape :math:`(N, C, H, W)`, where N is the batch size, C is the number
|
|
6893
|
+
of channels, H is the feature height, and W is the feature width. The data type should be int8, int16, int32,
|
|
6894
|
+
int64, float16 or float32.
|
|
6895
|
+
|
|
6896
|
+
Outputs:
|
|
6897
|
+
- **output** (Tensor) - With the same shape and data type as `x`.
|
|
6898
|
+
- **mask** (Tensor) - With the same shape as `x` and the data type is bool.
|
|
6899
|
+
|
|
6900
|
+
Raises:
|
|
6901
|
+
TypeError: If `x` is not a Tensor.
|
|
6902
|
+
TypeError: If dtype of `x` is not int8, int16, int32, int64, float16, float32 or float64.
|
|
6903
|
+
TypeError: If the data type of `keep_prob` is not float.
|
|
6904
|
+
ValueError: If `keep_prob` is out of the range `[0.0, 1.0]`.
|
|
6905
|
+
ValueError: If `x` shape is not `4D`.
|
|
6991
6906
|
|
|
6992
6907
|
Supported Platforms:
|
|
6993
6908
|
``Ascend`` ``GPU`` ``CPU``
|
|
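A hedged sketch of the newly documented interface: whole (H, W) channel maps are zeroed with probability 1 - keep_prob, and the boolean mask marks the kept channels:

```python
import numpy as np
import mindspore
from mindspore import Tensor, ops

dropout2d = ops.Dropout2D(keep_prob=0.5)
x = Tensor(np.ones([2, 3, 4, 5]), mindspore.float32)   # (N, C, H, W)
output, mask = dropout2d(x)
print(output.shape, mask.shape)  # (2, 3, 4, 5) (2, 3, 4, 5); mask dtype is bool
```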
@@ -7005,7 +6920,7 @@ class Dropout2D(PrimitiveWithInfer):
|
|
|
7005
6920
|
"""Initialize Dropout2D."""
|
|
7006
6921
|
super().__init__("Dropout2D")
|
|
7007
6922
|
self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name)
|
|
7008
|
-
self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0,
|
|
6923
|
+
self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, validator.INC_BOTH, "keep_prob", self.name)
|
|
7009
6924
|
|
|
7010
6925
|
|
|
7011
6926
|
class Dropout3D(PrimitiveWithInfer):
|
|
@@ -7014,12 +6929,28 @@ class Dropout3D(PrimitiveWithInfer):
|
|
|
7014
6929
|
with probability 1-`keep_prob` from a Bernoulli distribution(For a 5-dimensional tensor with a shape of NCDHW,
|
|
7015
6930
|
the channel feature map refers to a 3-dimensional feature map with a shape of DHW).
|
|
7016
6931
|
|
|
7017
|
-
Dropout3D can improve the independence between channel feature maps.
|
|
7018
|
-
|
|
7019
6932
|
Note:
|
|
7020
6933
|
The keep probability :math:`keep\_prob` is equal to :math:`1 - p` in :func:`mindspore.ops.dropout3d`.
|
|
7021
6934
|
|
|
7022
|
-
|
|
6935
|
+
Dropout3D can improve the independence between channel feature maps.
|
|
6936
|
+
|
|
6937
|
+
Args:
|
|
6938
|
+
keep_prob (float): The keep probability of a channel, between 0 and 1, e.g. `keep_prob` = 0.8,
|
|
6939
|
+
means dropping out 20% of channels. Default: 0.5.
|
|
6940
|
+
|
|
6941
|
+
Inputs:
|
|
6942
|
+
- **x** (Tensor) - A 5-D tensor with shape :math:`(N, C, D, H, W)`, where N is the batch size, C is the number
|
|
6943
|
+
of channels, D is the feature depth, H is the feature height, and W is the feature width.
|
|
6944
|
+
The data type should be int8, int16, int32, int64, float16 or float32.
|
|
6945
|
+
|
|
6946
|
+
Outputs:
|
|
6947
|
+
- **output** (Tensor) - With the same shape and data type as `x`.
|
|
6948
|
+
- **mask** (Tensor) - With the same shape as `x` and the data type is bool.
|
|
6949
|
+
|
|
6950
|
+
Raises:
|
|
6951
|
+
TypeError: If the data type of `keep_prob` is not float.
|
|
6952
|
+
ValueError: If `keep_prob` is out of the range [0.0, 1.0];
|
|
6953
|
+
or if the dim of input is not 5-D.
|
|
7023
6954
|
|
|
7024
6955
|
Supported Platforms:
|
|
7025
6956
|
``Ascend`` ``GPU`` ``CPU``
|
|
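The same idea one dimension higher, as a hedged sketch (5-D input; whole (D, H, W) channel maps are dropped):

```python
import numpy as np
import mindspore
from mindspore import Tensor, ops

dropout3d = ops.Dropout3D(keep_prob=0.5)
x = Tensor(np.ones([2, 3, 2, 4, 5]), mindspore.float32)   # (N, C, D, H, W)
output, mask = dropout3d(x)
print(output.shape, mask.shape)  # (2, 3, 2, 4, 5) (2, 3, 2, 4, 5)
```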
@@ -7037,7 +6968,7 @@ class Dropout3D(PrimitiveWithInfer):
|
|
|
7037
6968
|
"""Initialize Dropout3D."""
|
|
7038
6969
|
super().__init__("Dropout3D")
|
|
7039
6970
|
self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name)
|
|
7040
|
-
self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0,
|
|
6971
|
+
self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, validator.INC_BOTH, "keep_prob", self.name)
|
|
7041
6972
|
|
|
7042
6973
|
|
|
7043
6974
|
class CTCLoss(Primitive):
|
|
@@ -7133,7 +7064,7 @@ class CTCGreedyDecoder(Primitive):
|
|
|
7133
7064
|
Refer to :func:`mindspore.ops.ctc_greedy_decoder` for more details.
|
|
7134
7065
|
|
|
7135
7066
|
Supported Platforms:
|
|
7136
|
-
``Ascend`` ``CPU``
|
|
7067
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
7137
7068
|
|
|
7138
7069
|
Examples:
|
|
7139
7070
|
>>> inputs = Tensor(np.array([[[0.6, 0.4, 0.2], [0.8, 0.6, 0.3]],
|
|
@@ -7173,23 +7104,24 @@ class BasicLSTMCell(PrimitiveWithInfer):
|
|
|
7173
7104
|
def __init__(self, keep_prob=1.0, forget_bias=1.0, state_is_tuple=True, activation='tanh'):
|
|
7174
7105
|
"""Initialize BasicLSTMCell."""
|
|
7175
7106
|
self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name)
|
|
7176
|
-
self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0,
|
|
7107
|
+
self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, validator.INC_BOTH, "keep_prob", self.name)
|
|
7177
7108
|
self.forget_bias = validator.check_value_type("forget_bias", forget_bias, [float], self.name)
|
|
7178
7109
|
self.state_is_tuple = validator.check_value_type("state_is_tuple", state_is_tuple, [bool], self.name)
|
|
7179
7110
|
self.activation = validator.check_string(activation, ['tanh'], "activation", self.name)
|
|
7180
7111
|
|
|
7181
7112
|
def infer_shape(self, x_shape, h_shape, c_shape, w_shape, b_shape):
|
|
7182
|
-
validator.check_int(len(x_shape), 2,
|
|
7183
|
-
validator.check_int(len(h_shape), 2,
|
|
7184
|
-
validator.check_int(len(c_shape), 2,
|
|
7185
|
-
validator.check_int(len(w_shape), 2,
|
|
7186
|
-
validator.check_int(len(b_shape), 1,
|
|
7187
|
-
validator.check("x_shape[0]", x_shape[0], "h_shape[0]", h_shape[0],
|
|
7188
|
-
validator.check("c_shape[0]", c_shape[0], "h_shape[0]", h_shape[0],
|
|
7189
|
-
validator.check("c_shape[1]", c_shape[1], "h_shape[1]", h_shape[1],
|
|
7190
|
-
validator.check("w_shape[1]", w_shape[1], "4*h_shape[1]", 4 * h_shape[1],
|
|
7191
|
-
validator.check("w_shape[0]", w_shape[0], "x_shape[1]+h_shape[1]", x_shape[1] + h_shape[1],
|
|
7192
|
-
|
|
7113
|
+
validator.check_int(len(x_shape), 2, validator.EQ, "x rank", self.name)
|
|
7114
|
+
validator.check_int(len(h_shape), 2, validator.EQ, "h rank", self.name)
|
|
7115
|
+
validator.check_int(len(c_shape), 2, validator.EQ, "c rank", self.name)
|
|
7116
|
+
validator.check_int(len(w_shape), 2, validator.EQ, "w rank", self.name)
|
|
7117
|
+
validator.check_int(len(b_shape), 1, validator.EQ, "b rank", self.name)
|
|
7118
|
+
validator.check("x_shape[0]", x_shape[0], "h_shape[0]", h_shape[0], validator.EQ, self.name)
|
|
7119
|
+
validator.check("c_shape[0]", c_shape[0], "h_shape[0]", h_shape[0], validator.EQ, self.name)
|
|
7120
|
+
validator.check("c_shape[1]", c_shape[1], "h_shape[1]", h_shape[1], validator.EQ, self.name)
|
|
7121
|
+
validator.check("w_shape[1]", w_shape[1], "4*h_shape[1]", 4 * h_shape[1], validator.EQ, self.name)
|
|
7122
|
+
validator.check("w_shape[0]", w_shape[0], "x_shape[1]+h_shape[1]", x_shape[1] + h_shape[1],
|
|
7123
|
+
validator.EQ, self.name)
|
|
7124
|
+
validator.check("b_shape[0]", b_shape[0], "4*h_shape[1]", 4 * h_shape[1], validator.EQ, self.name)
|
|
7193
7125
|
ct_shape = c_shape
|
|
7194
7126
|
ht_shape = c_shape
|
|
7195
7127
|
it_shape = c_shape
|
|
@@ -7242,8 +7174,10 @@ class DynamicRNN(Primitive):
|
|
|
7242
7174
|
keep_prob (float): A float identifying the keep prob in the operator. Default: 1.0.
|
|
7243
7175
|
cell_clip (float): A float identifying the cell clip in the operator. Default: -1.0.
|
|
7244
7176
|
num_proj (int): An integer identifying the number projection in the operator. Default: 0.
|
|
7245
|
-
time_major (bool): A bool
|
|
7246
|
-
|
|
7177
|
+
time_major (bool): A bool that specifies the data format of `x`. If it is set to True, the format is
|
|
7178
|
+
:math:`(num\_step, batch\_size, input\_size)`, if it is set to False, the format is
|
|
7179
|
+
:math:`(batch\_size, num\_step, input\_size)`.
|
|
7180
|
+
Default: True. Only supports True at present.
|
|
7247
7181
|
activation (str): A string identifying the type of activation function in the operator. Default: 'tanh'.
|
|
7248
7182
|
Only 'tanh' is currently supported.
|
|
7249
7183
|
forget_bias (float): A float identifying the forget bias in the operator. Default: 0.0.
|
|
@@ -7500,7 +7434,7 @@ class InTopK(Primitive):
|
|
|
7500
7434
|
validator.check_value_type("k", k, [int], self.name)
|
|
7501
7435
|
|
|
7502
7436
|
|
|
7503
|
-
class LRN(
|
|
7437
|
+
class LRN(Primitive):
|
|
7504
7438
|
r"""
|
|
7505
7439
|
Local Response Normalization.
|
|
7506
7440
|
|
|
@@ -7509,7 +7443,7 @@ class LRN(PrimitiveWithInfer):
|
|
|
7509
7443
|
b_{c} = a_{c}\left(k + \frac{\alpha}{n}
|
|
7510
7444
|
\sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}
|
|
7511
7445
|
|
|
7512
|
-
where the :math:`a_{c}` indicates the specific value of the pixel corresponding to c in feature map;
|
|
7446
|
+
where the :math:`a_{c}` indicates the specific value of the pixel corresponding to :math:`c` in feature map;
|
|
7513
7447
|
where the :math:`n/2` indicates the `depth_radius`; where the :math:`k` indicates the `bias`;
|
|
7514
7448
|
where the :math:`\alpha` indicates the `alpha`; where the :math:`\beta` indicates the `beta`.
|
|
7515
7449
|
|
|
@@ -7599,10 +7533,10 @@ class AvgPool3D(Primitive):
|
|
|
7599
7533
|
|
|
7600
7534
|
- pad: Implicit paddings on both sides of the input in depth, height, width. The number of `pad` will
|
|
7601
7535
|
be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
|
|
7602
|
-
pad (Union(int, tuple[int])): The pad value to be filled. Default: 0. If `pad` is an integer,
|
|
7603
|
-
|
|
7604
|
-
|
|
7605
|
-
|
|
7536
|
+
pad (Union(int, tuple[int], list[int])): The pad value to be filled. Default: 0. If `pad` is an integer,
|
|
7537
|
+
the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
|
|
7538
|
+
If `pad` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal to
|
|
7539
|
+
pad[0], pad[1], pad[2], pad[3], pad[4] and pad[5] correspondingly.
|
|
7606
7540
|
ceil_mode (bool): If True, ceil instead of floor to compute the output shape. Default: False.
|
|
7607
7541
|
count_include_pad (bool): If True, averaging calculation will include the zero-padding. Default: True.
|
|
7608
7542
|
divisor_override (int): If specified, it will be used as divisor in the averaging calculation,
|
|
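To illustrate the six-value `pad` layout now accepted as an int, tuple or list, a hedged sketch (assuming `pad_mode="pad"` is required when an explicit `pad` is given):

```python
import numpy as np
import mindspore
from mindspore import Tensor, ops

# Sketch: pad order is (head, tail, top, bottom, left, right) on the D/H/W axes.
avg_pool = ops.AvgPool3D(kernel_size=2, strides=1, pad_mode="pad",
                         pad=[1, 1, 0, 0, 0, 0], count_include_pad=False)
x = Tensor(np.arange(12).reshape(1, 1, 2, 2, 3), mindspore.float32)
output = avg_pool(x)
print(output.shape)  # expected (1, 1, 3, 1, 2) with this padding/kernel/stride
```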
@@ -7650,7 +7584,7 @@ class AvgPool3D(Primitive):
|
|
|
7650
7584
|
self.add_prim_attr('kernel_size', self.kernel_size)
|
|
7651
7585
|
self.strides = _check_3d_int_or_tuple('strides', strides, self.name, ret_five=True)
|
|
7652
7586
|
self.add_prim_attr('strides', self.strides)
|
|
7653
|
-
validator.check_value_type('pad', pad, (int, tuple), self.name)
|
|
7587
|
+
validator.check_value_type('pad', pad, (int, tuple, list), self.name)
|
|
7654
7588
|
if isinstance(pad, int):
|
|
7655
7589
|
pad = (pad,) * 6
|
|
7656
7590
|
if len(pad) != 6:
|
|
@@ -7678,7 +7612,104 @@ class Conv3D(Primitive):
|
|
|
7678
7612
|
r"""
|
|
7679
7613
|
3D convolution layer.
|
|
7680
7614
|
|
|
7681
|
-
|
|
7615
|
+
Applies a 3D convolution over an input tensor which is typically of shape
|
|
7616
|
+
:math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` and output shape
|
|
7617
|
+
:math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`, where :math:`N` is batch size, :math:`C` is channel number,
|
|
7618
|
+
:math:`D` is depth, :math:`H, W` is feature height and width respectively.
|
|
7619
|
+
The output value of a layer is calculated as:
|
|
7620
|
+
|
|
7621
|
+
.. math::
|
|
7622
|
+
|
|
7623
|
+
\operatorname{out}\left(N_{i}, C_{\text {out}_j}\right)=\operatorname{bias}\left(C_{\text {out}_j}\right)+
|
|
7624
|
+
\sum_{k=0}^{C_{in}-1} ccor(\text {weight}\left(C_{\text {out}_j}, k\right),
|
|
7625
|
+
\operatorname{input}\left(N_{i}, k\right))
|
|
7626
|
+
|
|
7627
|
+
where :math:`k` is kernel,
|
|
7628
|
+
:math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_ ,
|
|
7629
|
+
:math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of
|
|
7630
|
+
the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)`
|
|
7631
|
+
is a convolution kernel slice with shape
|
|
7632
|
+
:math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
|
|
7633
|
+
where :math:`\text{kernel_size[0]}`, :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are
|
|
7634
|
+
the depth, height and width of the convolution kernel respectively. :math:`\text{bias}` is the bias parameter
|
|
7635
|
+
and :math:`\text{X}` is the input tensor.
|
|
7636
|
+
The shape of full convolution kernel is
|
|
7637
|
+
:math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
|
|
7638
|
+
where `groups` is the number of groups to split `input` in the channel dimension.
|
|
7639
|
+
|
|
7640
|
+
For more details, please refer to the paper `Gradient Based Learning Applied to Document
|
|
7641
|
+
Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ .
|
|
7642
|
+
|
|
7643
|
+
If the 'pad_mode' is set to be "valid", the output depth, height and width will be
|
|
7644
|
+
:math:`\left \lfloor{1 + \frac{D_{in} + 2 \times \text{padding} - \text{ks_d} -
|
|
7645
|
+
(\text{ks_d} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor` and
|
|
7646
|
+
:math:`\left \lfloor{1 + \frac{H_{in} + 2 \times \text{padding} - \text{ks_h} -
|
|
7647
|
+
(\text{ks_h} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor` and
|
|
7648
|
+
:math:`\left \lfloor{1 + \frac{W_{in} + 2 \times \text{padding} - \text{ks_w} -
|
|
7649
|
+
(\text{ks_w} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor` respectively. Where
|
|
7650
|
+
:math:`dilation` is the spacing between kernel elements, :math:`stride` is the step length of each step,
|
|
7651
|
+
:math:`padding` is zero-padding added to both sides of the input.
|
|
7652
|
+
|
|
7653
|
+
Args:
|
|
7654
|
+
out_channel (int): The number of output channel :math:`C_{out}`.
|
|
7655
|
+
kernel_size (Union[int, tuple[int]]): Specifies the depth, height
|
|
7656
|
+
and width of the 3D convolution window. It can be a single int or a tuple of 3 integers.
|
|
7657
|
+
Single int means the value is for the depth, height and width
|
|
7658
|
+
of the kernel. A tuple of 3 ints corresponds to the depth, height and width of the kernel respectively.
|
|
7659
|
+
mode (int): Modes for different convolutions. It is currently not used. Default: 1.
|
|
7660
|
+
stride (Union[int, tuple[int]], optional): The distance of kernel moving, it can be an int number
|
|
7661
|
+
that represents the depth, height and width of movement or a tuple of three int numbers that
|
|
7662
|
+
represent depth, height and width movement respectively. Default: 1.
|
|
7663
|
+
pad_mode (str, optional): Specifies padding mode. The optional values are
|
|
7664
|
+
"same", "valid" and "pad". Default: "valid".
|
|
7665
|
+
|
|
7666
|
+
- same: Adopts the way of completion. The depth, height and width of the output will be equal to
|
|
7667
|
+
the input `x` divided by stride. The padding will be evenly calculated in head and tail, top and bottom,
|
|
7668
|
+
left and right directions possibly.
|
|
7669
|
+
Otherwise, the last extra padding will be calculated from the tail, bottom and the right side.
|
|
7670
|
+
If this mode is set, `pad` must be 0.
|
|
7671
|
+
|
|
7672
|
+
- valid: Adopts the way of discarding. The possible largest depth, height and width of output
|
|
7673
|
+
will be returned without padding. Extra pixels will be discarded. If this mode is set, `pad`
|
|
7674
|
+
must be 0.
|
|
7675
|
+
|
|
7676
|
+
- pad: Implicit paddings on both sides of the input in depth, height and width. The number of `pad` will
|
|
7677
|
+
be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
|
|
7678
|
+
|
|
7679
|
+
pad (Union(int, tuple[int])): The pad value to be filled. Default: 0. If `pad` is an integer, the paddings of
|
|
7680
|
+
head, tail, top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of six
|
|
7681
|
+
integers, the padding of head, tail, top, bottom, left and right equal to pad[0], pad[1], pad[2],
|
|
7682
|
+
pad[3], pad[4] and pad[5] correspondingly.
|
|
7683
|
+
dilation (Union[int, tuple[int]], optional): The data type is int or a tuple of 3 integers
|
|
7684
|
+
:math:`(dilation_d, dilation_h, dilation_w)`. Currently, dilation on depth only supports the case of 1
|
|
7685
|
+
on Ascend backend. Specifies the dilation rate to use for dilated convolution. If set :math:`k > 1`,
|
|
7686
|
+
there will be :math:`k - 1` pixels skipped for each sampling location.
|
|
7687
|
+
The value ranges for the depth, height, and width dimensions are [1, D], [1, H], and [1, W],
|
|
7688
|
+
respectively. Default: 1.
|
|
7689
|
+
group (int, optional): The number of groups into which the filter is divided. `in_channels`
|
|
7690
|
+
and `out_channels` must be divisible by `group`. Default: 1.
|
|
7691
|
+
data_format (str): The optional value for data format. Currently only "NCDHW" is supported.
|
|
7692
|
+
|
|
7693
|
+
Inputs:
|
|
7694
|
+
- **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.
|
|
7695
|
+
Currently the input data type only supports float16 and float32.
|
|
7696
|
+
- **weight** (Tensor) - Set size of kernel is :math:`(k_d, K_h, K_w)`, then the shape is
|
|
7697
|
+
:math:`(C_{out}, C_{in}/groups, k_d, K_h, K_w)`.
|
|
7698
|
+
Currently the weight data type only supports float16 and float32.
|
|
7699
|
+
- **bias** (Tensor) - Tensor of shape :math:`C_{in}`. Currently, only None is supported.
|
|
7700
|
+
|
|
7701
|
+
Outputs:
|
|
7702
|
+
Tensor, the output of the 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
|
|
7703
|
+
|
|
7704
|
+
Raises:
|
|
7705
|
+
TypeError: If `out_channel` or `group` is not an int.
|
|
7706
|
+
TypeError: If `kernel_size`, `stride`, `pad` or `dilation` is neither an int nor a tuple.
|
|
7707
|
+
ValueError: If `out_channel`, `kernel_size`, `stride` or `dilation` is less than 1.
|
|
7708
|
+
ValueError: If `pad` is less than 0.
|
|
7709
|
+
ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
|
|
7710
|
+
ValueError: If `pad` is a tuple whose length is not equal to 6.
|
|
7711
|
+
ValueError: If `pad_mode` is not equal to 'pad' and `pad` is not equal to (0, 0, 0, 0, 0, 0).
|
|
7712
|
+
ValueError: If `data_format` is not 'NCDHW'.
|
|
7682
7713
|
|
|
7683
7714
|
Supported Platforms:
|
|
7684
7715
|
``Ascend`` ``GPU`` ``CPU``
|
|
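A hedged sketch matching the shapes in the docstring above: `x` is (N, C_in, D, H, W), `weight` is (C_out, C_in/group, k_d, k_h, k_w), and `bias` is left out (only None is supported):

```python
import numpy as np
import mindspore
from mindspore import Tensor, ops

x = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float16)
weight = Tensor(np.ones([32, 3, 4, 3, 3]), mindspore.float16)
conv3d = ops.Conv3D(out_channel=32, kernel_size=(4, 3, 3))   # pad_mode="valid" by default
output = conv3d(x, weight)
print(output.shape)  # (16, 32, 7, 30, 30) by the "valid" formula above
```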
@@ -7741,7 +7772,11 @@ class Conv3D(Primitive):
|
|
|
7741
7772
|
self.add_prim_attr('data_format', self.format)
|
|
7742
7773
|
self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name)
|
|
7743
7774
|
validator.check_value_type("group", group, (int,), self.name)
|
|
7744
|
-
validator.check_int_range(group, 1, out_channel,
|
|
7775
|
+
validator.check_int_range(group, 1, out_channel, validator.INC_BOTH, "group", self.name)
|
|
7776
|
+
device_target = context.get_context("device_target")
|
|
7777
|
+
if device_target == "Ascend" and group != 1:
|
|
7778
|
+
raise ValueError("On Ascend platform, group = 1 must be satisfied.")
|
|
7779
|
+
|
|
7745
7780
|
self.group = group
|
|
7746
7781
|
self.add_prim_attr('groups', self.group)
|
|
7747
7782
|
self.add_prim_attr('offset_x', 0)
|
|
@@ -7963,7 +7998,7 @@ class SparseApplyAdadelta(Primitive):
|
|
|
7963
7998
|
def __init__(self, epsilon, use_locking=False):
|
|
7964
7999
|
"""Initialize SparseApplyAdadelta"""
|
|
7965
8000
|
validator.check_value_type("epsilon", epsilon, [float], self.name)
|
|
7966
|
-
validator.check_number("epsilon", epsilon, 0.0,
|
|
8001
|
+
validator.check_number("epsilon", epsilon, 0.0, validator.GE, self.name)
|
|
7967
8002
|
validator.check_value_type("use_locking", use_locking, [bool], self.name)
|
|
7968
8003
|
|
|
7969
8004
|
|
|
@@ -7974,11 +8009,15 @@ class CTCLossV2(Primitive):
|
|
|
7974
8009
|
The CTC algorithm is proposed in `Connectionist Temporal Classification: Labeling Unsegmented Sequence Data with
|
|
7975
8010
|
Recurrent Neural Networks <http://www.cs.toronto.edu/~graves/icml_2006.pdf>`_.
|
|
7976
8011
|
|
|
8012
|
+
.. warning::
|
|
8013
|
+
This is an experimental API that is subject to change or deletion.
|
|
8014
|
+
|
|
7977
8015
|
Args:
|
|
7978
8016
|
blank (int, optional): The blank label. Default: 0.
|
|
7979
8017
|
reduction (str, optional): Apply specific reduction method to the output. Currently only support 'none',
|
|
7980
8018
|
not case sensitive. Default: "none".
|
|
7981
|
-
zero_infinity (bool, optional):
|
|
8019
|
+
zero_infinity (bool, optional): If loss is infinite, this parameter determines whether to set that loss
|
|
8020
|
+
and its correlated gradient to zero. Default: False.
|
|
7982
8021
|
|
|
7983
8022
|
Inputs:
|
|
7984
8023
|
- **log_probs** (Tensor) - A tensor of shape :math:`(T, C, N)`, where :math:`T` is input length, :math:`N` is
|
|
@@ -8001,19 +8040,19 @@ class CTCLossV2(Primitive):
|
|
|
8001
8040
|
TypeError: If the dtype of `targets`, `input_lengths` or `target_lengths` is not int32 or int64.
|
|
8002
8041
|
ValueError: If the rank of `log_probs` is not 3.
|
|
8003
8042
|
ValueError: If the rank of `targets` is not 2.
|
|
8004
|
-
ValueError: If the shape of `input_lengths` does not match
|
|
8005
|
-
ValueError: If the shape of `target_lengths` does not match
|
|
8043
|
+
ValueError: If the shape of `input_lengths` does not match batch_size :math:`N`.
|
|
8044
|
+
ValueError: If the shape of `target_lengths` does not match batch_size :math:`N`.
|
|
8006
8045
|
TypeError: If the types of `targets`, `input_lengths` or `target_lengths` are different.
|
|
8007
8046
|
ValueError: If the value of `blank` is not in range [0, num_labels|C).
|
|
8008
8047
|
RuntimeError: If any value of `input_lengths` is larger than (num_labels|C).
|
|
8009
|
-
RuntimeError: If any `target_lengths[i]` is not in range [0, `input_length[i]`
|
|
8048
|
+
RuntimeError: If any `target_lengths[i]` is not in range [0, `input_length[i]`].
|
|
8010
8049
|
|
|
8011
8050
|
Supported Platforms:
|
|
8012
8051
|
``Ascend`` ``GPU`` ``CPU``
|
|
8013
8052
|
|
|
8014
8053
|
Examples:
|
|
8015
8054
|
>>> log_probs = Tensor(np.array([[[0.3, 0.6, 0.6]],
|
|
8016
|
-
|
|
8055
|
+
... [[0.9, 0.4, 0.2]]]).astype(np.float32))
|
|
8017
8056
|
>>> targets = Tensor(np.array([[0, 1]]), mstype.int32)
|
|
8018
8057
|
>>> input_lengths = Tensor(np.array([2]), mstype.int32)
|
|
8019
8058
|
>>> target_lengths = Tensor(np.array([1]), mstype.int32)
|
|
@@ -8258,17 +8297,19 @@ class Conv3DTranspose(Primitive):
|
|
|
8258
8297
|
f"when 'pad_mode' is not \"pad\", but got 'output_padding' is "
|
|
8259
8298
|
f"{output_padding} and 'pad_mode' is {pad_mode}.")
|
|
8260
8299
|
self.add_prim_attr('output_padding', self.output_padding)
|
|
8261
|
-
validator.check_int_range(self.kernel_size[0] * self.kernel_size[1] * self.kernel_size[2],
|
|
8262
|
-
|
|
8263
|
-
|
|
8300
|
+
validator.check_int_range(self.kernel_size[0] * self.kernel_size[1] * self.kernel_size[2],
|
|
8301
|
+
1, 343, validator.INC_BOTH,
|
|
8302
|
+
'The product of height, width and depth of kernel_size belonging [1, 343]',
|
|
8303
|
+
self.name)
|
|
8304
|
+
validator.check_int_range(self.stride[0] * self.stride[1] * self.stride[2], 1, 343, validator.INC_BOTH,
|
|
8264
8305
|
'The product of height, width and depth of stride belonging [1, 343]', self.name)
|
|
8265
|
-
validator.check_int_range(self.stride[1] * self.stride[2], 1, 256,
|
|
8306
|
+
validator.check_int_range(self.stride[1] * self.stride[2], 1, 256, validator.INC_BOTH,
|
|
8266
8307
|
'The product of height, width and depth of stride belonging [1, 256]', self.name)
|
|
8267
|
-
validator.check_int_range(self.output_padding[2], 0, max(self.dilation[2], self.stride[2]),
|
|
8308
|
+
validator.check_int_range(self.output_padding[2], 0, max(self.dilation[2], self.stride[2]), validator.INC_LEFT,
|
|
8268
8309
|
'output_padding_d belonging [0, max(stride_d, dilation_d))', self.name)
|
|
8269
|
-
validator.check_int_range(self.output_padding[3], 0, max(self.dilation[3], self.stride[3]),
|
|
8310
|
+
validator.check_int_range(self.output_padding[3], 0, max(self.dilation[3], self.stride[3]), validator.INC_LEFT,
|
|
8270
8311
|
'output_padding_h belonging [0, max(stride_h,dilation_h))', self.name)
|
|
8271
|
-
validator.check_int_range(self.output_padding[4], 0, max(self.dilation[4], self.stride[4]),
|
|
8312
|
+
validator.check_int_range(self.output_padding[4], 0, max(self.dilation[4], self.stride[4]), validator.INC_LEFT,
|
|
8272
8313
|
'output_padding_w belonging [0, max(stride_w,dilation_w))', self.name)
|
|
8273
8314
|
|
|
8274
8315
|
|
|
@@ -8286,7 +8327,9 @@ class Dilation2D(Primitive):
|
|
|
8286
8327
|
\text{input}(N_i, C_j, s_0 \times h + d_0 \times m, s_1 \times w + d_1 \times n) + \text{filter}(C_j, m, n)
|
|
8287
8328
|
|
|
8288
8329
|
.. warning::
|
|
8289
|
-
This
|
|
8330
|
+
This is an experimental API that is subject to change or deletion.
|
|
8331
|
+
|
|
8332
|
+
Note:
|
|
8290
8333
|
If the input data type is float32, this operator is still executed in float16 mode.
|
|
8291
8334
|
|
|
8292
8335
|
Args:
|
|
@@ -8301,7 +8344,7 @@ class Dilation2D(Primitive):
|
|
|
8301
8344
|
each sampling location. Its value must be greater or equal to 1 and bounded by
|
|
8302
8345
|
the height and width of the input `x`.
|
|
8303
8346
|
|
|
8304
|
-
pad_mode (str): Specifies padding mode. The optional values are
|
|
8347
|
+
pad_mode (str, optional): Specifies padding mode. The optional values are
|
|
8305
8348
|
"same", "valid". Default: "same". Both upper and lower case are supported.
|
|
8306
8349
|
|
|
8307
8350
|
- same: Adopts the way of completion. The height and width of the output will be the same as
|
|
@@ -8309,10 +8352,10 @@ class Dilation2D(Primitive):
|
|
|
8309
8352
|
|
|
8310
8353
|
- valid: Adopts the way of discarding. The possible largest height and width of output will be returned
|
|
8311
8354
|
without padding. Extra pixels will be discarded.
|
|
8312
|
-
data_format (str): The value for data format, only 'NCHW' is supported at present. Default: "NCHW".
|
|
8355
|
+
data_format (str, optional): The value for data format, only 'NCHW' is supported at present. Default: "NCHW".
|
|
8313
8356
|
|
|
8314
8357
|
Inputs:
|
|
8315
|
-
- **x** (Tensor) - Input data. A
|
|
8358
|
+
- **x** (Tensor) - Input data. A 4-D Tensor, its shape must be
|
|
8316
8359
|
:math:`(N, C_{in}, H_{in}, W_{in})`.
|
|
8317
8360
|
- **filter** (Tensor) - A three dimension tensor with the same type as input. The shape must be
|
|
8318
8361
|
:math:`(C_{in}, H_{filter}, W_{filter})`.
|
|
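The Dilation2D formula above is plain grayscale morphological dilation. A NumPy sketch of the single-channel, 'valid'-padding case, written directly from the math rather than from the operator's kernel:

import numpy as np

def dilation2d_single_channel(x, filt, stride=(1, 1), dilation=(1, 1)):
    """Grayscale dilation of a 2-D array `x` by `filt`, 'valid' padding."""
    s0, s1 = stride
    d0, d1 = dilation
    kh, kw = filt.shape
    out_h = (x.shape[0] - d0 * (kh - 1) - 1) // s0 + 1
    out_w = (x.shape[1] - d1 * (kw - 1) - 1) // s1 + 1
    out = np.empty((out_h, out_w), dtype=x.dtype)
    for h in range(out_h):
        for w in range(out_w):
            # max over the dilated window of input + filter, as in the formula above
            window = x[s0 * h: s0 * h + d0 * kh: d0,
                       s1 * w: s1 * w + d1 * kw: d1]
            out[h, w] = (window + filt).max()
    return out

x = np.arange(16, dtype=np.float32).reshape(4, 4)
filt = np.zeros((2, 2), dtype=np.float32)   # zero filter: each entry is the max of a 2x2 window
print(dilation2d_single_channel(x, filt))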
@@ -8398,10 +8441,10 @@ class SoftShrink(Primitive):
|
|
|
8398
8441
|
r"""
|
|
8399
8442
|
Applies the SoftShrink function element-wise.
|
|
8400
8443
|
|
|
8401
|
-
Refer to :func:`mindspore.ops.
|
|
8444
|
+
Refer to :func:`mindspore.ops.softshrink` for more details.
|
|
8402
8445
|
|
|
8403
8446
|
Supported Platforms:
|
|
8404
|
-
``Ascend`` ``
|
|
8447
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
8405
8448
|
|
|
8406
8449
|
Examples:
|
|
8407
8450
|
>>> import mindspore
|
|
@@ -8419,7 +8462,7 @@ class SoftShrink(Primitive):
|
|
|
8419
8462
|
def __init__(self, lambd=0.5):
|
|
8420
8463
|
"""Initialize SoftShrink"""
|
|
8421
8464
|
validator.check_value_type("lambd", lambd, [float], self.name)
|
|
8422
|
-
validator.check_number("lambd", lambd, 0,
|
|
8465
|
+
validator.check_number("lambd", lambd, 0, validator.GE, self.name)
|
|
8423
8466
|
|
|
8424
8467
|
|
|
8425
8468
|
class HShrink(Primitive):
|
|
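SoftShrink itself is a simple element-wise threshold. A NumPy sketch of the rule referred to by :func:`mindspore.ops.softshrink`, assuming the usual definition with the non-negative `lambd` that the validator above now enforces:

import numpy as np

def softshrink(x, lambd=0.5):
    """SoftShrink: x - lambd for x > lambd, x + lambd for x < -lambd, else 0."""
    return np.where(x > lambd, x - lambd, np.where(x < -lambd, x + lambd, 0.0))

x = np.array([0.5297, 0.7871, 1.1754, -0.3], dtype=np.float32)
print(softshrink(x, lambd=0.5))   # approximately [0.0297 0.2871 0.6754 0.]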
@@ -8429,7 +8472,7 @@ class HShrink(Primitive):
|
|
|
8429
8472
|
Refer to :func:`mindspore.ops.hardshrink` for more details.
|
|
8430
8473
|
|
|
8431
8474
|
Supported Platforms:
|
|
8432
|
-
``Ascend`` ``
|
|
8475
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
8433
8476
|
|
|
8434
8477
|
Examples:
|
|
8435
8478
|
>>> import mindspore as ms
|
|
@@ -8486,9 +8529,9 @@ class ApplyAdagradDA(Primitive):
|
|
|
8486
8529
|
Inputs:
|
|
8487
8530
|
- **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
|
|
8488
8531
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
8489
|
-
- **gradient_accumulator** (Parameter) - The dict of mutable tensor
|
|
8532
|
+
- **gradient_accumulator** (Parameter) - The dict of mutable tensor :math:`grad\_accum`. Must have the same
|
|
8490
8533
|
shape and dtype as `var`.
|
|
8491
|
-
- **gradient_squared_accumulator** (Parameter) - The dict of mutable tensor
|
|
8534
|
+
- **gradient_squared_accumulator** (Parameter) - The dict of mutable tensor :math:`grad\_squared\_accum`.
|
|
8492
8535
|
Must have the same shape and dtype as `var`.
|
|
8493
8536
|
- **grad** (Tensor) - A tensor for gradient. Must have the same shape and dtype as `var`.
|
|
8494
8537
|
- **lr** ([Number, Tensor]) - Scaling factor. Must be a scalar. With float32 or float16 data type.
|
|
@@ -8634,7 +8677,7 @@ class SparseApplyRMSProp(Primitive):
|
|
|
8634
8677
|
RuntimeError: If the data type of `var`, `ms`, `mom` and `grad` conversion of Parameter is not supported.
|
|
8635
8678
|
|
|
8636
8679
|
Supported Platforms:
|
|
8637
|
-
``Ascend`` ``
|
|
8680
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
8638
8681
|
|
|
8639
8682
|
Examples:
|
|
8640
8683
|
>>> class SparseApplyRMSPropNet(nn.Cell):
|
|
@@ -8682,9 +8725,9 @@ class SparseApplyRMSProp(Primitive):
|
|
|
8682
8725
|
validator.check_value_type("momentum", momentum, [float], self.name)
|
|
8683
8726
|
validator.check_value_type("epsilon", epsilon, [float], self.name)
|
|
8684
8727
|
validator.check_value_type("use_locking", use_locking, [bool], self.name)
|
|
8685
|
-
self.epsilon = validator.check_number("epsilon", epsilon, 0.0,
|
|
8686
|
-
self.momentum = validator.check_number("momentum", momentum, 0.0,
|
|
8687
|
-
self.rho = validator.check_float_range(rho, 0.0, 1.0,
|
|
8728
|
+
self.epsilon = validator.check_number("epsilon", epsilon, 0.0, validator.GT, self.name)
|
|
8729
|
+
self.momentum = validator.check_number("momentum", momentum, 0.0, validator.GE, self.name)
|
|
8730
|
+
self.rho = validator.check_float_range(rho, 0.0, 1.0, validator.INC_BOTH, "rho", self.name)
|
|
8688
8731
|
|
|
8689
8732
|
|
|
8690
8733
|
class SparseApplyCenteredRMSProp(Primitive):
|
|
@@ -8751,13 +8794,9 @@ class SparseApplyCenteredRMSProp(Primitive):
|
|
|
8751
8794
|
ValueError: If shape of `grad` is not same as shape of `var` except first dimension.
|
|
8752
8795
|
|
|
8753
8796
|
Supported Platforms:
|
|
8754
|
-
``GPU`` ``CPU``
|
|
8797
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
8755
8798
|
|
|
8756
8799
|
Examples:
|
|
8757
|
-
>>> import numpy as np
|
|
8758
|
-
>>> from mindspore import Tensor
|
|
8759
|
-
>>> import mindspore.common.dtype as mstype
|
|
8760
|
-
>>> import mindspore.ops.operations.nn_ops as nn_ops
|
|
8761
8800
|
>>> var = Tensor(np.array([[0.6, 0.4], [0.1, 0.5]]).astype(np.float32))
|
|
8762
8801
|
>>> mg = Tensor(np.array([[0.1, 0.3], [0.1, 0.5]]).astype(np.float32))
|
|
8763
8802
|
>>> ms = Tensor(np.array([[0.2, 0.1], [0.1, 0.2]]).astype(np.float32))
|
|
@@ -8776,10 +8815,10 @@ class SparseApplyCenteredRMSProp(Primitive):
|
|
|
8776
8815
|
"""
|
|
8777
8816
|
|
|
8778
8817
|
__mindspore_signature__ = (
|
|
8779
|
-
sig.make_sig('var', dtype=sig.sig_dtype.T),
|
|
8780
|
-
sig.make_sig('mg', dtype=sig.sig_dtype.T),
|
|
8781
|
-
sig.make_sig('ms', dtype=sig.sig_dtype.T),
|
|
8782
|
-
sig.make_sig('mom', dtype=sig.sig_dtype.T),
|
|
8818
|
+
sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
|
|
8819
|
+
sig.make_sig('mg', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
|
|
8820
|
+
sig.make_sig('ms', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
|
|
8821
|
+
sig.make_sig('mom', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
|
|
8783
8822
|
sig.make_sig('lr', dtype=sig.sig_dtype.T),
|
|
8784
8823
|
sig.make_sig('rho', dtype=sig.sig_dtype.T),
|
|
8785
8824
|
sig.make_sig('momentum', dtype=sig.sig_dtype.T),
|
|
@@ -8894,13 +8933,12 @@ class ApplyKerasMomentum(Primitive):
|
|
|
8894
8933
|
|
|
8895
8934
|
class MultilabelMarginLoss(Primitive):
|
|
8896
8935
|
r"""
|
|
8897
|
-
|
|
8936
|
+
Creates a loss criterion that minimizes the hinge loss for multi-class
|
|
8937
|
+
classification tasks.
|
|
8938
|
+
It takes a 2D mini-batch Tensor :math:`x` as input and a 2D
|
|
8939
|
+
Tensor :math:`y` containing target class indices as the target.
|
|
8898
8940
|
|
|
8899
|
-
|
|
8900
|
-
hinge loss (margin-based loss) between input :math:`x` (a 2D mini-batch `Tensor`)
|
|
8901
|
-
and output :math:`y` (which is a 2D `Tensor` of target class indices).
|
|
8902
|
-
|
|
8903
|
-
Refer to :func:`mindspore.ops.multi_label_margin_loss` for more details.
|
|
8941
|
+
Refer to :func:`mindspore.ops.multilabel_margin_loss` for more details.
|
|
8904
8942
|
|
|
8905
8943
|
Supported Platforms:
|
|
8906
8944
|
``Ascend`` ``GPU``
|
|
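For reference, a NumPy sketch of the hinge computation described above for a single sample, following the common convention that `y` holds class indices padded with -1 and the sum is normalized by the number of classes; this is one reading of the docstring, not the operator's kernel:

import numpy as np

def multilabel_margin_loss_single(x, y):
    """Hinge loss for one sample: x is (C,) of scores, y is (C,) of target indices padded with -1."""
    # Only the indices before the first -1 count as targets.
    targets = []
    for idx in y:
        if idx < 0:
            break
        targets.append(int(idx))
    non_targets = [i for i in range(len(x)) if i not in targets]
    loss = 0.0
    for j in targets:
        for i in non_targets:
            loss += max(0.0, 1.0 - (x[j] - x[i]))
    return loss / len(x)

x = np.array([0.1, 0.2, 0.4, 0.8])
y = np.array([3, 0, -1, 1])
print(multilabel_margin_loss_single(x, y))   # approximately 0.85 under this definition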
@@ -8976,7 +9014,7 @@ class ApplyAdamWithAmsgrad(Primitive):
|
|
|
8976
9014
|
ValueError: If the shape of `beta1_power`, `beta2_power`, `lr` is not 0.
|
|
8977
9015
|
|
|
8978
9016
|
Supported Platforms:
|
|
8979
|
-
``Ascend`` ``
|
|
9017
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
8980
9018
|
|
|
8981
9019
|
Examples:
|
|
8982
9020
|
>>> class ApplyAdamWithAmsgradNet(nn.Cell):
|
|
@@ -9025,6 +9063,9 @@ class GridSampler3D(Primitive):
|
|
|
9025
9063
|
Given an input and a grid, the output is calculated using the input values
|
|
9026
9064
|
and pixel positions in the grid. Only volume (5-D) input is supported.
|
|
9027
9065
|
|
|
9066
|
+
.. warning::
|
|
9067
|
+
This is an experimental API that is subject to change or deletion.
|
|
9068
|
+
|
|
9028
9069
|
Refer to :func:`mindspore.ops.grid_sample` for more details.
|
|
9029
9070
|
|
|
9030
9071
|
Supported Platforms:
|
|
@@ -9062,35 +9103,40 @@ class FractionalMaxPool(Primitive):
|
|
|
9062
9103
|
r"""
|
|
9063
9104
|
Performs fractional max pooling on the input.
|
|
9064
9105
|
|
|
9065
|
-
Fractional max pooling is similar to regular max pooling,
|
|
9066
|
-
|
|
9067
|
-
|
|
9068
|
-
|
|
9069
|
-
|
|
9106
|
+
Fractional max pooling is similar to regular max pooling, but with the added flexibility of
|
|
9107
|
+
allowing the overall reduction ratio `N` to be a non-integer value. In regular max pooling,
|
|
9108
|
+
an input set is reduced in size by taking the maximum value of `N x N` (usually 2x2)
|
|
9109
|
+
subsections of the set, with the goal of reducing the set by a factor of `N`, where `N` is an integer.
|
|
9110
|
+
|
|
9111
|
+
In contrast, fractional max pooling uses randomly generated pool sizes that are fairly uniform in size.
|
|
9070
9112
|
|
|
9071
9113
|
.. warning::
|
|
9072
9114
|
"pooling_ratio", currently only supports row and col dimension and should be >= 1.0, the first
|
|
9073
|
-
and last elements must be 1.0 because
|
|
9115
|
+
and last elements must be 1.0 because pooling on batch and channels dimensions is not allowed.
|
|
9074
9116
|
|
|
9075
9117
|
Args:
|
|
9076
|
-
pooling_ratio (list(float)): Decide the shape of output, is a list of
|
|
9077
|
-
Pooling ratio for each dimension of value should be
|
|
9078
|
-
|
|
9079
|
-
|
|
9080
|
-
|
|
9081
|
-
|
|
9082
|
-
|
|
9083
|
-
|
|
9084
|
-
|
|
9085
|
-
|
|
9086
|
-
When set to False, the values are not reused.
|
|
9087
|
-
|
|
9088
|
-
|
|
9089
|
-
|
|
9090
|
-
fixed pool regions will not be used.
|
|
9091
|
-
|
|
9092
|
-
|
|
9093
|
-
|
|
9118
|
+
pooling_ratio (list(float)): Decides the shape of the output; a list of float numbers with length >= 4.
|
|
9119
|
+
Pooling ratio for each dimension of value should not be less than 0; currently only supported
|
|
9120
|
+
for the row and col dimensions.
|
|
9121
|
+
pseudo_random(bool, optional): Generate the pooling sequence either randomly or pseudo-randomly.
|
|
9122
|
+
If the pseudo_random parameter is set to True, the sequence will be generated in a
|
|
9123
|
+
pseudo-random fashion, otherwise it will be generated randomly.
|
|
9124
|
+
Refer to `Fractional Max-Pooling <https://arxiv.org/pdf/1412.6071>`_
|
|
9125
|
+
by Benjamin Graham to understand the distinction between the two.
|
|
9126
|
+
Default: False.
|
|
9127
|
+
overlapping(bool, optional): When set to True, the values at the boundary of adjacent pooling cells
|
|
9128
|
+
will be shared by both cells during pooling process. When set to False, the values are not reused.
|
|
9129
|
+
Default: False.
|
|
9130
|
+
deterministic(bool, optional): If deterministic is set to True, a fixed pooling region will be used
|
|
9131
|
+
in the computation graph, ensuring that the FractionalMaxPool is deterministic.
|
|
9132
|
+
This is often used in unit tests. When set to False, fixed pool regions will not be used.
|
|
9133
|
+
Default: False.
|
|
9134
|
+
seed(int, optional): If either seed or seed2 are set to a non-zero value, the random number
|
|
9135
|
+
generator will be seeded using the specified seed. If neither seed nor seed2 are set,
|
|
9136
|
+
the generator will be seeded by a random seed.
|
|
9137
|
+
Default: 0.
|
|
9138
|
+
seed2(int, optional): The second seed to avoid seed collision.
|
|
9139
|
+
Default: 0.
|
|
9094
9140
|
|
|
9095
9141
|
Inputs:
|
|
9096
9142
|
- **x** (Tensor) -The data type must be one of the following types: float32, float64, int32, int64.
|
|
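A compact NumPy sketch of the idea described above: row and column cut points are chosen so that the reduction ratio can be fractional, and each cell is reduced with max. The cuts here are evenly spaced for simplicity; the operator draws them randomly or pseudo-randomly depending on `pseudo_random`:

import numpy as np

def fractional_max_pool_2d(x, ratio_h, ratio_w):
    """Max-pool a 2-D array with fractional reduction ratios, using evenly spaced cuts."""
    h, w = x.shape
    out_h, out_w = int(h / ratio_h), int(w / ratio_w)
    # Cut points; the real operator perturbs these randomly or pseudo-randomly.
    rows = np.linspace(0, h, out_h + 1).astype(int)
    cols = np.linspace(0, w, out_w + 1).astype(int)
    out = np.empty((out_h, out_w), dtype=x.dtype)
    for i in range(out_h):
        for j in range(out_w):
            out[i, j] = x[rows[i]:rows[i + 1], cols[j]:cols[j + 1]].max()
    return out

x = np.arange(1, 17).reshape(4, 4)
print(fractional_max_pool_2d(x, 1.5, 1.5))   # 4x4 input -> 2x2 output with a non-integer ratio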
@@ -9112,7 +9158,7 @@ class FractionalMaxPool(Primitive):
|
|
|
9112
9158
|
ValueError: If the first and last element of `pooling_ratio` is not equal to 1.0.
|
|
9113
9159
|
|
|
9114
9160
|
Supported Platforms:
|
|
9115
|
-
``GPU`` ``CPU``
|
|
9161
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
9116
9162
|
|
|
9117
9163
|
Examples:
|
|
9118
9164
|
>>> x = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]).reshape([1,4,4,1]).astype(np.int64)
|
|
@@ -9143,9 +9189,10 @@ class FractionalMaxPool(Primitive):
|
|
|
9143
9189
|
|
|
9144
9190
|
class FractionalMaxPool3DWithFixedKsize(Primitive):
|
|
9145
9191
|
r"""
|
|
9146
|
-
|
|
9147
|
-
The max-pooling operation is applied in kD
|
|
9148
|
-
|
|
9192
|
+
Applies a 3D fractional max pooling to an input signal composed of multiple input planes.
|
|
9193
|
+
The max-pooling operation is applied in :math:`(kD, kH, kW)` regions by a stochastic step size determined by
|
|
9194
|
+
the target output size `output_shape`.
|
|
9195
|
+
|
|
9149
9196
|
The number of output features is equal to the number of input planes.
|
|
9150
9197
|
|
|
9151
9198
|
Refer to the paper `Fractional MaxPooling by Ben Graham <https://arxiv.org/abs/1412.6071>`_ for more details.
|
|
@@ -9154,13 +9201,11 @@ class FractionalMaxPool3DWithFixedKsize(Primitive):
|
|
|
9154
9201
|
D the feature depth, H is the feature height, and W is the feature width.
|
|
9155
9202
|
|
|
9156
9203
|
Args:
|
|
9157
|
-
ksize (Union[float, tuple]):
|
|
9158
|
-
|
|
9159
|
-
|
|
9160
|
-
|
|
9161
|
-
|
|
9162
|
-
specifying the size (D, H, W) of the output tensor.
|
|
9163
|
-
data_format (str) : The optional value for data format.
|
|
9204
|
+
ksize (Union[float, tuple]): Size of the pooling window. `ksize` can be a tuple of three values specifying a
|
|
9205
|
+
shape :math:`(k_D, k_H, k_W)`, or a single int `K` for :math:`(K, K, K)`.
|
|
9206
|
+
output_shape (Union[int, tuple]): The target output shape. `output_shape` can be a tuple of three values
|
|
9207
|
+
specifying a shape :math:`(D_{out}, H_{out}, W_{out})`, or a single int `S` for :math:`(S, S, S)`.
|
|
9208
|
+
data_format (str, optional): The optional value for data format.
|
|
9164
9209
|
Currently support 'NCDHW' and 'NHDWC'. Default: 'NCDHW'.
|
|
9165
9210
|
|
|
9166
9211
|
Inputs:
|
|
@@ -9193,13 +9238,13 @@ class FractionalMaxPool3DWithFixedKsize(Primitive):
|
|
|
9193
9238
|
ValueError: If the third dimension size of `random_samples` is not 3.
|
|
9194
9239
|
|
|
9195
9240
|
Supported Platforms:
|
|
9196
|
-
``GPU`` ``CPU``
|
|
9241
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
9197
9242
|
|
|
9198
9243
|
Examples:
|
|
9199
9244
|
>>> x = Tensor(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
|
|
9200
9245
|
... .reshape([1, 1, 2, 2, 4]), mstype.float32)
|
|
9201
9246
|
>>> random_samples = Tensor(np.array([0.7, 0.7, 0.7]).reshape([1, 1, 3]), mstype.float32)
|
|
9202
|
-
>>> ksize = (1
|
|
9247
|
+
>>> ksize = (1, 1, 1)
|
|
9203
9248
|
>>> output_shape = (1, 1, 2)
|
|
9204
9249
|
>>> net = ops.FractionalMaxPool3DWithFixedKsize(ksize = ksize, output_shape = output_shape)
|
|
9205
9250
|
>>> output, argmax = net(x, random_samples)
|
|
@@ -9213,15 +9258,15 @@ class FractionalMaxPool3DWithFixedKsize(Primitive):
|
|
|
9213
9258
|
def __init__(self, ksize, output_shape, data_format="NCDHW"):
|
|
9214
9259
|
"""Initialize FractionalMaxPool3DWithFixedKsize."""
|
|
9215
9260
|
self.init_prim_io_names(inputs=["x", "random_samples"], outputs=["y", "argmax"])
|
|
9216
|
-
validator.check_value_type("ksize", ksize, [
|
|
9261
|
+
validator.check_value_type("ksize", ksize, [int, tuple], self.name)
|
|
9217
9262
|
self.ksize = ksize
|
|
9218
|
-
if isinstance(self.ksize,
|
|
9263
|
+
if isinstance(self.ksize, int):
|
|
9219
9264
|
self.ksize = (ksize, ksize, ksize)
|
|
9220
9265
|
if len(self.ksize) != 3:
|
|
9221
|
-
raise ValueError(f"For '{self.name}', attr 'ksize' must be an positive
|
|
9222
|
-
f"three
|
|
9266
|
+
raise ValueError(f"For '{self.name}', attr 'ksize' must be an positive int number or a tuple of "
|
|
9267
|
+
f"three int numbers, but got {len(self.ksize)} numbers.")
|
|
9223
9268
|
for item in self.ksize:
|
|
9224
|
-
validator.
|
|
9269
|
+
validator.check_positive_int(item, 'ksize item', self.name)
|
|
9225
9270
|
self.output_shape = validator.check_value_type("output_shape", output_shape, [int, tuple], self.name)
|
|
9226
9271
|
self.data_format = validator.check_string(data_format, ['NCDHW', 'NDHWC'], 'data_format', self.name)
|
|
9227
9272
|
self.output_shape = _check_3d_int_or_tuple("output_shape", output_shape,
|
|
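The `__init__` hunk above accepts `ksize` as either an int or a 3-tuple and validates every element. A stand-alone sketch of that normalization logic in plain Python, outside the validator framework:

def normalize_ksize_3d(ksize, op_name="FractionalMaxPool3DWithFixedKsize"):
    """Accept an int K or a 3-tuple and return a validated (kD, kH, kW) tuple."""
    if isinstance(ksize, int):
        ksize = (ksize, ksize, ksize)
    if not isinstance(ksize, tuple) or len(ksize) != 3:
        raise ValueError(f"For '{op_name}', 'ksize' must be a positive int or a tuple "
                         f"of three ints, but got {ksize!r}.")
    for item in ksize:
        if not isinstance(item, int) or item <= 0:
            raise ValueError(f"For '{op_name}', every element of 'ksize' must be a "
                             f"positive int, but got {item!r}.")
    return ksize

print(normalize_ksize_3d(2))          # (2, 2, 2)
print(normalize_ksize_3d((1, 1, 2)))  # (1, 1, 2)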
@@ -9234,11 +9279,10 @@ class FractionalAvgPool(Primitive):
|
|
|
9234
9279
|
r"""
|
|
9235
9280
|
Performs fractional avg pooling on the input.
|
|
9236
9281
|
|
|
9237
|
-
Fractional avg pooling is similar to regular avg pooling,
|
|
9238
|
-
|
|
9239
|
-
|
|
9240
|
-
|
|
9241
|
-
is performed.
|
|
9282
|
+
Fractional avg pooling is similar to regular avg pooling, but with the added flexibility of
|
|
9283
|
+
allowing the overall reduction ratio `N` to be a non-integer value. In regular avg pooling,
|
|
9284
|
+
an input set is reduced in size by taking the average value of `N x N` (usually 2x2)
|
|
9285
|
+
subsections of the set, with the goal of reducing the set by a factor of `N`, where `N` is an integer.
|
|
9242
9286
|
|
|
9243
9287
|
.. warning::
|
|
9244
9288
|
"pooling_ratio", currently only supports row and col dimension and should be >= 1.0, the first
|
|
@@ -9249,20 +9293,25 @@ class FractionalAvgPool(Primitive):
|
|
|
9249
9293
|
Pooling ratio for each dimension of value should be >=0, currently only support for row and col
|
|
9250
9294
|
dimension. The first and last elements must be 1.0 because we don't allow pooling on batch and
|
|
9251
9295
|
channels dimensions.
|
|
9252
|
-
pseudo_random(bool, optional):
|
|
9253
|
-
|
|
9254
|
-
|
|
9255
|
-
|
|
9256
|
-
|
|
9257
|
-
|
|
9258
|
-
|
|
9259
|
-
|
|
9260
|
-
|
|
9261
|
-
|
|
9262
|
-
|
|
9263
|
-
|
|
9264
|
-
|
|
9265
|
-
|
|
9296
|
+
pseudo_random(bool, optional): Generate the pooling sequence either randomly or pseudo-randomly.
|
|
9297
|
+
If the pseudo_random parameter is set to True, the sequence will be generated in a
|
|
9298
|
+
pseudo-random fashion, otherwise it will be generated randomly.
|
|
9299
|
+
Refer to `Fractional Max-Pooling <https://arxiv.org/pdf/1412.6071>`_
|
|
9300
|
+
by Benjamin Graham to understand the distinction between the two.
|
|
9301
|
+
Default: False.
|
|
9302
|
+
overlapping(bool, optional): When set to True, the values at the boundary of adjacent pooling cells
|
|
9303
|
+
will be shared by both cells during pooling process. When set to False, the values are not reused.
|
|
9304
|
+
Default: False.
|
|
9305
|
+
deterministic(bool, optional): If deterministic is set to True, a fixed pooling region will be used
|
|
9306
|
+
in the computation graph, ensuring that the FractionalAvgPool is deterministic.
|
|
9307
|
+
This is often used in unit tests. When set to False, fixed pool regions will not be used.
|
|
9308
|
+
Default: False.
|
|
9309
|
+
seed(int, optional): If either seed or seed2 are set to a non-zero value, the random number
|
|
9310
|
+
generator will be seeded using the specified seed. If neither seed nor seed2 are set,
|
|
9311
|
+
the generator will be seeded by a random seed.
|
|
9312
|
+
Default: 0.
|
|
9313
|
+
seed2(int, optional): The second seed to avoid seed collision.
|
|
9314
|
+
Default: 0.
|
|
9266
9315
|
|
|
9267
9316
|
Inputs:
|
|
9268
9317
|
- **x** (Tensor) -The data type must be one of the following types: float32, float64, int32, int64.
|
|
@@ -9284,7 +9333,7 @@ class FractionalAvgPool(Primitive):
|
|
|
9284
9333
|
ValueError: If the first and last element of `pooling_ratio` is not equal to 1.0.
|
|
9285
9334
|
|
|
9286
9335
|
Supported Platforms:
|
|
9287
|
-
``GPU`` ``CPU``
|
|
9336
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
9288
9337
|
|
|
9289
9338
|
Examples:
|
|
9290
9339
|
>>> x = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]).reshape([1,4,4,1]).astype(np.int64)
|
|
@@ -9315,20 +9364,23 @@ class FractionalAvgPool(Primitive):
|
|
|
9315
9364
|
|
|
9316
9365
|
class NthElement(Primitive):
|
|
9317
9366
|
r"""
|
|
9318
|
-
|
|
9319
|
-
|
|
9320
|
-
|
|
9321
|
-
|
|
9322
|
-
|
|
9367
|
+
Computes the n-th smallest values for the last dimension of the input Tensor.
|
|
9368
|
+
|
|
9369
|
+
- When `input` is a 1-D Tensor (i.e. Vector), it finds the nth-smallest value in the vector
|
|
9370
|
+
and outputs its value as a scalar Tensor.
|
|
9371
|
+
- When `input` is a matrix or has higher rank, it finds the nth-smallest value
|
|
9372
|
+
in each row (or vector along the last dimension) and outputs
|
|
9373
|
+
these values in a Tensor with shape of `values.shape = input.shape[:-1]`.
|
|
9323
9374
|
|
|
9324
9375
|
Args:
|
|
9325
|
-
reverse (bool, optional): An optional bool.
|
|
9326
|
-
in the vector
|
|
9376
|
+
reverse (bool, optional): An optional bool. If set to True, it finds the :math:`n`-th largest value
|
|
9377
|
+
in the vector instead of the nth-smallest. Default: False.
|
|
9327
9378
|
|
|
9328
9379
|
Inputs:
|
|
9329
|
-
- **input** (Tensor) -
|
|
9330
|
-
- **n** (Union[int, Tensor]) - If the n is a
|
|
9331
|
-
Valid range of n is :math:`[0, input.shape[-1])
|
|
9380
|
+
- **input** (Tensor) - Input Tensor with 1-D or higher dimension.
|
|
9381
|
+
- **n** (Union[int, Tensor]) - If `n` is a Tensor, it should be a 0-D Tensor with dtype int32.
|
|
9382
|
+
Valid range of `n` is :math:`[0, input.shape[-1])` where :math:`input.shape[-1]` is
|
|
9383
|
+
last dimension size of `input`.
|
|
9332
9384
|
|
|
9333
9385
|
Outputs:
|
|
9334
9386
|
- **values** (Tensor) - Its shape satisfies: `values`.shape = `input`.shape[:-1].
|
|
@@ -9340,7 +9392,7 @@ class NthElement(Primitive):
|
|
|
9340
9392
|
ValueError**: If n is out of :math:`[0, input.shape[-1])`.
|
|
9341
9393
|
|
|
9342
9394
|
Supported Platforms:
|
|
9343
|
-
``GPU`` ``CPU``
|
|
9395
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
9344
9396
|
|
|
9345
9397
|
Examples:
|
|
9346
9398
|
>>> input = Tensor(np.array([[1,2,3],[4,5,6]]) , mstype.int8)
|
|
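The behaviour described above maps directly onto a partition (or sort) along the last axis. A NumPy sketch, with `reverse=True` selecting the n-th largest value instead:

import numpy as np

def nth_element(x, n, reverse=False):
    """n-th smallest (or largest, if reverse) value along the last axis."""
    # np.partition places the n-th order statistic at index n along the axis.
    part = np.partition(-x if reverse else x, n, axis=-1)[..., n]
    return -part if reverse else part

x = np.array([[1, 2, 3], [4, 5, 6]])
print(nth_element(x, 1))                # [2 5], so values.shape == x.shape[:-1]
print(nth_element(x, 1, reverse=True))  # [2 5], the second largest of each row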
@@ -9368,7 +9420,7 @@ class PSROIPooling(Primitive):
|
|
|
9368
9420
|
spatial_scale (float): a scaling factor that maps the box coordinates to the input coordinates.
|
|
9369
9421
|
For example, if your boxes are defined on the scale of a 224x224 image and
|
|
9370
9422
|
your input is a 112x112 feature map (resulting from a 0.5x scaling of the original
|
|
9371
|
-
image), you
|
|
9423
|
+
image), you'll want to set this to 0.5.
|
|
9372
9424
|
group_size (int): the size of the output (in pixels) after the pooling is performed, as (height, width).
|
|
9373
9425
|
output_dim (int): the dim of the output after the pooling is performed.
|
|
9374
9426
|
|
|
@@ -9428,9 +9480,9 @@ class PSROIPooling(Primitive):
|
|
|
9428
9480
|
... group_size=7)
|
|
9429
9481
|
>>> out = psROIPooling(features, rois)
|
|
9430
9482
|
>>> print(out.shape)
|
|
9431
|
-
|
|
9483
|
+
(4, 3, 7, 7)
|
|
9432
9484
|
>>> print(out.dtype)
|
|
9433
|
-
|
|
9485
|
+
Float32
|
|
9434
9486
|
"""
|
|
9435
9487
|
|
|
9436
9488
|
@prim_attr_register
|
|
@@ -9477,11 +9529,9 @@ class TripletMarginLoss(Primitive):
|
|
|
9477
9529
|
Args:
|
|
9478
9530
|
p (int, optional): The norm degree for pairwise distance. Default: 2.
|
|
9479
9531
|
eps (float, optional): Default: 1e-06.
|
|
9480
|
-
swap (bool, optional): The distance swap
|
|
9481
|
-
`Learning local feature descriptors with triplets and shallow convolutional neural networks`
|
|
9482
|
-
by V. Balntas, E. Riba et al. Default: "False".
|
|
9532
|
+
swap (bool, optional): The distance swap. Default: False.
|
|
9483
9533
|
reduction (str, optional): Apply specific reduction method to the
|
|
9484
|
-
output:
|
|
9534
|
+
output: "none", "mean", "sum". Default: "mean".
|
|
9485
9535
|
|
|
9486
9536
|
Inputs:
|
|
9487
9537
|
- **x** (Tensor) - A sample randomly selected from the training set. Data type must be BasicType.
|
|
@@ -9512,7 +9562,7 @@ class TripletMarginLoss(Primitive):
|
|
|
9512
9562
|
ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
|
|
9513
9563
|
|
|
9514
9564
|
Supported Platforms:
|
|
9515
|
-
``GPU``
|
|
9565
|
+
``GPU``
|
|
9516
9566
|
|
|
9517
9567
|
Examples:
|
|
9518
9568
|
>>> loss = ops.TripletMarginLoss()
|
|
@@ -9542,7 +9592,7 @@ class DeformableOffsets(Primitive):
|
|
|
9542
9592
|
Refer to :func:`mindspore.ops.deformable_conv2d` for more details.
|
|
9543
9593
|
|
|
9544
9594
|
Supported Platforms:
|
|
9545
|
-
``Ascend`` ``
|
|
9595
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
9546
9596
|
"""
|
|
9547
9597
|
|
|
9548
9598
|
@prim_attr_register
|
|
@@ -9590,26 +9640,39 @@ class DeformableOffsets(Primitive):
|
|
|
9590
9640
|
|
|
9591
9641
|
class GridSampler2D(Primitive):
|
|
9592
9642
|
"""
|
|
9593
|
-
This operation samples 2d input_x by using interpolation based on flow field grid,
|
|
9594
|
-
:func:`mindspore.ops.affine_grid`.
|
|
9643
|
+
This operation samples 2d `input_x` by using interpolation based on flow field grid,
|
|
9644
|
+
which is usually generated by :func:`mindspore.ops.affine_grid`.
|
|
9645
|
+
|
|
9646
|
+
.. warning::
|
|
9647
|
+
This is an experimental API that is subject to change or deletion.
|
|
9595
9648
|
|
|
9596
9649
|
Args:
|
|
9597
|
-
interpolation_mode (str): An optional string specifying the interpolation method.
|
|
9650
|
+
interpolation_mode (str, optional): An optional string specifying the interpolation method.
|
|
9651
|
+
The optional values are
|
|
9598
9652
|
"bilinear" or "nearest". Default: "bilinear".
|
|
9599
|
-
padding_mode (str): An optional string specifying the pad method.
|
|
9600
|
-
"reflection". Default: "zeros".
|
|
9601
|
-
|
|
9653
|
+
padding_mode (str, optional): An optional string specifying the pad method.
|
|
9654
|
+
The optional values are "zeros", "border" or "reflection". Default: "zeros".
|
|
9655
|
+
When the sampling grid is outside input's bounds, effects of various padding modes are as follows:
|
|
9656
|
+
|
|
9657
|
+
- "zeros": Pads the input tensor with zeros.
|
|
9658
|
+
- "border": Pads the input tensor with the values of the pixels on the border of the tensor.
|
|
9659
|
+
- "reflection": Pads the input tensor by reflecting the values of the pixels at the
|
|
9660
|
+
boundary of the tensor.
|
|
9661
|
+
|
|
9662
|
+
align_corners (bool, optional): An optional bool. When set to True,
|
|
9663
|
+
the centers of the corner pixels of the input
|
|
9602
9664
|
and output tensors are aligned. When set to False, it is not aligned. Defaults to False.
|
|
9603
9665
|
|
|
9604
9666
|
Inputs:
|
|
9605
|
-
- **input_x** (Tensor) - A 4-D tensor with dtype of float16 or float32 and shape of
|
|
9606
|
-
H_{in}, W_{in})`.
|
|
9607
|
-
- **grid** (Tensor) - A 4-D tensor whose dtype is the same as `input_x` and whose shape is
|
|
9608
|
-
H_{out}, W_{out}, 2)`.
|
|
9667
|
+
- **input_x** (Tensor) - A 4-D tensor with dtype of float16 or float32 and shape of
|
|
9668
|
+
:math:`(N, C, H_{in}, W_{in})`.
|
|
9669
|
+
- **grid** (Tensor) - A 4-D tensor whose dtype is the same as `input_x` and whose shape is
|
|
9670
|
+
:math:`(N, H_{out}, W_{out}, 2)`.
|
|
9671
|
+
Used to specify the sampling pixel locations normalized by the input spatial
|
|
9609
9672
|
dimensions.
|
|
9610
9673
|
|
|
9611
9674
|
Outputs:
|
|
9612
|
-
|
|
9675
|
+
A 4-D Tensor whose dtype is the same as `input_x` and whose shape is :math:`(N, C, H_{out}, W_{out})`.
|
|
9613
9676
|
|
|
9614
9677
|
Raises:
|
|
9615
9678
|
TypeError: If `input_x` or `grid` is not a Tensor.
|
|
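As a concrete reading of the padding modes listed above, the sketch below shows how an out-of-range integer index along one axis could be resolved under each mode (nearest-neighbour case only; the operator additionally handles normalized grid coordinates and bilinear weights, and the exact reflection convention depends on `align_corners`):

import numpy as np

def resolve_index(idx, size, padding_mode="zeros"):
    """Map a possibly out-of-range index to a value source, per padding mode."""
    if 0 <= idx < size:
        return idx
    if padding_mode == "zeros":
        return None                              # caller substitutes 0 for the sample
    if padding_mode == "border":
        return int(np.clip(idx, 0, size - 1))    # clamp to the edge pixel
    if padding_mode == "reflection":
        period = 2 * (size - 1) if size > 1 else 1
        idx = abs(idx) % period
        return period - idx if idx >= size else idx   # reflect about the boundary
    raise ValueError(padding_mode)

for mode in ("zeros", "border", "reflection"):
    print(mode, [resolve_index(i, 4, mode) for i in (-2, 1, 5)])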
@@ -9661,37 +9724,16 @@ class Pdist(Primitive):
|
|
|
9661
9724
|
r"""
|
|
9662
9725
|
Computes the p-norm distance between each pair of row vectors in the input.
|
|
9663
9726
|
|
|
9664
|
-
|
|
9665
|
-
|
|
9666
|
-
y[n] = \sqrt[p]{{\mid x_{i} - x_{j} \mid}^p},
|
|
9667
|
-
|
|
9668
|
-
where :math:`x_{i}, x_{j}` are two different row vectors in the input.
|
|
9669
|
-
|
|
9670
|
-
Args:
|
|
9671
|
-
p (float): p value for the p norm distance to calculate between each vector pair ∈[0,∞]. Default: 2.0.
|
|
9672
|
-
|
|
9673
|
-
Inputs:
|
|
9674
|
-
- **x** (Tensor) - Input tensor with dtype of float16 or float32 and shape of :math:`(N, M)`.
|
|
9675
|
-
|
|
9676
|
-
Outputs:
|
|
9677
|
-
Tensor, has the same dtype as `x`, whose shape is :math:`(N * (N - 1) / 2)`.
|
|
9678
|
-
|
|
9679
|
-
Raises:
|
|
9680
|
-
TypeError: If `x` is not a Tensor.
|
|
9681
|
-
TypeError: If dtype of `x` is not float16, float32 or float64.
|
|
9682
|
-
TypeError: If `p` is not a float.
|
|
9683
|
-
ValueError: If `p` is a negative float.
|
|
9684
|
-
ValueError: If dimension of `x` is not 2.
|
|
9727
|
+
Refer to :func:`mindspore.ops.pdist` for more details.
|
|
9685
9728
|
|
|
9686
9729
|
Supported Platforms:
|
|
9687
|
-
``
|
|
9730
|
+
``GPU`` ``CPU``
|
|
9688
9731
|
|
|
9689
9732
|
Examples:
|
|
9690
|
-
>>> from mindspore import Tensor
|
|
9691
|
-
>>> from mindspore.ops.operations.nn_ops import Pdist
|
|
9733
|
+
>>> from mindspore import Tensor, ops
|
|
9692
9734
|
>>> import numpy as np
|
|
9693
9735
|
>>> x = Tensor(np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]).astype(np.float32))
|
|
9694
|
-
>>> op = Pdist(p=2.0)
|
|
9736
|
+
>>> op = ops.Pdist(p=2.0)
|
|
9695
9737
|
>>> y = op(x)
|
|
9696
9738
|
>>> print(y)
|
|
9697
9739
|
[1.4142135 2.828427 1.4142135]
|
|
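The Pdist example above is equivalent to the following NumPy loop over all row pairs (i < j):

import numpy as np

def pdist(x, p=2.0):
    """p-norm distance between each pair of row vectors; output length is N*(N-1)/2."""
    n = x.shape[0]
    out = []
    for i in range(n):
        for j in range(i + 1, n):
            out.append(np.sum(np.abs(x[i] - x[j]) ** p) ** (1.0 / p))
    return np.array(out, dtype=x.dtype)

x = np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]], dtype=np.float32)
print(pdist(x, p=2.0))   # [1.4142135 2.828427  1.4142135]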
@@ -9743,7 +9785,7 @@ class UpsampleNearest3D(Primitive):
|
|
|
9743
9785
|
ValueError: If size of `output_size` is not equal 3 when `output_size` is specified.
|
|
9744
9786
|
|
|
9745
9787
|
Supported Platforms:
|
|
9746
|
-
|
|
9788
|
+
|
|
9747
9789
|
|
|
9748
9790
|
Examples:
|
|
9749
9791
|
>>> x = Tensor(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
|
|
@@ -9776,10 +9818,10 @@ class UpsampleNearest3D(Primitive):
|
|
|
9776
9818
|
scales = []
|
|
9777
9819
|
validator.check_value_type('output_size', output_size, [tuple, list], self.name)
|
|
9778
9820
|
for item in output_size:
|
|
9779
|
-
validator.check_int(item, 0,
|
|
9821
|
+
validator.check_int(item, 0, validator.GT, 'output_size_item', self.name)
|
|
9780
9822
|
validator.check_value_type('scales', scales, [tuple, list], self.name)
|
|
9781
9823
|
for item in scales:
|
|
9782
|
-
validator.check_float(item, 0,
|
|
9824
|
+
validator.check_float(item, 0, validator.GT, 'scales_item', self.name)
|
|
9783
9825
|
self.add_prim_attr('output_size', output_size)
|
|
9784
9826
|
self.add_prim_attr('scales', scales)
|
|
9785
9827
|
|
|
@@ -9844,16 +9886,12 @@ class SparseApplyAdagradDA(Primitive):
|
|
|
9844
9886
|
conversion of Parameter is not supported.
|
|
9845
9887
|
|
|
9846
9888
|
Supported Platforms:
|
|
9847
|
-
``CPU``
|
|
9889
|
+
``GPU`` ``CPU``
|
|
9848
9890
|
|
|
9849
9891
|
Examples:
|
|
9850
|
-
>>>
|
|
9851
|
-
>>>
|
|
9852
|
-
>>>
|
|
9853
|
-
>>> import mindspore.ops.operations.nn_ops as nn_ops
|
|
9854
|
-
>>> var = Tensor(np.array([[1,2], [1,2]]).astype(np.float32))
|
|
9855
|
-
>>> grad_accum = Tensor(np.array([[2,1], [3,1]]).astype(np.float32))
|
|
9856
|
-
>>> grad_square_accum = Tensor(np.array([[4,1], [5,1]]).astype(np.float32))
|
|
9892
|
+
>>> var = Parameter(Tensor(np.array([[1,2], [1,2]]).astype(np.float32)))
|
|
9893
|
+
>>> grad_accum = Parameter(Tensor(np.array([[2,1], [3,1]]).astype(np.float32)))
|
|
9894
|
+
>>> grad_square_accum = Parameter(Tensor(np.array([[4,1], [5,1]]).astype(np.float32)))
|
|
9857
9895
|
>>> grad = Tensor(np.array([[5,1], [6,1]]).astype(np.float32))
|
|
9858
9896
|
>>> indices = Tensor(np.array([0, 1], dtype=np.int32))
|
|
9859
9897
|
>>> lr = Tensor(2, mstype.float32)
|
|
@@ -9869,9 +9907,9 @@ class SparseApplyAdagradDA(Primitive):
|
|
|
9869
9907
|
"""
|
|
9870
9908
|
|
|
9871
9909
|
__mindspore_signature__ = (
|
|
9872
|
-
sig.make_sig('var', dtype=sig.sig_dtype.T),
|
|
9873
|
-
sig.make_sig('grad_accum', dtype=sig.sig_dtype.T),
|
|
9874
|
-
sig.make_sig('grad_square_accum', dtype=sig.sig_dtype.T),
|
|
9910
|
+
sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
|
|
9911
|
+
sig.make_sig('grad_accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
|
|
9912
|
+
sig.make_sig('grad_square_accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
|
|
9875
9913
|
sig.make_sig('grad', dtype=sig.sig_dtype.T),
|
|
9876
9914
|
sig.make_sig('indices', dtype=sig.sig_dtype.T1),
|
|
9877
9915
|
sig.make_sig('lr', dtype=sig.sig_dtype.T),
|
|
@@ -9941,7 +9979,7 @@ class SparseApplyMomentum(Primitive):
|
|
|
9941
9979
|
is not supported.
|
|
9942
9980
|
|
|
9943
9981
|
Supported Platforms:
|
|
9944
|
-
``CPU``
|
|
9982
|
+
``GPU`` ``CPU``
|
|
9945
9983
|
|
|
9946
9984
|
Examples:
|
|
9947
9985
|
>>> import mindspore.ops.operations.nn_ops as nn_ops
|
|
@@ -10025,7 +10063,7 @@ class SparseApplyProximalGradientDescent(Primitive):
|
|
|
10025
10063
|
is not supported.
|
|
10026
10064
|
|
|
10027
10065
|
Supported Platforms:
|
|
10028
|
-
``CPU``
|
|
10066
|
+
``GPU`` ``CPU``
|
|
10029
10067
|
|
|
10030
10068
|
Examples:
|
|
10031
10069
|
>>> import mindspore.ops.operations.nn_ops as nn_ops
|
|
@@ -10043,7 +10081,7 @@ class SparseApplyProximalGradientDescent(Primitive):
|
|
|
10043
10081
|
"""
|
|
10044
10082
|
|
|
10045
10083
|
__mindspore_signature__ = (
|
|
10046
|
-
sig.make_sig('var', dtype=sig.sig_dtype.T),
|
|
10084
|
+
sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
|
|
10047
10085
|
sig.make_sig('alpha', dtype=sig.sig_dtype.T),
|
|
10048
10086
|
sig.make_sig('l1', dtype=sig.sig_dtype.T),
|
|
10049
10087
|
sig.make_sig('l2', dtype=sig.sig_dtype.T),
|
|
@@ -10104,7 +10142,7 @@ class NuclearNorm(Primitive):
|
|
|
10104
10142
|
x_rank is the dimension of Tensor `x`.
|
|
10105
10143
|
|
|
10106
10144
|
Supported Platforms:
|
|
10107
|
-
``CPU``
|
|
10145
|
+
``Ascend`` ``CPU``
|
|
10108
10146
|
|
|
10109
10147
|
Examples:
|
|
10110
10148
|
>>> input_x = Tensor(np.array([[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
|
|
@@ -10139,7 +10177,7 @@ class NuclearNorm(Primitive):
|
|
|
10139
10177
|
"""Initialize NuclearNorm."""
|
|
10140
10178
|
validator.check_value_type("dim", dim, [list, tuple, type(None)], self.name)
|
|
10141
10179
|
if dim is not None:
|
|
10142
|
-
validator.check_int(len(dim), 2,
|
|
10180
|
+
validator.check_int(len(dim), 2, validator.EQ, 'length of dim_size', self.name)
|
|
10143
10181
|
validator.check_is_int(dim[0], "dim[0]", self.name)
|
|
10144
10182
|
validator.check_is_int(dim[1], "dim[1]", self.name)
|
|
10145
10183
|
else:
|
|
@@ -10148,44 +10186,27 @@ class NuclearNorm(Primitive):
|
|
|
10148
10186
|
|
|
10149
10187
|
|
|
10150
10188
|
class GLU(Primitive):
|
|
10151
|
-
r"""
|
|
10152
|
-
|
|
10153
|
-
.. math ::
|
|
10154
|
-
\begin{array}{ll} \\
|
|
10155
|
-
\text{GLU}(a, b) = a \otimes \sigma(b)
|
|
10156
|
-
\end{array}
|
|
10157
|
-
where `input` is split in half along `dim` to form `a` and `b`,
|
|
10158
|
-
σ is the sigmoid function and ⊗ is the element-wise product between matrices.
|
|
10159
|
-
|
|
10160
|
-
Args:
|
|
10161
|
-
axis (int): Dimension on which to split the input.
|
|
10162
|
-
The value of `axis` must be in the range [-rank(`x`), rank(`x`)). Default: -1.
|
|
10163
|
-
|
|
10164
|
-
Inputs:
|
|
10165
|
-
- **x** (Tensor) - Input tensor. `x.shape[axis]` must be even.
|
|
10189
|
+
r"""
|
|
10190
|
+
Computes GLU (Gated Linear Unit activation function) of input tensors.
|
|
10166
10191
|
|
|
10167
|
-
|
|
10168
|
-
|
|
10169
|
-
With the same shape as `x`, except for the dimension of `axis`, y.shape[axis] = x.shape[axis] / 2.
|
|
10192
|
+
.. warning::
|
|
10193
|
+
This is an experimental API that is subject to change or deletion.
|
|
10170
10194
|
|
|
10171
|
-
|
|
10172
|
-
TypeError: If data type of `x` is not one of the following: float16, float32, float64.
|
|
10173
|
-
TypeError: If data type of `axis` is not int.
|
|
10174
|
-
ValueError: If `axis` is not in the range [-rank(`x`), rank(`x`)).
|
|
10175
|
-
ValueError: If the dimension of the `x` is not equal or greater than 1.
|
|
10176
|
-
ValueError: If `x.shape[axis]` is not even.
|
|
10195
|
+
Refer to :func:`mindspore.ops.glu` for more details.
|
|
10177
10196
|
|
|
10178
10197
|
Supported Platforms:
|
|
10179
10198
|
``Ascend`` ``CPU``
|
|
10180
10199
|
|
|
10181
10200
|
Examples:
|
|
10182
|
-
>>> from mindspore
|
|
10201
|
+
>>> from mindspore import ops, Tensor
|
|
10202
|
+
>>> from mindspore import dtype as mstype
|
|
10203
|
+
>>> import numpy as np
|
|
10183
10204
|
>>> axis = 0
|
|
10184
10205
|
>>> x = Tensor(np.array([0.3220, 0.9545, 0.7879, 0.0975, 0.3698,
|
|
10185
10206
|
... 0.5135, 0.5740, 0.3435, 0.1895, 0.8764,
|
|
10186
10207
|
... 0.4980, 0.9673, 0.9879, 0.6988, 0.9022,
|
|
10187
10208
|
... 0.9304, 0.1558, 0.0153, 0.1559, 0.9852]).reshape([2, 2, 5]), mstype.float32)
|
|
10188
|
-
>>> glu =
|
|
10209
|
+
>>> glu = ops.GLU(axis=axis)
|
|
10189
10210
|
>>> y = glu(x)
|
|
10190
10211
|
>>> print(y)
|
|
10191
10212
|
[[[0.20028052 0.6916126 0.57412136 0.06512236 0.26307625]
|
|
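A NumPy sketch of the split-and-gate rule behind :func:`mindspore.ops.glu`: split the chosen axis in half into `a` and `b`, then return `a * sigmoid(b)`:

import numpy as np

def glu(x, axis=-1):
    """Gated Linear Unit: a * sigmoid(b), with a and b the two halves of x along `axis`."""
    a, b = np.split(x, 2, axis=axis)          # requires x.shape[axis] to be even
    return a * (1.0 / (1.0 + np.exp(-b)))

x = np.arange(8, dtype=np.float32).reshape(2, 4)
print(glu(x, axis=0).shape)   # (1, 4): the split axis is halved
print(glu(x, axis=-1).shape)  # (2, 2)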
@@ -10201,19 +10222,18 @@ class GLU(Primitive):
|
|
|
10201
10222
|
class FractionalMaxPoolWithFixedKsize(Primitive):
|
|
10202
10223
|
r"""
|
|
10203
10224
|
Applies a 2D fractional max pooling to an input signal composed of multiple input planes.
|
|
10204
|
-
The max-pooling operation is applied in kH
|
|
10205
|
-
the target output size
|
|
10206
|
-
|
|
10225
|
+
The max-pooling operation is applied in :math:`(kH, kW)` regions by a stochastic step size determined by
|
|
10226
|
+
the target output size `output_shape`.
|
|
10227
|
+
|
|
10228
|
+
The number of output features is equal to the number of input planes.
|
|
10207
10229
|
|
|
10208
10230
|
Fractional MaxPooling is described in the paper `Fractional Max-Pooling <https://arxiv.org/pdf/1412.6071>`_.
|
|
10209
10231
|
|
|
10210
10232
|
Args:
|
|
10211
|
-
ksize (Union[int, tuple[int]]):
|
|
10212
|
-
|
|
10213
|
-
|
|
10214
|
-
|
|
10215
|
-
output_shape can be a tuple, or a single H for H x H.
|
|
10216
|
-
specifying the size (H, W) of the output tensor.
|
|
10233
|
+
ksize (Union[int, tuple[int]]): Size of the pooling window. `ksize` can be a tuple of two values
|
|
10234
|
+
specifying a shape :math:`(k_H, k_W)`, or a single int `K` for :math:`(K, K)`.
|
|
10235
|
+
output_shape (Union[int, tuple[int]]): The target output shape. `output_shape` can be a
|
|
10236
|
+
tuple of two values specifying a shape :math:`(H_{out}, W_{out})`, or a single int `S` for :math:`(S, S)`.
|
|
10217
10237
|
data_format (str, optional): The optional value for data format, is 'NCHW'.
|
|
10218
10238
|
Default: "NCHW".
|
|
10219
10239
|
|
|
@@ -10225,7 +10245,7 @@ class FractionalMaxPoolWithFixedKsize(Primitive):
|
|
|
10225
10245
|
|
|
10226
10246
|
Outputs:
|
|
10227
10247
|
- **y** (Tensor) - Has the same type as the `input_x`.
|
|
10228
|
-
Has the shape :math:`(N, C,
|
|
10248
|
+
Has the shape :math:`(N, C, H_{out}, W_{out})`.
|
|
10229
10249
|
- **argmax** (Tensor) -A tensor whose data type must be int64. Has the same shape as the `y`.
|
|
10230
10250
|
|
|
10231
10251
|
Raises:
|
|
@@ -10277,3 +10297,129 @@ class FractionalMaxPoolWithFixedKsize(Primitive):
|
|
|
10277
10297
|
self.add_prim_attr("output_shape", self.output_shape)
|
|
10278
10298
|
self.data_format = validator.check_string(data_format, ['NCHW'], 'data_format', self.name)
|
|
10279
10299
|
self.init_prim_io_names(inputs=['input_x', 'random_samples'], outputs=['y', 'argmax'])
|
|
10300
|
+
|
|
10301
|
+
|
|
10302
|
+
class ChannelShuffle(Primitive):
|
|
10303
|
+
r"""
|
|
10304
|
+
Divides the channels in a tensor of shape (*, C, H, W) into g groups and
|
|
10305
|
+
rearranges them as (*, C/g, g, H*W), while keeping the original tensor shape.
|
|
10306
|
+
|
|
10307
|
+
.. warning::
|
|
10308
|
+
This is an experimental API that is subject to change or deletion.
|
|
10309
|
+
|
|
10310
|
+
Refer to :func:`mindspore.ops.channel_shuffle` for more details.
|
|
10311
|
+
|
|
10312
|
+
Supported Platforms:
|
|
10313
|
+
``Ascend`` ``CPU``
|
|
10314
|
+
|
|
10315
|
+
Examples:
|
|
10316
|
+
>>> group = 2
|
|
10317
|
+
>>> x = Tensor(np.arange(1 * 4 * 2 * 2).reshape(1, 4, 2, 2).astype(np.int16))
|
|
10318
|
+
>>> channel_shuffle_func = ops.ChannelShuffle(group)
|
|
10319
|
+
>>> y = channel_shuffle_func(x)
|
|
10320
|
+
>>> print(y)
|
|
10321
|
+
[[[[ 0 1]
|
|
10322
|
+
[ 2 3]]
|
|
10323
|
+
[[ 8 9]
|
|
10324
|
+
[10 11]]
|
|
10325
|
+
[[ 4 5]
|
|
10326
|
+
[ 6 7]]
|
|
10327
|
+
[[12 13]
|
|
10328
|
+
[14 15]]]]
|
|
10329
|
+
"""
|
|
10330
|
+
|
|
10331
|
+
@prim_attr_register
|
|
10332
|
+
def __init__(self, group):
|
|
10333
|
+
"""Initialize ChannelShuffle"""
|
|
10334
|
+
if not isinstance(group, int):
|
|
10335
|
+
raise ValueError(f"For '{self.name}', attr 'group' must be an positive int number")
|
|
10336
|
+
self.init_prim_io_names(inputs=['x'], outputs=['y'])
|
|
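The group rearrangement is just a reshape and an axis swap. A NumPy sketch that reproduces the example output above:

import numpy as np

def channel_shuffle(x, group):
    """Shuffle the channels of an (N, C, H, W) array across `group` groups."""
    n, c, h, w = x.shape
    assert c % group == 0, "C must be divisible by group"
    x = x.reshape(n, group, c // group, h, w)   # split the channels into groups
    x = x.transpose(0, 2, 1, 3, 4)              # interleave the groups
    return x.reshape(n, c, h, w)

x = np.arange(1 * 4 * 2 * 2).reshape(1, 4, 2, 2).astype(np.int16)
print(channel_shuffle(x, 2)[0, :, 0, 0])   # channel order becomes [0 8 4 12], as in the example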
10337
|
+
|
|
10338
|
+
|
|
10339
|
+
class MaxPoolWithArgmaxV2(Primitive):
|
|
10340
|
+
r"""
|
|
10341
|
+
Performs max pooling on the input Tensor and returns both max values and indices.
|
|
10342
|
+
|
|
10343
|
+
Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool outputs
|
|
10344
|
+
regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
|
|
10345
|
+
:math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows:
|
|
10346
|
+
|
|
10347
|
+
.. math::
|
|
10348
|
+
\text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
|
|
10349
|
+
\text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)
|
|
10350
|
+
|
|
10351
|
+
Args:
|
|
10352
|
+
kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value and argmax
|
|
10353
|
+
value, is an int number that represents height and width of the kernel, or a tuple of
|
|
10354
|
+
two int numbers that represent height and width respectively.
|
|
10355
|
+
strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
|
|
10356
|
+
not only the height of movement but also the width of movement, or a tuple of two int numbers that
|
|
10357
|
+
represent height and width of movement respectively. Default: None, meaning that `strides = kernel_size`.
|
|
10358
|
+
pads (Union[int, tuple[int]]): An int number that represents the padding applied to both the height and width,
|
|
10359
|
+
or a tuple of two int numbers that represent the padding on the height and width respectively. Default: 0.
|
|
10360
|
+
dilation (Union[int, tuple[int]]): Default: (1, 1).
|
|
10361
|
+
ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: False.
|
|
10362
|
+
argmax_type (mindspore.dtype) : The dtype for argmax. Default: mstype.int64.
|
|
10363
|
+
|
|
10364
|
+
Inputs:
|
|
10365
|
+
- **x** (Tensor) - Tensor of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})` with data type of int8,
|
|
10366
|
+
int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32 or float64.
|
|
10367
|
+
|
|
10368
|
+
Outputs:
|
|
10369
|
+
Tuple of 2 Tensors, representing the maxpool result and where the max values are generated.
|
|
10370
|
+
|
|
10371
|
+
- **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, H_{out}, W_{out})`.
|
|
10372
|
+
It has the same data type as `x`.
|
|
10373
|
+
- **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int32 or int64.
|
|
10374
|
+
|
|
10375
|
+
Raises:
|
|
10376
|
+
TypeError: If `x` is not a Tensor.
|
|
10377
|
+
ValueError: If length of shape of `x` is not equal to 4.
|
|
10378
|
+
TypeError: If `kernel_size` , `strides` , `pads` or `dilation` is not int or tuple.
|
|
10379
|
+
ValueError: If `kernel_size`, `strides` or `dilation` is less than 1.
|
|
10380
|
+
ValueError: If `pads` is less than 0.
|
|
10381
|
+
ValueError: If `argmax_type` is not mindspore.int64 or mindspore.int32.
|
|
10382
|
+
TypeError: If `ceil_mode` is not bool.
|
|
10383
|
+
|
|
10384
|
+
Supported Platforms:
|
|
10385
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
10386
|
+
|
|
10387
|
+
Examples:
|
|
10388
|
+
>>> x = Tensor(np.arange(20 * 16 * 50 * 32).reshape((20, 16, 50, 32)), mindspore.float32)
|
|
10389
|
+
>>> maxpool_arg_v2_op = ops.MaxPoolWithArgmaxV2(kernel_size=(3, 2), strides=(2, 1))
|
|
10390
|
+
>>> output_tensor, argmax = maxpool_arg_v2_op(x)
|
|
10391
|
+
>>> print(output_tensor.shape)
|
|
10392
|
+
(20, 16, 24, 31)
|
|
10393
|
+
>>> print(argmax.shape)
|
|
10394
|
+
(20, 16, 24, 31)
|
|
10395
|
+
"""
|
|
10396
|
+
|
|
10397
|
+
@prim_attr_register
|
|
10398
|
+
def __init__(self, kernel_size, strides=None, pads=0, dilation=(1, 1,), ceil_mode=False, argmax_type=mstype.int64):
|
|
10399
|
+
"""Initialize MaxPoolWithArgmaxV2."""
|
|
10400
|
+
self.init_prim_io_names(inputs=["x"], outputs=["output", "argmax"])
|
|
10401
|
+
validator.check_value_type("ceil_mode", ceil_mode, bool, self.name)
|
|
10402
|
+
self.ceil_mode = ceil_mode
|
|
10403
|
+
validator.check_value_type("argmax_type", argmax_type, [mstype.Type], self.name)
|
|
10404
|
+
argmax_type_valid_values = (mstype.int32, mstype.int64)
|
|
10405
|
+
validator.check_type_name("argmax_type", argmax_type, argmax_type_valid_values, self.name)
|
|
10406
|
+
if argmax_type == mstype.int32:
|
|
10407
|
+
self.add_prim_attr("argmax_type", 3)
|
|
10408
|
+
elif argmax_type == mstype.int64:
|
|
10409
|
+
self.add_prim_attr("argmax_type", 4)
|
|
10410
|
+
else:
|
|
10411
|
+
raise ValueError(
|
|
10412
|
+
f"For '{self.name}', the 'argmax_type' must be mstype.int32 or mstype.int64, but got {argmax_type}.")
|
|
10413
|
+
self.kernel_size = _check_positive_int_or_tuple("kernel_size", kernel_size, self.name, ret_four=True,
|
|
10414
|
+
allow_four=True)
|
|
10415
|
+
if strides is None:
|
|
10416
|
+
strides = kernel_size
|
|
10417
|
+
self.strides = _check_positive_int_or_tuple("strides", strides, self.name, ret_four=True, allow_four=True)
|
|
10418
|
+
self.pads = _check_positive_int_or_tuple("pads", pads, self.name, ret_four=True, allow_four=True,
|
|
10419
|
+
strict_positive=False)
|
|
10420
|
+
self.dilation = _check_positive_int_or_tuple("dilation", dilation, self.name, ret_four=True, allow_four=True)
|
|
10421
|
+
self.add_prim_attr("kernel_size", self.kernel_size)
|
|
10422
|
+
self.add_prim_attr("strides", self.strides)
|
|
10423
|
+
self.add_prim_attr("pads", self.pads)
|
|
10424
|
+
self.add_prim_attr("dilation", self.dilation)
|
|
10425
|
+
self.add_prim_attr("ceil_mode", self.ceil_mode)
|