mindspore 1.10.0__cp37-cp37m-win_amd64.whl → 2.0.0rc1__cp37-cp37m-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (966)
  1. mindspore/.commit_id +1 -1
  2. mindspore/ConcurrencyCheck.dll +0 -0
  3. mindspore/CppBuildInsights.dll +0 -0
  4. mindspore/CppCoreCheck.dll +0 -0
  5. mindspore/EnumIndex.dll +0 -0
  6. mindspore/EspXEngine.dll +0 -0
  7. mindspore/HResultCheck.dll +0 -0
  8. mindspore/KernelTraceControl.dll +0 -0
  9. mindspore/LocalESPC.dll +0 -0
  10. mindspore/Microsoft.Diagnostics.Tracing.EventSource.dll +0 -0
  11. mindspore/Microsoft.VisualStudio.RemoteControl.dll +0 -0
  12. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  13. mindspore/Microsoft.VisualStudio.Utilities.Internal.dll +0 -0
  14. mindspore/Newtonsoft.Json.dll +0 -0
  15. mindspore/System.Runtime.CompilerServices.Unsafe.dll +0 -0
  16. mindspore/VariantClear.dll +0 -0
  17. mindspore/__init__.py +9 -4
  18. mindspore/_c_dataengine.cp37-win_amd64.pyd +0 -0
  19. mindspore/_c_expression.cp37-win_amd64.pyd +0 -0
  20. mindspore/_c_mindrecord.cp37-win_amd64.pyd +0 -0
  21. mindspore/_check_jit_forbidden_api.py +102 -0
  22. mindspore/_checkparam.py +1066 -1001
  23. mindspore/_extends/builtin_operations.py +32 -4
  24. mindspore/_extends/graph_kernel/model/graph_split.py +66 -222
  25. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +12 -9
  26. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +119 -26
  27. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -50
  28. mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -6
  29. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -25
  30. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
  31. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -27
  32. mindspore/_extends/parse/__init__.py +5 -3
  33. mindspore/_extends/parse/namespace.py +17 -2
  34. mindspore/_extends/parse/parser.py +193 -34
  35. mindspore/_extends/parse/resources.py +7 -8
  36. mindspore/_extends/parse/standard_method.py +1780 -435
  37. mindspore/_extends/parse/trope.py +3 -1
  38. mindspore/amp.py +53 -58
  39. mindspore/atlprov.dll +0 -0
  40. mindspore/boost/adasum.py +3 -2
  41. mindspore/boost/boost.py +2 -2
  42. mindspore/boost/boost_cell_wrapper.py +46 -26
  43. mindspore/boost/dim_reduce.py +6 -5
  44. mindspore/boost/grad_accumulation.py +2 -1
  45. mindspore/boost/group_loss_scale_manager.py +1 -1
  46. mindspore/c1.dll +0 -0
  47. mindspore/c1xx.dll +0 -0
  48. mindspore/c2.dll +0 -0
  49. mindspore/cfgpersist.dll +0 -0
  50. mindspore/clang_rt.asan_dbg_dynamic-x86_64.dll +0 -0
  51. mindspore/clang_rt.asan_dynamic-x86_64.dll +0 -0
  52. mindspore/common/__init__.py +11 -10
  53. mindspore/common/_decorator.py +2 -0
  54. mindspore/common/_register_for_adapter.py +55 -0
  55. mindspore/common/_stub_tensor.py +201 -0
  56. mindspore/common/_utils.py +57 -0
  57. mindspore/common/api.py +582 -297
  58. mindspore/common/dtype.py +66 -18
  59. mindspore/common/dump.py +2 -2
  60. mindspore/common/initializer.py +38 -1
  61. mindspore/common/jit_config.py +25 -13
  62. mindspore/common/mutable.py +53 -24
  63. mindspore/common/parameter.py +60 -37
  64. mindspore/common/seed.py +8 -24
  65. mindspore/common/sparse_tensor.py +927 -0
  66. mindspore/common/tensor.py +1627 -3900
  67. mindspore/communication/__init__.py +10 -5
  68. mindspore/communication/_comm_helper.py +78 -214
  69. mindspore/communication/_hccl_management.py +2 -1
  70. mindspore/communication/management.py +136 -47
  71. mindspore/config/op_info.config +501 -1008
  72. mindspore/context.py +291 -56
  73. mindspore/d3dcompiler_47.dll +0 -0
  74. mindspore/dataset/__init__.py +12 -8
  75. mindspore/dataset/audio/__init__.py +9 -9
  76. mindspore/dataset/audio/transforms.py +1090 -228
  77. mindspore/dataset/audio/utils.py +87 -39
  78. mindspore/dataset/audio/validators.py +223 -1
  79. mindspore/dataset/callback/ds_callback.py +17 -15
  80. mindspore/dataset/core/config.py +246 -17
  81. mindspore/dataset/core/py_util_helpers.py +4 -3
  82. mindspore/dataset/core/validator_helpers.py +10 -10
  83. mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
  84. mindspore/dataset/debug/debug_hook.py +65 -0
  85. mindspore/dataset/debug/pre_defined_hook.py +67 -0
  86. mindspore/dataset/engine/__init__.py +7 -3
  87. mindspore/dataset/engine/cache_client.py +9 -9
  88. mindspore/dataset/engine/datasets.py +648 -477
  89. mindspore/dataset/engine/datasets_audio.py +165 -167
  90. mindspore/dataset/engine/datasets_standard_format.py +93 -67
  91. mindspore/dataset/engine/datasets_text.py +492 -342
  92. mindspore/dataset/engine/datasets_user_defined.py +85 -50
  93. mindspore/dataset/engine/datasets_vision.py +1224 -699
  94. mindspore/dataset/engine/graphdata.py +134 -69
  95. mindspore/dataset/engine/iterators.py +50 -9
  96. mindspore/dataset/engine/offload.py +52 -31
  97. mindspore/dataset/engine/samplers.py +27 -24
  98. mindspore/dataset/engine/serializer_deserializer.py +14 -15
  99. mindspore/dataset/engine/validators.py +213 -52
  100. mindspore/dataset/text/__init__.py +10 -8
  101. mindspore/dataset/text/transforms.py +152 -57
  102. mindspore/dataset/text/utils.py +98 -49
  103. mindspore/dataset/text/validators.py +25 -0
  104. mindspore/dataset/transforms/__init__.py +4 -2
  105. mindspore/dataset/transforms/c_transforms.py +11 -13
  106. mindspore/dataset/transforms/py_transforms.py +2 -2
  107. mindspore/dataset/transforms/py_transforms_util.py +10 -0
  108. mindspore/dataset/transforms/transforms.py +13 -15
  109. mindspore/dataset/transforms/validators.py +7 -7
  110. mindspore/dataset/utils/__init__.py +2 -1
  111. mindspore/dataset/utils/browse_dataset.py +13 -13
  112. mindspore/dataset/utils/line_reader.py +121 -0
  113. mindspore/dataset/vision/__init__.py +8 -7
  114. mindspore/dataset/vision/c_transforms.py +125 -126
  115. mindspore/dataset/vision/py_transforms.py +37 -37
  116. mindspore/dataset/vision/py_transforms_util.py +23 -20
  117. mindspore/dataset/vision/transforms.py +316 -315
  118. mindspore/dataset/vision/utils.py +313 -17
  119. mindspore/dataset/vision/validators.py +6 -6
  120. mindspore/default_config.py +0 -1
  121. mindspore/dpcmi.dll +0 -0
  122. mindspore/{compression → experimental}/__init__.py +6 -5
  123. mindspore/experimental/map_parameter.py +275 -0
  124. mindspore/include/OWNERS +0 -1
  125. mindspore/include/api/callback/callback.h +9 -13
  126. mindspore/include/api/callback/ckpt_saver.h +2 -2
  127. mindspore/include/api/callback/loss_monitor.h +2 -2
  128. mindspore/include/api/callback/lr_scheduler.h +5 -5
  129. mindspore/include/api/callback/time_monitor.h +2 -2
  130. mindspore/include/api/callback/train_accuracy.h +4 -6
  131. mindspore/include/api/cfg.h +19 -6
  132. mindspore/include/api/context.h +70 -9
  133. mindspore/include/api/delegate.h +8 -1
  134. mindspore/include/api/dual_abi_helper.h +8 -24
  135. mindspore/include/api/metrics/accuracy.h +2 -2
  136. mindspore/include/api/metrics/metrics.h +4 -3
  137. mindspore/include/api/model.h +9 -4
  138. mindspore/include/api/model_group.h +68 -0
  139. mindspore/include/api/model_parallel_runner.h +17 -17
  140. mindspore/include/api/net.h +12 -11
  141. mindspore/include/api/serialization.h +20 -4
  142. mindspore/include/api/status.h +7 -1
  143. mindspore/include/api/types.h +25 -21
  144. mindspore/include/api/visible.h +4 -0
  145. mindspore/include/c_api/model_c.h +5 -0
  146. mindspore/include/c_api/status_c.h +1 -1
  147. mindspore/include/dataset/config.h +1 -1
  148. mindspore/include/dataset/constants.h +14 -0
  149. mindspore/include/dataset/text.h +59 -0
  150. mindspore/include/dataset/vision.h +56 -117
  151. mindspore/include/dataset/vision_lite.h +102 -0
  152. mindspore/jpeg62.dll +0 -0
  153. mindspore/log.py +28 -28
  154. mindspore/mindrecord/common/exceptions.py +2 -4
  155. mindspore/mindrecord/filereader.py +19 -1
  156. mindspore/mindrecord/filewriter.py +250 -88
  157. mindspore/mindrecord/mindpage.py +13 -13
  158. mindspore/mindrecord/shardheader.py +15 -15
  159. mindspore/mindrecord/shardreader.py +9 -0
  160. mindspore/mindrecord/shardwriter.py +29 -29
  161. mindspore/mindrecord/tools/cifar100_to_mr.py +9 -9
  162. mindspore/mindrecord/tools/cifar10_to_mr.py +9 -9
  163. mindspore/mindrecord/tools/csv_to_mr.py +4 -4
  164. mindspore/mindrecord/tools/imagenet_to_mr.py +70 -65
  165. mindspore/mindrecord/tools/mnist_to_mr.py +41 -41
  166. mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
  167. mindspore/{libmindspore_backend.dll → mindspore_backend.dll} +0 -0
  168. mindspore/mindspore_common.dll +0 -0
  169. mindspore/mindspore_core.dll +0 -0
  170. mindspore/mindspore_glog.dll +0 -0
  171. mindspore/mindspore_shared_lib.dll +0 -0
  172. mindspore/msobj140.dll +0 -0
  173. mindspore/mspdb140.dll +0 -0
  174. mindspore/mspdbcore.dll +0 -0
  175. mindspore/mspdbst.dll +0 -0
  176. mindspore/mspft140.dll +0 -0
  177. mindspore/msvcdis140.dll +0 -0
  178. mindspore/msvcp140_1.dll +0 -0
  179. mindspore/msvcp140_2.dll +0 -0
  180. mindspore/msvcp140_atomic_wait.dll +0 -0
  181. mindspore/msvcp140_codecvt_ids.dll +0 -0
  182. mindspore/nn/__init__.py +1 -5
  183. mindspore/nn/cell.py +297 -234
  184. mindspore/nn/dynamic_lr.py +1 -1
  185. mindspore/nn/grad/cell_grad.py +17 -42
  186. mindspore/nn/layer/__init__.py +7 -4
  187. mindspore/nn/layer/activation.py +131 -88
  188. mindspore/nn/layer/basic.py +313 -613
  189. mindspore/nn/layer/channel_shuffle.py +103 -0
  190. mindspore/nn/layer/combined.py +1 -1
  191. mindspore/nn/layer/container.py +52 -6
  192. mindspore/nn/layer/conv.py +112 -43
  193. mindspore/nn/layer/dense.py +10 -9
  194. mindspore/nn/layer/embedding.py +36 -34
  195. mindspore/nn/layer/image.py +123 -27
  196. mindspore/nn/layer/math.py +108 -107
  197. mindspore/nn/layer/normalization.py +212 -366
  198. mindspore/nn/layer/padding.py +370 -42
  199. mindspore/nn/layer/pooling.py +1443 -219
  200. mindspore/nn/layer/rnn_cells.py +11 -16
  201. mindspore/nn/layer/rnns.py +38 -39
  202. mindspore/nn/layer/thor_layer.py +24 -25
  203. mindspore/nn/layer/timedistributed.py +5 -5
  204. mindspore/nn/layer/transformer.py +701 -0
  205. mindspore/nn/learning_rate_schedule.py +8 -8
  206. mindspore/nn/loss/__init__.py +9 -6
  207. mindspore/nn/loss/loss.py +678 -142
  208. mindspore/nn/metrics.py +53 -0
  209. mindspore/nn/optim/_dist_optimizer_registry.py +2 -2
  210. mindspore/nn/optim/ada_grad.py +8 -8
  211. mindspore/nn/optim/adadelta.py +2 -3
  212. mindspore/nn/optim/adafactor.py +18 -14
  213. mindspore/nn/optim/adam.py +429 -87
  214. mindspore/nn/optim/adamax.py +5 -6
  215. mindspore/nn/optim/adasum.py +10 -8
  216. mindspore/nn/optim/asgd.py +7 -7
  217. mindspore/nn/optim/ftrl.py +81 -11
  218. mindspore/nn/optim/lamb.py +7 -8
  219. mindspore/nn/optim/lars.py +4 -4
  220. mindspore/nn/optim/lazyadam.py +82 -7
  221. mindspore/nn/optim/momentum.py +8 -7
  222. mindspore/nn/optim/optimizer.py +19 -10
  223. mindspore/nn/optim/proximal_ada_grad.py +6 -5
  224. mindspore/nn/optim/rmsprop.py +3 -3
  225. mindspore/nn/optim/rprop.py +20 -16
  226. mindspore/nn/optim/sgd.py +21 -15
  227. mindspore/nn/optim/thor.py +23 -21
  228. mindspore/nn/probability/__init__.py +0 -2
  229. mindspore/nn/probability/bijector/bijector.py +7 -6
  230. mindspore/nn/probability/bijector/invert.py +4 -2
  231. mindspore/nn/probability/bijector/softplus.py +2 -2
  232. mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
  233. mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
  234. mindspore/nn/probability/distribution/__init__.py +6 -0
  235. mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -2
  236. mindspore/nn/probability/distribution/_utils/utils.py +11 -17
  237. mindspore/nn/probability/distribution/bernoulli.py +6 -6
  238. mindspore/nn/probability/distribution/beta.py +1 -1
  239. mindspore/nn/probability/distribution/categorical.py +9 -9
  240. mindspore/nn/probability/distribution/cauchy.py +8 -8
  241. mindspore/nn/probability/distribution/distribution.py +12 -6
  242. mindspore/nn/probability/distribution/exponential.py +5 -5
  243. mindspore/nn/probability/distribution/gamma.py +3 -3
  244. mindspore/nn/probability/distribution/geometric.py +6 -5
  245. mindspore/nn/probability/distribution/gumbel.py +5 -5
  246. mindspore/nn/probability/distribution/half_normal.py +133 -0
  247. mindspore/nn/probability/distribution/laplace.py +128 -0
  248. mindspore/nn/probability/distribution/log_normal.py +0 -1
  249. mindspore/nn/probability/distribution/logistic.py +4 -5
  250. mindspore/nn/probability/distribution/normal.py +11 -15
  251. mindspore/nn/probability/distribution/poisson.py +6 -2
  252. mindspore/nn/probability/distribution/student_t.py +150 -0
  253. mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
  254. mindspore/nn/probability/distribution/uniform.py +5 -5
  255. mindspore/nn/reinforcement/_tensors_queue.py +3 -3
  256. mindspore/nn/reinforcement/tensor_array.py +2 -2
  257. mindspore/nn/sparse/sparse.py +8 -1
  258. mindspore/nn/wrap/cell_wrapper.py +55 -27
  259. mindspore/nn/wrap/grad_reducer.py +20 -11
  260. mindspore/nn/wrap/loss_scale.py +47 -30
  261. mindspore/numpy/array_creations.py +33 -22
  262. mindspore/numpy/array_ops.py +46 -42
  263. mindspore/numpy/logic_ops.py +6 -27
  264. mindspore/numpy/math_ops.py +26 -19
  265. mindspore/numpy/utils.py +1 -8
  266. mindspore/numpy/utils_const.py +112 -62
  267. mindspore/opencv_core452.dll +0 -0
  268. mindspore/opencv_imgcodecs452.dll +0 -0
  269. mindspore/opencv_imgproc452.dll +0 -0
  270. mindspore/ops/__init__.py +6 -3
  271. mindspore/ops/_constants.py +0 -6
  272. mindspore/ops/_grad/__init__.py +2 -1
  273. mindspore/ops/_grad/grad_array_ops.py +209 -152
  274. mindspore/ops/_grad/grad_base.py +55 -17
  275. mindspore/ops/_grad/grad_clip_ops.py +11 -3
  276. mindspore/ops/_grad/grad_comm_ops.py +58 -47
  277. mindspore/ops/_grad/grad_implementations.py +21 -61
  278. mindspore/ops/_grad/grad_inner_ops.py +48 -6
  279. mindspore/ops/_grad/grad_math_ops.py +306 -161
  280. mindspore/ops/_grad/grad_nn_ops.py +192 -181
  281. mindspore/ops/_grad/grad_other_ops.py +1 -1
  282. mindspore/ops/_grad/grad_quant_ops.py +5 -5
  283. mindspore/ops/_grad/grad_sequence_ops.py +296 -0
  284. mindspore/ops/_grad/grad_sparse.py +15 -9
  285. mindspore/ops/_grad_experimental/__init__.py +1 -0
  286. mindspore/ops/_grad_experimental/grad_array_ops.py +441 -55
  287. mindspore/ops/_grad_experimental/grad_image_ops.py +25 -7
  288. mindspore/ops/_grad_experimental/grad_inner_ops.py +3 -44
  289. mindspore/ops/_grad_experimental/grad_linalg_ops.py +16 -21
  290. mindspore/ops/_grad_experimental/grad_math_ops.py +979 -49
  291. mindspore/ops/_grad_experimental/grad_nn_ops.py +78 -8
  292. mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
  293. mindspore/ops/_grad_experimental/grad_sparse_ops.py +197 -13
  294. mindspore/ops/_op_impl/__init__.py +3 -3
  295. mindspore/ops/_op_impl/_custom_op/__init__.py +0 -1
  296. mindspore/ops/_op_impl/_custom_op/_basic.py +0 -1
  297. mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
  298. mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +4 -2
  299. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
  300. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
  301. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +5 -5
  302. mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
  303. mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
  304. mindspore/ops/_op_impl/_custom_op/correction_mul.py +3 -3
  305. mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
  306. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +4 -8
  307. mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
  308. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
  309. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
  310. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
  311. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
  312. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
  313. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
  314. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
  315. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
  316. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
  317. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
  318. mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
  319. mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
  320. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  321. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
  322. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
  323. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
  324. mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
  325. mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +0 -1
  326. mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -1
  327. mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
  328. mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
  329. mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
  330. mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
  331. mindspore/ops/_op_impl/aicpu/__init__.py +238 -3
  332. mindspore/ops/_op_impl/aicpu/abs.py +36 -0
  333. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
  334. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
  335. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
  336. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
  337. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
  338. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
  339. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
  340. mindspore/ops/_op_impl/aicpu/add.py +43 -0
  341. mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
  342. mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
  343. mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
  344. mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
  345. mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
  346. mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
  347. mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
  348. mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
  349. mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
  350. mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
  351. mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
  352. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +43 -0
  353. mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
  354. mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/cauchy.py} +17 -10
  355. mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
  356. mindspore/ops/_op_impl/aicpu/cholesky.py +1 -1
  357. mindspore/ops/_op_impl/{cpu/bias_add.py → aicpu/choleskygrad.py} +9 -7
  358. mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
  359. mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
  360. mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
  361. mindspore/ops/_op_impl/aicpu/conj.py +11 -0
  362. mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
  363. mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
  364. mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
  365. mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +2 -2
  366. mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
  367. mindspore/ops/_op_impl/aicpu/diag.py +36 -0
  368. mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
  369. mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
  370. mindspore/ops/_op_impl/{cpu/bias_add_grad.py → aicpu/digamma.py} +9 -7
  371. mindspore/ops/_op_impl/aicpu/eig.py +35 -0
  372. mindspore/ops/_op_impl/aicpu/fft_with_size.py +41 -0
  373. mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
  374. mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
  375. mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
  376. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
  377. mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
  378. mindspore/ops/_op_impl/aicpu/glu.py +33 -0
  379. mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
  380. mindspore/ops/_op_impl/aicpu/greater.py +41 -0
  381. mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
  382. mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
  383. mindspore/ops/_op_impl/{tbe/scatter_add_ds.py → aicpu/inplace_index_add.py} +17 -21
  384. mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
  385. mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
  386. mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
  387. mindspore/ops/_op_impl/aicpu/less.py +41 -0
  388. mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
  389. mindspore/ops/_op_impl/aicpu/lgamma.py +32 -0
  390. mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
  391. mindspore/ops/_op_impl/aicpu/logit.py +33 -0
  392. mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
  393. mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
  394. mindspore/ops/_op_impl/aicpu/masked_scatter.py +39 -0
  395. mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
  396. mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
  397. mindspore/ops/_op_impl/aicpu/matrix_power.py +32 -0
  398. mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
  399. mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
  400. mindspore/ops/_op_impl/aicpu/mirror_pad.py +2 -0
  401. mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
  402. mindspore/ops/_op_impl/aicpu/mul.py +3 -1
  403. mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
  404. mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
  405. mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
  406. mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
  407. mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
  408. mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
  409. mindspore/ops/_op_impl/aicpu/polar.py +32 -0
  410. mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
  411. mindspore/ops/_op_impl/aicpu/qr.py +36 -0
  412. mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
  413. mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
  414. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
  415. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
  416. mindspore/ops/_op_impl/aicpu/random_shuffle.py +3 -0
  417. mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
  418. mindspore/ops/_op_impl/aicpu/range.py +36 -0
  419. mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
  420. mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
  421. mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
  422. mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
  423. mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
  424. mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
  425. mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
  426. mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
  427. mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
  428. mindspore/ops/_op_impl/aicpu/search_sorted.py +12 -6
  429. mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
  430. mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
  431. mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
  432. mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
  433. mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
  434. mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
  435. mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
  436. mindspore/ops/_op_impl/aicpu/sort.py +39 -0
  437. mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
  438. mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
  439. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
  440. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
  441. mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
  442. mindspore/ops/_op_impl/{tbe/slice_ds.py → aicpu/sparse_segment_sum.py} +16 -24
  443. mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
  444. mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
  445. mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
  446. mindspore/ops/_op_impl/aicpu/squared_difference.py +2 -0
  447. mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +93 -0
  448. mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +66 -0
  449. mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
  450. mindspore/ops/_op_impl/{tbe/gather_v2.py → aicpu/tile.py} +24 -24
  451. mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
  452. mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
  453. mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
  454. mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
  455. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
  456. mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
  457. mindspore/ops/_op_impl/cpu/__init__.py +1 -2
  458. mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
  459. mindspore/ops/_op_impl/cpu/maximum_grad.py +2 -0
  460. mindspore/{compression/common/__init__.py → ops/_op_impl/cpu/pyexecute.py} +13 -8
  461. mindspore/ops/_op_impl/cpu/reduce_sum.py +8 -0
  462. mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
  463. mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
  464. mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
  465. mindspore/ops/_op_impl/tbe/__init__.py +27 -608
  466. mindspore/ops/_op_impl/tbe/addcdiv_ds.py +42 -0
  467. mindspore/ops/_op_impl/tbe/addcmul_ds.py +44 -0
  468. mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
  469. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  470. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
  471. mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -1
  472. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  473. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
  474. mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +41 -0
  475. mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +1 -0
  476. mindspore/ops/_op_impl/tbe/bias_add_grad.py +2 -0
  477. mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
  478. mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +40 -0
  479. mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
  480. mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
  481. mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
  482. mindspore/ops/_op_impl/tbe/cast.py +0 -2
  483. mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
  484. mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -2
  485. mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -2
  486. mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
  487. mindspore/ops/_op_impl/tbe/deformable_offsets.py +1 -0
  488. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +1 -1
  489. mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
  490. mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
  491. mindspore/ops/_op_impl/tbe/greater.py +2 -0
  492. mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
  493. mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -1
  494. mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
  495. mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
  496. mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -6
  497. mindspore/ops/_op_impl/tbe/{greater_ds.py → reduce_all_ds.py} +13 -16
  498. mindspore/ops/_op_impl/tbe/reduce_any_ds.py +39 -0
  499. mindspore/ops/_op_impl/tbe/roi_align_ds.py +44 -0
  500. mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +44 -0
  501. mindspore/ops/_op_impl/tbe/scatter_add.py +2 -0
  502. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +2 -2
  503. mindspore/ops/_op_impl/tbe/slice.py +26 -15
  504. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  505. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
  506. mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +1 -0
  507. mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
  508. mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +1 -1
  509. mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +2 -0
  510. mindspore/ops/_primitive_cache.py +3 -2
  511. mindspore/ops/_register_for_op.py +11 -0
  512. mindspore/ops/_utils/__init__.py +1 -1
  513. mindspore/ops/_utils/utils.py +20 -41
  514. mindspore/ops/_vmap/__init__.py +2 -2
  515. mindspore/ops/_vmap/vmap_array_ops.py +170 -78
  516. mindspore/ops/_vmap/vmap_base.py +24 -10
  517. mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
  518. mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
  519. mindspore/ops/_vmap/vmap_grad_nn_ops.py +41 -9
  520. mindspore/ops/_vmap/vmap_image_ops.py +52 -0
  521. mindspore/ops/_vmap/vmap_math_ops.py +77 -6
  522. mindspore/ops/_vmap/vmap_nn_ops.py +78 -29
  523. mindspore/ops/_vmap/vmap_other_ops.py +3 -1
  524. mindspore/ops/_vmap/vmap_random_ops.py +55 -3
  525. mindspore/ops/_vmap/vmap_sparse_ops.py +1 -0
  526. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  527. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  528. mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +18 -19
  529. mindspore/ops/bprop_mindir/Argmax_bprop.mindir +13 -12
  530. mindspore/ops/bprop_mindir/Argmin_bprop.mindir +14 -13
  531. mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +17 -18
  532. mindspore/ops/bprop_mindir/Assign_bprop.mindir +16 -16
  533. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
  534. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
  535. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  536. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +13 -12
  537. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  538. mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +28 -0
  539. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  540. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
  541. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +306 -0
  542. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +12 -8
  543. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  544. mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
  545. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
  546. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
  547. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
  548. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
  549. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
  550. mindspore/ops/bprop_mindir/DType_bprop.mindir +12 -12
  551. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
  552. mindspore/ops/bprop_mindir/Depend_bprop.mindir +12 -13
  553. mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +23 -0
  554. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
  555. mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +15 -0
  556. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  557. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  558. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -24
  559. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -14
  560. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
  561. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  562. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  563. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  564. mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +12 -12
  565. mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
  566. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  567. mindspore/ops/bprop_mindir/Equal_bprop.mindir +18 -19
  568. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +58 -0
  569. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
  570. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +54 -0
  571. mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +18 -15
  572. mindspore/ops/bprop_mindir/GatherD_bprop.mindir +26 -0
  573. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +57 -0
  574. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  575. mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +17 -18
  576. mindspore/ops/bprop_mindir/Greater_bprop.mindir +18 -19
  577. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
  578. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
  579. mindspore/ops/bprop_mindir/IOU_bprop.mindir +18 -19
  580. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  581. mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +13 -12
  582. mindspore/ops/bprop_mindir/IsInf_bprop.mindir +13 -10
  583. mindspore/ops/bprop_mindir/IsNan_bprop.mindir +14 -11
  584. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
  585. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
  586. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
  587. mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
  588. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  589. mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +18 -19
  590. mindspore/ops/bprop_mindir/Less_bprop.mindir +17 -18
  591. mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +22 -19
  592. mindspore/ops/bprop_mindir/Load_bprop.mindir +12 -13
  593. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
  594. mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +17 -18
  595. mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +14 -13
  596. mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +21 -0
  597. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
  598. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
  599. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
  600. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
  601. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  602. mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
  603. mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
  604. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
  605. mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
  606. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  607. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  608. mindspore/ops/bprop_mindir/NonZero_bprop.mindir +14 -0
  609. mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +18 -19
  610. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +25 -23
  611. mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +13 -13
  612. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  613. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  614. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  615. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
  616. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
  617. mindspore/ops/bprop_mindir/Range_bprop.mindir +21 -19
  618. mindspore/ops/bprop_mindir/Rank_bprop.mindir +11 -11
  619. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
  620. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  621. mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +18 -17
  622. mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +18 -17
  623. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +19 -23
  624. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +60 -0
  625. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
  626. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +89 -0
  627. mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +52 -0
  628. mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +22 -0
  629. mindspore/ops/bprop_mindir/Round_bprop.mindir +14 -13
  630. mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
  631. mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
  632. mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +22 -0
  633. mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +24 -0
  634. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +22 -0
  635. mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
  636. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
  637. mindspore/ops/bprop_mindir/Select_bprop.mindir +30 -34
  638. mindspore/ops/bprop_mindir/Shape_bprop.mindir +12 -12
  639. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
  640. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  641. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
  642. mindspore/ops/bprop_mindir/Sign_bprop.mindir +13 -12
  643. mindspore/ops/bprop_mindir/Slice_bprop.mindir +26 -0
  644. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
  645. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  646. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
  647. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
  648. mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
  649. mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +28 -0
  650. mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +23 -0
  651. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  652. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  653. mindspore/ops/bprop_mindir/Split_bprop.mindir +22 -0
  654. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +54 -0
  655. mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +95 -0
  656. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +98 -0
  657. mindspore/ops/bprop_mindir/Switch_bprop.mindir +28 -32
  658. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  659. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
  660. mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +22 -0
  661. mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +29 -0
  662. mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +14 -0
  663. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  664. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  665. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +23 -0
  666. mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +18 -15
  667. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +11 -13
  668. mindspore/ops/bprop_mindir/Unique_bprop.mindir +16 -0
  669. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +22 -0
  670. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
  671. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
  672. mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +13 -12
  673. mindspore/ops/bprop_mindir/__init__.py +1 -4
  674. mindspore/ops/bprop_mindir/generate_mindir.py +32 -20
  675. mindspore/ops/composite/__init__.py +12 -13
  676. mindspore/ops/composite/base.py +261 -254
  677. mindspore/ops/composite/env_ops.py +41 -0
  678. mindspore/ops/composite/math_ops.py +197 -156
  679. mindspore/ops/composite/multitype_ops/_compile_utils.py +428 -176
  680. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +188 -87
  681. mindspore/ops/composite/multitype_ops/add_impl.py +23 -1
  682. mindspore/ops/composite/multitype_ops/div_impl.py +3 -3
  683. mindspore/ops/composite/multitype_ops/equal_impl.py +1 -0
  684. mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -1
  685. mindspore/ops/composite/multitype_ops/getitem_impl.py +52 -5
  686. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
  687. mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
  688. mindspore/ops/composite/multitype_ops/in_impl.py +15 -3
  689. mindspore/ops/composite/multitype_ops/less_equal_impl.py +33 -2
  690. mindspore/ops/composite/multitype_ops/less_impl.py +33 -0
  691. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -2
  692. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  693. mindspore/ops/composite/multitype_ops/mod_impl.py +1 -1
  694. mindspore/ops/composite/multitype_ops/mul_impl.py +21 -7
  695. mindspore/ops/composite/multitype_ops/not_in_impl.py +15 -3
  696. mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
  697. mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
  698. mindspore/ops/composite/multitype_ops/setitem_impl.py +62 -70
  699. mindspore/ops/composite/multitype_ops/sub_impl.py +3 -3
  700. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +41 -4
  701. mindspore/ops/function/__init__.py +323 -8
  702. mindspore/ops/function/array_func.py +3511 -780
  703. mindspore/ops/function/clip_func.py +329 -0
  704. mindspore/ops/function/debug_func.py +6 -6
  705. mindspore/ops/function/grad/__init__.py +5 -1
  706. mindspore/ops/function/grad/grad_func.py +736 -65
  707. mindspore/ops/function/image_func.py +270 -0
  708. mindspore/ops/function/linalg_func.py +268 -8
  709. mindspore/ops/function/math_func.py +8032 -3164
  710. mindspore/ops/function/nn_func.py +5619 -1855
  711. mindspore/ops/function/other_func.py +115 -0
  712. mindspore/ops/function/parameter_func.py +11 -10
  713. mindspore/ops/function/random_func.py +939 -77
  714. mindspore/ops/function/sparse_func.py +249 -84
  715. mindspore/ops/function/sparse_unary_func.py +2303 -0
  716. mindspore/ops/function/spectral_func.py +146 -0
  717. mindspore/ops/function/vmap_func.py +114 -0
  718. mindspore/ops/functional.py +182 -254
  719. mindspore/ops/op_info_register.py +79 -34
  720. mindspore/ops/operations/__init__.py +210 -118
  721. mindspore/ops/operations/_csr_ops.py +7 -7
  722. mindspore/ops/operations/_embedding_cache_ops.py +25 -15
  723. mindspore/ops/operations/_grad_ops.py +447 -322
  724. mindspore/ops/operations/_inner_ops.py +547 -176
  725. mindspore/ops/operations/_map_tensor_ops.py +112 -0
  726. mindspore/ops/operations/_ms_kernel.py +29 -27
  727. mindspore/ops/operations/_ocr_ops.py +11 -11
  728. mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
  729. mindspore/ops/operations/_quant_ops.py +186 -101
  730. mindspore/ops/operations/_rl_inner_ops.py +122 -61
  731. mindspore/ops/operations/_scalar_ops.py +466 -0
  732. mindspore/ops/operations/_sequence_ops.py +1047 -0
  733. mindspore/ops/operations/_tensor_array.py +10 -11
  734. mindspore/ops/operations/_thor_ops.py +4 -4
  735. mindspore/ops/operations/array_ops.py +1428 -1226
  736. mindspore/ops/operations/comm_ops.py +180 -117
  737. mindspore/ops/operations/control_ops.py +4 -2
  738. mindspore/ops/operations/custom_ops.py +185 -98
  739. mindspore/ops/operations/debug_ops.py +92 -54
  740. mindspore/ops/operations/image_ops.py +406 -211
  741. mindspore/ops/operations/inner_ops.py +42 -53
  742. mindspore/ops/operations/linalg_ops.py +32 -29
  743. mindspore/ops/operations/math_ops.py +2076 -897
  744. mindspore/ops/operations/nn_ops.py +1282 -1252
  745. mindspore/ops/operations/other_ops.py +124 -278
  746. mindspore/ops/operations/random_ops.py +345 -178
  747. mindspore/ops/operations/rl_ops.py +8 -9
  748. mindspore/ops/operations/sparse_ops.py +502 -157
  749. mindspore/ops/operations/spectral_ops.py +107 -0
  750. mindspore/ops/primitive.py +192 -15
  751. mindspore/ops/vm_impl_registry.py +23 -2
  752. mindspore/parallel/__init__.py +6 -1
  753. mindspore/parallel/_auto_parallel_context.py +199 -92
  754. mindspore/parallel/_cell_wrapper.py +4 -2
  755. mindspore/parallel/_cost_model_context.py +3 -0
  756. mindspore/parallel/_dp_allreduce_fusion.py +2 -1
  757. mindspore/parallel/_offload_context.py +185 -0
  758. mindspore/parallel/_parallel_serialization.py +167 -28
  759. mindspore/parallel/_ps_context.py +9 -5
  760. mindspore/parallel/_recovery_context.py +1 -1
  761. mindspore/parallel/_tensor.py +9 -1
  762. mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
  763. mindspore/{nn/transformer → parallel/_transformer}/layers.py +59 -37
  764. mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
  765. mindspore/{nn/transformer → parallel/_transformer}/moe.py +160 -35
  766. mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
  767. mindspore/{nn/transformer → parallel/_transformer}/transformer.py +235 -196
  768. mindspore/parallel/_utils.py +47 -7
  769. mindspore/parallel/algo_parameter_config.py +5 -1
  770. mindspore/parallel/checkpoint_transform.py +329 -0
  771. mindspore/parallel/shard.py +229 -0
  772. mindspore/perf_msvcbuildinsights.dll +0 -0
  773. mindspore/pgodb140.dll +0 -0
  774. mindspore/pgort140.dll +0 -0
  775. mindspore/profiler/__init__.py +2 -1
  776. mindspore/profiler/common/util.py +4 -3
  777. mindspore/profiler/common/validator/validate_path.py +2 -2
  778. mindspore/profiler/envprofiling.py +249 -0
  779. mindspore/profiler/parser/aicpu_data_parser.py +38 -39
  780. mindspore/profiler/parser/ascend_timeline_generator.py +497 -0
  781. mindspore/profiler/parser/base_timeline_generator.py +471 -0
  782. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +684 -0
  783. mindspore/profiler/parser/framework_parser.py +42 -16
  784. mindspore/profiler/parser/hccl_parser.py +158 -158
  785. mindspore/profiler/parser/hwts_log_parser.py +7 -6
  786. mindspore/profiler/parser/integrator.py +18 -1579
  787. mindspore/profiler/parser/minddata_analyzer.py +8 -8
  788. mindspore/profiler/parser/msadvisor_analyzer.py +14 -27
  789. mindspore/profiler/parser/msadvisor_parser.py +2 -4
  790. mindspore/profiler/parser/optime_parser.py +17 -18
  791. mindspore/profiler/parser/profiler_info.py +108 -0
  792. mindspore/profiler/parser/step_trace_parser.py +1 -1
  793. mindspore/profiler/profiling.py +396 -194
  794. mindspore/rewrite/__init__.py +6 -2
  795. mindspore/rewrite/api/node.py +51 -110
  796. mindspore/rewrite/api/node_type.py +10 -6
  797. mindspore/rewrite/api/pattern_engine.py +51 -7
  798. mindspore/rewrite/api/scoped_value.py +64 -53
  799. mindspore/rewrite/api/symbol_tree.py +108 -61
  800. mindspore/rewrite/api/tree_node_helper.py +2 -3
  801. mindspore/{compression/quant/__init__.py → rewrite/ast_creator_register.py} +20 -11
  802. mindspore/rewrite/ast_helpers/__init__.py +6 -3
  803. mindspore/rewrite/ast_helpers/ast_creator.py +115 -0
  804. mindspore/rewrite/ast_helpers/ast_finder.py +99 -1
  805. mindspore/rewrite/ast_helpers/ast_modifier.py +17 -4
  806. mindspore/rewrite/ast_helpers/ast_replacer.py +1 -1
  807. mindspore/rewrite/ast_transformers/__init__.py +0 -1
  808. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +46 -5
  809. mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +6 -3
  810. mindspore/rewrite/common/__init__.py +2 -0
  811. mindspore/rewrite/common/event.py +1 -1
  812. mindspore/rewrite/common/observable.py +1 -1
  813. mindspore/rewrite/common/observer.py +1 -1
  814. mindspore/rewrite/common/rewrite_elog.py +35 -0
  815. mindspore/rewrite/namer.py +2 -2
  816. mindspore/rewrite/namespace.py +14 -4
  817. mindspore/rewrite/node.py +161 -13
  818. mindspore/rewrite/parser.py +0 -1
  819. mindspore/rewrite/parser_register.py +0 -1
  820. mindspore/rewrite/parsers/arguments_parser.py +3 -2
  821. mindspore/rewrite/parsers/assign_parser.py +267 -67
  822. mindspore/rewrite/parsers/attribute_parser.py +56 -0
  823. mindspore/rewrite/parsers/class_def_parser.py +191 -108
  824. mindspore/rewrite/parsers/constant_parser.py +101 -0
  825. mindspore/rewrite/parsers/container_parser.py +88 -0
  826. mindspore/rewrite/parsers/for_parser.py +28 -15
  827. mindspore/rewrite/parsers/function_def_parser.py +21 -5
  828. mindspore/rewrite/parsers/if_parser.py +11 -28
  829. mindspore/rewrite/parsers/module_parser.py +9 -6
  830. mindspore/rewrite/parsers/return_parser.py +3 -2
  831. mindspore/rewrite/sparsify/__init__.py +0 -0
  832. mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
  833. mindspore/rewrite/sparsify/sparsify.py +109 -0
  834. mindspore/rewrite/sparsify/utils.py +173 -0
  835. mindspore/rewrite/symbol_tree.py +322 -109
  836. mindspore/rewrite/symbol_tree_builder.py +45 -8
  837. mindspore/rewrite/symbol_tree_dumper.py +0 -1
  838. mindspore/rewrite/topological_manager.py +1 -2
  839. mindspore/run_check/_check_version.py +209 -112
  840. mindspore/run_check/run_check.py +2 -1
  841. mindspore/tbbmalloc.dll +0 -0
  842. mindspore/tinyxml2.dll +0 -0
  843. mindspore/train/__init__.py +6 -4
  844. mindspore/train/_utils.py +28 -5
  845. mindspore/train/amp.py +321 -50
  846. mindspore/train/callback/__init__.py +3 -1
  847. mindspore/train/callback/_backup_and_restore.py +120 -0
  848. mindspore/train/callback/_callback.py +8 -8
  849. mindspore/train/callback/_checkpoint.py +12 -9
  850. mindspore/train/callback/_early_stop.py +13 -7
  851. mindspore/train/callback/_history.py +8 -8
  852. mindspore/train/callback/_lambda_callback.py +6 -6
  853. mindspore/train/callback/_landscape.py +36 -38
  854. mindspore/train/callback/_loss_monitor.py +12 -6
  855. mindspore/train/callback/_lr_scheduler_callback.py +2 -4
  856. mindspore/train/callback/_on_request_exit.py +212 -0
  857. mindspore/train/callback/_reduce_lr_on_plateau.py +13 -7
  858. mindspore/train/callback/_summary_collector.py +27 -19
  859. mindspore/train/callback/_time_monitor.py +13 -7
  860. mindspore/train/checkpoint_pb2.py +68 -8
  861. mindspore/train/data_sink.py +122 -33
  862. mindspore/train/dataset_helper.py +28 -87
  863. mindspore/train/loss_scale_manager.py +4 -7
  864. mindspore/{nn → train}/metrics/__init__.py +20 -20
  865. mindspore/{nn → train}/metrics/accuracy.py +12 -10
  866. mindspore/{nn → train}/metrics/auc.py +4 -4
  867. mindspore/{nn → train}/metrics/bleu_score.py +4 -4
  868. mindspore/{nn → train}/metrics/confusion_matrix.py +10 -8
  869. mindspore/{nn → train}/metrics/cosine_similarity.py +4 -4
  870. mindspore/{nn → train}/metrics/dice.py +6 -5
  871. mindspore/{nn → train}/metrics/error.py +7 -5
  872. mindspore/{nn → train}/metrics/fbeta.py +9 -7
  873. mindspore/{nn → train}/metrics/hausdorff_distance.py +8 -6
  874. mindspore/{nn → train}/metrics/loss.py +4 -3
  875. mindspore/{nn → train}/metrics/mean_surface_distance.py +6 -5
  876. mindspore/{nn → train}/metrics/metric.py +6 -5
  877. mindspore/{nn → train}/metrics/occlusion_sensitivity.py +4 -3
  878. mindspore/{nn → train}/metrics/perplexity.py +5 -4
  879. mindspore/{nn → train}/metrics/precision.py +5 -4
  880. mindspore/{nn → train}/metrics/recall.py +5 -4
  881. mindspore/{nn → train}/metrics/roc.py +7 -6
  882. mindspore/{nn → train}/metrics/root_mean_square_surface_distance.py +6 -5
  883. mindspore/{nn → train}/metrics/topk.py +7 -5
  884. mindspore/train/mind_ir_pb2.py +339 -32
  885. mindspore/train/model.py +113 -84
  886. mindspore/train/serialization.py +547 -167
  887. mindspore/train/summary/_summary_adapter.py +1 -1
  888. mindspore/train/summary/summary_record.py +43 -12
  889. mindspore/train/train_thor/convert_utils.py +7 -1
  890. mindspore/train/train_thor/dataset_helper.py +3 -3
  891. mindspore/train/train_thor/model_thor.py +0 -4
  892. mindspore/turbojpeg.dll +0 -0
  893. mindspore/vcmeta.dll +0 -0
  894. mindspore/vcruntime140.dll +0 -0
  895. mindspore/vcruntime140_1.dll +0 -0
  896. mindspore/version.py +1 -1
  897. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +4 -3
  898. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +901 -660
  899. mindspore/compression/common/constant.py +0 -124
  900. mindspore/compression/export/__init__.py +0 -19
  901. mindspore/compression/export/quant_export.py +0 -514
  902. mindspore/compression/quant/qat.py +0 -636
  903. mindspore/compression/quant/quant_utils.py +0 -462
  904. mindspore/compression/quant/quantizer.py +0 -68
  905. mindspore/libatomic-1.dll +0 -0
  906. mindspore/libgcc_s_seh-1.dll +0 -0
  907. mindspore/libgfortran-4.dll +0 -0
  908. mindspore/libgomp-1.dll +0 -0
  909. mindspore/libjpeg-62.dll +0 -0
  910. mindspore/libmindspore.dll +0 -0
  911. mindspore/libmindspore_common.dll +0 -0
  912. mindspore/libmindspore_core.dll +0 -0
  913. mindspore/libmindspore_glog.dll +0 -0
  914. mindspore/libnnacl.dll +0 -0
  915. mindspore/libopencv_core452.dll +0 -0
  916. mindspore/libopencv_imgcodecs452.dll +0 -0
  917. mindspore/libopencv_imgproc452.dll +0 -0
  918. mindspore/libquadmath-0.dll +0 -0
  919. mindspore/libsqlite3.dll +0 -0
  920. mindspore/libssp-0.dll +0 -0
  921. mindspore/libstdc++-6.dll +0 -0
  922. mindspore/libtinyxml2.dll +0 -0
  923. mindspore/libturbojpeg.dll +0 -0
  924. mindspore/libwinpthread-1.dll +0 -0
  925. mindspore/nn/layer/quant.py +0 -1868
  926. mindspore/nn/layer/rnn_utils.py +0 -90
  927. mindspore/nn/probability/dpn/__init__.py +0 -22
  928. mindspore/nn/probability/dpn/vae/__init__.py +0 -25
  929. mindspore/nn/probability/dpn/vae/cvae.py +0 -138
  930. mindspore/nn/probability/dpn/vae/vae.py +0 -122
  931. mindspore/nn/probability/infer/__init__.py +0 -22
  932. mindspore/nn/probability/infer/variational/elbo.py +0 -70
  933. mindspore/nn/probability/infer/variational/svi.py +0 -84
  934. mindspore/nn/probability/toolbox/__init__.py +0 -22
  935. mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
  936. mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -363
  937. mindspore/nn/probability/transforms/__init__.py +0 -22
  938. mindspore/nn/probability/transforms/transform_bnn.py +0 -262
  939. mindspore/nn/probability/zhusuan/__init__.py +0 -18
  940. mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
  941. mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
  942. mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
  943. mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
  944. mindspore/ops/_op_impl/tbe/bias_add_grad_ds.py +0 -52
  945. mindspore/ops/_op_impl/tbe/scatter_nd_add_ds.py +0 -43
  946. mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -20
  947. mindspore/ops/bprop_mindir/Identity_bprop.mindir +0 -9
  948. mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -20
  949. mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -16
  950. mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -17
  951. mindspore/ops/bprop_mindir/stop_gradient_bprop.mindir +0 -12
  952. mindspore/ops/composite/array_ops.py +0 -210
  953. mindspore/ops/composite/clip_ops.py +0 -238
  954. mindspore/ops/composite/random_ops.py +0 -426
  955. mindspore/ops/composite/vmap_ops.py +0 -38
  956. mindspore/ops/operations/sponge_ops.py +0 -3531
  957. mindspore/ops/operations/sponge_update_ops.py +0 -2546
  958. mindspore/parallel/nn/__init__.py +0 -42
  959. mindspore/parallel/nn/loss.py +0 -22
  960. mindspore/parallel/nn/moe.py +0 -21
  961. mindspore/parallel/nn/op_parallel_config.py +0 -22
  962. mindspore/parallel/nn/transformer.py +0 -31
  963. mindspore/run_check/_check_deps_version.py +0 -84
  964. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
  965. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
  966. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -19,16 +19,17 @@ from __future__ import division
19
19
 
20
20
  import math
21
21
  from functools import partial
22
-
23
22
  from mindspore import log as logger
24
23
  from mindspore._checkparam import _check_3d_int_or_tuple
25
24
  from mindspore import context
26
25
  from mindspore.ops import signature as sig
27
- from mindspore._checkparam import Validator as validator
28
- from mindspore._checkparam import Rel
26
+ from mindspore import _checkparam as validator
29
27
  from mindspore.common import dtype as mstype
30
28
  from mindspore.common._decorator import deprecated
31
- from mindspore.ops.primitive import Primitive, PrimitiveWithInfer, PrimitiveWithCheck, prim_attr_register
29
+ from mindspore.ops.primitive import Primitive
30
+ from mindspore.ops.primitive import PrimitiveWithInfer
31
+ from mindspore.ops.primitive import PrimitiveWithCheck
32
+ from mindspore.ops.primitive import prim_attr_register
32
33
 
33
34
 
34
35
  def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=False,
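The import hunk above replaces the `Validator` class and the `Rel` enum with the consolidated `mindspore._checkparam` module, so comparison constants such as `Rel.NE` are now reached as `validator.NE`. A minimal sketch of the new call pattern, using only names that appear in this diff (assumes MindSpore 2.0.0rc1 is installed):

    from mindspore import _checkparam as validator

    def check_alpha(alpha, prim_name="CeLU"):
        # Mirrors the validation done in the updated CeLU.__init__ further down:
        # type check first, then reject alpha == 0 via the NE (not-equal) relation.
        validator.check_value_type("alpha", alpha, [float], prim_name)
        validator.check_float(alpha, 0.0, validator.NE, "alpha", prim_name)
        return alpha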
@@ -98,30 +99,7 @@ class CeLU(Primitive):
98
99
  r"""
99
100
  Computes CeLU (Continuously differentiable exponential linear units) of input tensors element-wise.
100
101
 
101
- .. math::
102
-
103
- \text{CeLU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x/\alpha) - 1))
104
-
105
- It returns :math:`\max(0,x) + \min(0, \alpha * (\exp(x/\alpha) - 1))` element-wise.
106
-
107
- The picture about CeLU looks like this `CeLU <https://arxiv.org/abs/1704.07483>`_.
108
-
109
-
110
- Args:
111
- alpha (float): The :math:`\alpha` value for the Celu formulation. Default: 1.0
112
-
113
- Inputs:
114
- - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
115
- additional dimensions, with dtype of float16 and float32.
116
-
117
- Outputs:
118
- Tensor, with the same type and shape as the `input_x`.
119
-
120
- Raises:
121
- TypeError: If `alpha` is not a float.
122
- ValueError: If `alpha` has the value of 0.
123
- TypeError: If `input_x` is not a Tensor.
124
- TypeError: If the dtype of 'input_x' is neither float16 nor float32.
102
+ Refer to :func:`mindspore.ops.celu` for more details.
125
103
 
126
104
  Supported Platforms:
127
105
  ``Ascend`` ``GPU`` ``CPU``
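The formula that the hunk above removes from the CeLU docstring, :math:`\text{CeLU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x/\alpha) - 1))`, now lives behind the :func:`mindspore.ops.celu` reference. A small NumPy sketch of that formula (illustrative only, not the MindSpore kernel):

    import numpy as np

    def celu(x, alpha=1.0):
        # CeLU(x) = max(0, x) + min(0, alpha * (exp(x / alpha) - 1))
        return np.maximum(0.0, x) + np.minimum(0.0, alpha * (np.exp(x / alpha) - 1.0))

    print(celu(np.array([-2.0, -1.0, 1.0, 2.0], dtype=np.float32)))
    # approximately [-0.8647 -0.6321  1.      2.    ]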
@@ -138,7 +116,7 @@ class CeLU(Primitive):
138
116
  def __init__(self, alpha=1.0):
139
117
  """Initialize CeLU"""
140
118
  validator.check_value_type("alpha", alpha, [float], self.name)
141
- validator.check_float(alpha, 0.0, Rel.NE, "alpha", self.name)
119
+ validator.check_float(alpha, 0.0, validator.NE, "alpha", self.name)
142
120
  self.alpha = alpha
143
121
  self.add_prim_attr('alpha', self.alpha)
144
122
 
@@ -169,10 +147,13 @@ class AdaptiveAvgPool3D(Primitive):
169
147
  r"""
170
148
  AdaptiveAvgPool3D operation.
171
149
 
150
+ .. warning::
151
+ This is an experimental API that is subject to change or deletion.
152
+
172
153
  Refer to :func:`mindspore.ops.adaptive_avg_pool3d` for more details.
173
154
 
174
155
  Supported Platforms:
175
- ``GPU``
156
+ ``Ascend`` ``GPU`` ``CPU``
176
157
 
177
158
  Examples:
178
159
  >>> import mindspore
@@ -199,13 +180,12 @@ class AdaptiveAvgPool3D(Primitive):
199
180
 
200
181
  @prim_attr_register
201
182
  def __init__(self, output_size):
202
- self.add_prim_attr("cust_aicpu", self.name)
203
183
  validator.check_value_type("output_size", output_size, [int, tuple], self.name)
204
184
  self.output_size = (output_size,) * 3 if isinstance(self.output_size, int) else output_size
205
185
  for i, size in enumerate(self.output_size):
206
186
  validator.check_value_type(f"output_size[{i}]", size, [int, type(None)], self.name)
207
187
  if size is not None:
208
- validator.check_number(f"output_size[{i}]", size, 0, Rel.GE, self.name)
188
+ validator.check_number(f"output_size[{i}]", size, 0, validator.GE, self.name)
209
189
 
210
190
  self.output_size = tuple(-1 if val is None else val for val in self.output_size)
211
191
 
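The `AdaptiveAvgPool3D.__init__` shown above normalizes `output_size` in two steps: a single int is broadcast to all three dimensions, and `None` entries become -1, meaning that the corresponding input dimension is kept. A standalone sketch of that normalization (hypothetical helper mirroring the two lines in the hunk):

    def normalize_output_size_3d(output_size):
        # int -> same size for D, H and W; None -> -1 placeholder, as in AdaptiveAvgPool3D.__init__.
        size = (output_size,) * 3 if isinstance(output_size, int) else output_size
        return tuple(-1 if val is None else val for val in size)

    print(normalize_output_size_3d(4))             # (4, 4, 4)
    print(normalize_output_size_3d((None, 3, 5)))  # (-1, 3, 5)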
@@ -213,113 +193,78 @@ class AdaptiveAvgPool3D(Primitive):
213
193
  self.init_prim_io_names(inputs=['x'], outputs=['y'])
214
194
 
215
195
 
216
- class AdaptiveAvgPool2D(PrimitiveWithInfer):
196
+ class AdaptiveAvgPool2D(Primitive):
217
197
  r"""
218
- 2D adaptive average pooling for temporal data.
198
+ AdaptiveAvgPool2D operation.
219
199
 
220
- Refer to :func:`mindspore.ops.adaptive_avg_pool2d` for more detail.
200
+ Refer to :func:`mindspore.ops.adaptive_avg_pool2d` for more details.
221
201
 
222
202
  Supported Platforms:
223
203
  ``GPU``
224
204
 
225
205
  Examples:
226
206
  >>> # case 1: output_size=(None, 2)
227
- >>> input_x = Tensor(np.array([[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
228
- ... [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
229
- ... [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]]), mindspore.float32)
207
+ >>> input_x = Tensor(np.array([[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
208
+ ... [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
209
+ ... [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]), mindspore.float32)
230
210
  >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((None, 2))
231
211
  >>> output = adaptive_avg_pool_2d(input_x)
232
212
  >>> print(output)
233
- [[[[1.5 2.5]
234
- [4.5 5.5]
235
- [7.5 8.5]]
236
- [[1.5 2.5]
237
- [4.5 5.5]
238
- [7.5 8.5]]
239
- [[1.5 2.5]
240
- [4.5 5.5]
241
- [7.5 8.5]]]]
213
+ [[[1.5 2.5]
214
+ [4.5 5.5]
215
+ [7.5 8.5]]
216
+ [[1.5 2.5]
217
+ [4.5 5.5]
218
+ [7.5 8.5]]
219
+ [[1.5 2.5]
220
+ [4.5 5.5]
221
+ [7.5 8.5]]]
242
222
  >>> # case 2: output_size=2
243
223
  >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D(2)
244
224
  >>> output = adaptive_avg_pool_2d(input_x)
245
225
  >>> print(output)
246
- [[[[3. 4.]
247
- [6. 7.]]
248
- [[3. 4.]
249
- [6. 7.]]
250
- [[3. 4.]
251
- [6. 7.]]]]
226
+ [[[3. 4.]
227
+ [6. 7.]]
228
+ [[3. 4.]
229
+ [6. 7.]]
230
+ [[3. 4.]
231
+ [6. 7.]]]
252
232
  >>> # case 3: output_size=(1, 2)
253
233
  >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((1, 2))
254
234
  >>> output = adaptive_avg_pool_2d(input_x)
255
235
  >>> print(output)
256
- [[[[4.5 5.5]]
257
- [[4.5 5.5]]
258
- [[4.5 5.5]]]]
236
+ [[[4.5 5.5]]
237
+ [[4.5 5.5]]
238
+ [[4.5 5.5]]]
259
239
  """
260
240
 
261
241
  @prim_attr_register
262
242
  def __init__(self, output_size):
263
243
  """Initialize AdaptiveAvgPool2D."""
244
+ self.init_prim_io_names(inputs=['x'], outputs=['y'])
264
245
  validator.check_value_type("output_size", output_size, [int, tuple], self.name)
265
246
  if isinstance(output_size, tuple):
266
- validator.check_int(len(output_size), 2, Rel.EQ, 'length of output_size', self.name)
247
+ validator.check_int(len(output_size), 2, validator.EQ, 'length of output_size', self.name)
267
248
  self.output_size = (output_size, output_size) if isinstance(self.output_size, int) else output_size
249
+ for i, size in enumerate(self.output_size):
250
+ validator.check_value_type(f"output_size[{i}]", size, [int, type(None)], self.name)
251
+ if size is not None:
252
+ validator.check_number(f"output_size[{i}]", size, 0, validator.GE, self.name)
268
253
 
269
- def infer_shape(self, x_shape):
270
- if len(x_shape) <= len(self.output_size):
271
- raise ValueError("input_x {} dimension must be larger than output_size {} "
272
- "dimension".format(x_shape, self.output_size))
273
- validator.check_int(len(x_shape), 5, Rel.LT, 'input_x_dimensions', self.name)
274
- for input_x_dimension in x_shape:
275
- validator.check_int(input_x_dimension, 0, Rel.GT, 'input_x dimension', self.name)
276
- zipped = zip(self.output_size, x_shape[-len(self.output_size):])
277
- out_size = [i if i is not None else j for i, j in zipped]
278
- for item in out_size:
279
- validator.check_value_type("item of output_size", item, [int], self.name)
280
- self.add_prim_attr('output_size', out_size)
281
- output_shape = x_shape[:len(x_shape) - len(out_size)] + out_size
282
- return output_shape
283
-
284
- def infer_dtype(self, x_dtype):
285
- validator.check_tensor_dtype_valid("x_dtype", x_dtype, [mstype.float16, mstype.float32, mstype.float64],
286
- self.name)
287
- return x_dtype
254
+ self.output_size = tuple(-1 if val is None else val for val in self.output_size)
255
+ self.add_prim_attr('output_size', self.output_size)
288
256
 
289
257
 
290
258
  class AdaptiveMaxPool2D(Primitive):
291
259
  r"""
292
- AdaptiveMaxPool2D operation.
293
-
294
- This operator applies a 2D adaptive max pooling to an input signal composed of multiple input planes.
295
- That is, for any input size, the size of the specified output is H x W.
296
- The number of output features is equal to the number of input planes.
297
-
298
- The input and output data format can be "NCHW" and "CHW". N is the batch size, C is the number of channels,
299
- H is the feature height, and W is the feature width.
260
+ Performs 2D adaptive max pooling on a multi-plane input signal.
300
261
 
301
- For max adaptive pool2d:
302
-
303
- .. math::
304
-
305
- \begin{align}
306
- h_{start} &= floor(i * H_{in} / H_{out})\\
307
- h_{end} &= ceil((i + 1) * H_{in} / H_{out})\\
308
- w_{start} &= floor(j * W_{in} / W_{out})\\
309
- w_{end} &= ceil((j + 1) * W_{in} / W_{out})\\
310
- Output(i,j) &= {\max Input[h_{start}:h_{end}, w_{start}:w_{end}]}
311
- \end{align}
312
-
313
- Note:
314
- In Ascend, the second output `argmax` is invalid, please ignore it.
262
+ Refer to :func:`mindspore.ops.adaptive_max_pool2d` for more details.
315
263
 
316
264
  Args:
317
- output_size (Union[int, tuple]): The target output size is H x W.
318
- ouput_size can be a tuple, or a single H for H x H, and H and W can be int or None
319
- which means the output size is the same as the input.
320
-
321
- return_indices (bool): If `return_indices` is True, the indices of max value would be output.
322
- Default: False.
265
+ output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(H, W)`,

266
+ or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None.
267
+ If it is None, it means the output size is the same as the input size.
323
268
 
324
269
  Inputs:
325
270
  - **input_x** (Tensor) - The input of AdaptiveMaxPool2D, which is a 3D or 4D tensor,
@@ -328,17 +273,6 @@ class AdaptiveMaxPool2D(Primitive):
328
273
  Outputs:
329
274
  Tensor, with the same type as the `input_x`.
330
275
 
331
- Shape of the output is `input_x_shape[:len(input_x_shape) - len(out_shape)] + out_shape`.
332
-
333
- Raises:
334
- TypeError: If `output_size` is not int or tuple.
335
- TypeError: If `input_x` is not a tensor.
336
- TypeError: If `return_indices` is not a bool.
337
- TypeError: If dtype of `input_x` is not float16, float32 or float64.
338
- ValueError: If `output_size` is a tuple and the length of `output_size` is not 2.
339
- ValueError: If the dimension of `input_x` is not NCHW or CHW.
340
- ValueError: If `output_size` is less than -1.
341
-
342
276
  Supported Platforms:
343
277
  ``Ascend`` ``GPU`` ``CPU``
344
278
 
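The window arithmetic that the shortened AdaptiveMaxPool2D docstring no longer spells out is :math:`h_{start} = \lfloor i \cdot H_{in}/H_{out} \rfloor`, :math:`h_{end} = \lceil (i+1) \cdot H_{in}/H_{out} \rceil`, and likewise for the width. A NumPy sketch of that rule (illustrative only, not the MindSpore kernel); on one 3x3 plane with output_size=(None, 2) it reproduces one plane of case 1 in the examples that follow:

    import math
    import numpy as np

    def adaptive_max_pool2d(x, out_h, out_w):
        # Reference sketch of the adaptive window formula.
        h_in, w_in = x.shape[-2:]
        out = np.empty(x.shape[:-2] + (out_h, out_w), dtype=x.dtype)
        for i in range(out_h):
            hs, he = math.floor(i * h_in / out_h), math.ceil((i + 1) * h_in / out_h)
            for j in range(out_w):
                ws, we = math.floor(j * w_in / out_w), math.ceil((j + 1) * w_in / out_w)
                out[..., i, j] = x[..., hs:he, ws:we].max(axis=(-2, -1))
        return out

    plane = np.array([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]], dtype=np.float32)
    print(adaptive_max_pool2d(plane[None], 3, 2))  # [[[2. 3.] [5. 6.] [8. 9.]]]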
@@ -349,7 +283,7 @@ class AdaptiveMaxPool2D(Primitive):
349
283
  ... [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]]), mindspore.float32)
350
284
  >>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D((None, 2))
351
285
  >>> output = adaptive_max_pool_2d(input_x)
352
- >>> print(output)
286
+ >>> print(output[0])
353
287
  [[[[2. 3.]
354
288
  [5. 6.]
355
289
  [8. 9.]]
@@ -362,7 +296,7 @@ class AdaptiveMaxPool2D(Primitive):
362
296
  >>> # case 2: output_size=2
363
297
  >>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D(2)
364
298
  >>> output = adaptive_max_pool_2d(input_x)
365
- >>> print(output)
299
+ >>> print(output[0])
366
300
  [[[[5. 6.]
367
301
  [8. 9.]]
368
302
  [[5. 6.]
@@ -372,40 +306,49 @@ class AdaptiveMaxPool2D(Primitive):
372
306
  >>> # case 3: output_size=(1, 2)
373
307
  >>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D((1, 2))
374
308
  >>> output = adaptive_max_pool_2d(input_x)
375
- >>> print(output)
309
+ >>> print(output[0])
376
310
  [[[[8. 9.]]
377
311
  [[8. 9.]]
378
312
  [[8. 9.]]]]
379
313
  """
380
314
 
381
315
  @prim_attr_register
382
- def __init__(self, output_size, return_indices=False):
316
+ def __init__(self, output_size):
383
317
  """Initialize AdaptiveMaxPool2D."""
384
318
  validator.check_value_type("output_size", output_size, [int, tuple], self.name)
385
- validator.check_value_type("return_indices", return_indices, [bool], self.name)
386
319
  if isinstance(output_size, tuple):
387
- validator.check_int(len(output_size), 2, Rel.EQ,
320
+ validator.check_int(len(output_size), 2, validator.EQ,
388
321
  'length of output_size', self.name)
389
322
  self.output_size = (output_size, output_size) if isinstance(self.output_size, int) else output_size
390
323
  self.output_size = (-1 if self.output_size[0] is None else self.output_size[0],
391
324
  -1 if self.output_size[1] is None else self.output_size[1])
392
325
  for size in self.output_size:
393
- validator.check_number("output_size", size, -1, Rel.GE, None)
326
+ validator.check_number("output_size", size, -1, validator.GE, None)
394
327
  self.add_prim_attr('output_size', self.output_size)
395
- self.add_prim_attr('return_indices', return_indices)
396
328
 
397
329
 
398
330
  class AdaptiveMaxPool3D(Primitive):
399
331
  r"""
400
- Applies a 3D adaptive max pooling over an input signal composed of several input planes.
332
+ Performs 3D adaptive max pooling on a multi-plane input signal.
401
333
 
402
- Refer to :func:`mindspore.ops.adaptive_max_pool3d` for more detail.
334
+ Refer to :func:`mindspore.ops.adaptive_max_pool3d` for more details.
335
+
336
+ Inputs:
337
+ - **x** (Tensor) - Tensor, with shape :math:`(C, D, H, W)` or :math:`(N, C, D, H, W)`.
338
+ - **output_size** (Union[int, tuple]) - The specified output size, which is an integer that represents depth,
339
+ height and width, or a tuple of three int numbers that represent depth, height and width respectively.
340
+ The value must be a positive integer. If it is None, the output size and input size of the corresponding
341
+ dimension are the same.
342
+
343
+ Outputs:
344
+ - **y** (Tensor) - Tensor, with the same number of dims and data type as the `input`.
345
+ - **argmax** (Tensor) - Tensor, the indices of max value, which has the same shape as the
346
+ `y` and its data type is int32.
403
347
 
404
348
  Supported Platforms:
405
- ``GPU``
349
+ ``GPU`` ``CPU``
406
350
 
407
351
  Examples:
408
- >>> # case 1: Dynamic output size
409
352
  >>> class AdaptiveMaxPool3DNet(nn.Cell):
410
353
  ... def __init__(self):
411
354
  ... super(AdaptiveMaxPool3DNet, self).__init__()
@@ -420,23 +363,6 @@ class AdaptiveMaxPool3D(Primitive):
420
363
  [[[[33. 35.]]]]
421
364
  >>> print(output[1].asnumpy())
422
365
  [[[[33 35]]]]
423
-
424
- >>> # case 2: Constant output size
425
- >>> class ConstAdaptiveMaxPool3DNet(nn.Cell):
426
- ... def __init__(self, output_size):
427
- ... super(ConstAdaptiveMaxPool3DNet, self).__init__()
428
- ... self.output_size_ = output_size
429
- ... self.adaptive_max_pool_3d = ops.AdaptiveMaxPool3D()
430
- ... def construct(self, x_):
431
- ... return self.adaptive_max_pool_3d(x_, self.output_size_)
432
- >>> x = np.arange(0,36).reshape((1, 3, 3, 4)).astype(np.float32)
433
- >>> output_size = np.array([1, 1, 2], dtype=np.int32)
434
- >>> net = ConstAdaptiveMaxPool3DNet(Tensor(output_size))
435
- >>> output = net(Tensor(x))
436
- >>> print(output[0].asnumpy())
437
- [[[[33. 35.]]]]
438
- >>> print(output[1].asnumpy())
439
- [[[[33 35]]]]
440
366
  """
441
367
 
442
368
  @prim_attr_register
@@ -448,7 +374,7 @@ class Softmax(Primitive):
448
374
  r"""
449
375
  Applies the Softmax operation to the input tensor on the specified axis.
450
376
 
451
- Refer to :func:`mindspore.ops.softmax` for more detail.
377
+ Refer to :func:`mindspore.ops.softmax` for more details.
452
378
 
453
379
  Supported Platforms:
454
380
  ``Ascend`` ``GPU`` ``CPU``
@@ -476,7 +402,7 @@ class LogSoftmax(Primitive):
476
402
  r"""
477
403
  Log Softmax activation function.
478
404
 
479
- Refer to :func:`mindspore.ops.log_softmax` for more detail.
405
+ Refer to :func:`mindspore.ops.log_softmax` for more details.
480
406
 
481
407
  Supported Platforms:
482
408
  ``Ascend`` ``GPU`` ``CPU``
@@ -505,7 +431,7 @@ class Softplus(Primitive):
505
431
 
506
432
  .. math::
507
433
 
508
- \text{output} = \log(1 + \exp(\text{x})),
434
+ \text{output} = \log(1 + \exp(\text{x}))
509
435
 
510
436
  Inputs:
511
437
  - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
@@ -596,7 +522,7 @@ class ReLUV3(Primitive):
596
522
  Inputs:
597
523
  - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
598
524
  additional dimensions, data type is
599
- `number <https://www.mindspore.cn/docs/en/r1.10/api_python/mindspore.html#mindspore.dtype>`_.
525
+ `number <https://www.mindspore.cn/docs/en/r2.0/api_python/mindspore.html#mindspore.dtype>`_.
600
526
 
601
527
  Outputs:
602
528
  Tensor of shape :math:`(N, *)`, with the same type and shape as the `input_x`.
@@ -652,15 +578,13 @@ class Mish(PrimitiveWithInfer):
652
578
  >>> x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
653
579
  >>> mish = ops.Mish()
654
580
  >>> output = mish(x)
655
- >>> print(output)
656
- [[-0.3034014 3.9974129 -0.0026832]
657
- [ 1.9439590 -0.0033576 9.0000000]]
581
+ >>> print(output.shape)
582
+ (2, 3)
658
583
  """
659
584
 
660
585
  @prim_attr_register
661
586
  def __init__(self):
662
587
  """Initialize Mish"""
663
- super().__init__("Mish")
664
588
  self.init_prim_io_names(inputs=['x'], outputs=['output'])
665
589
 
666
590
 
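The Mish example above now prints only the output shape. The numeric values follow from the standard definition :math:`\text{Mish}(x) = x * \tanh(\text{softplus}(x))`, which is assumed here rather than taken from this diff. A NumPy sketch for the same input (illustrative only):

    import numpy as np

    def mish(x):
        # Mish(x) = x * tanh(ln(1 + exp(x)))
        return x * np.tanh(np.log1p(np.exp(x)))

    x = np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]], dtype=np.float32)
    print(mish(x))
    # approximately [[-0.3034  3.9974 -0.0027]
    #                [ 1.944  -0.0336  9.    ]]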
@@ -708,7 +632,6 @@ class SeLU(Primitive):
708
632
  @prim_attr_register
709
633
  def __init__(self):
710
634
  """Initialize SeLU"""
711
- super().__init__("SeLU")
712
635
  self.init_prim_io_names(inputs=['input_x'], outputs=['output'])
713
636
 
714
637
 
@@ -808,14 +731,15 @@ class Elu(Primitive):
808
731
  alpha (float): The alpha value of ELU, the data type is float. Only support '1.0' currently. Default: 1.0.
809
732
 
810
733
  Inputs:
811
- - **input_x** (Tensor) - The input of ELU is a Tensor of any dimension with data type of float16 or float32.
734
+ - **input_x** (Tensor) - The input of ELU is a Tensor of any dimension with data type of
735
+ float16, float32 or float64.
812
736
 
813
737
  Outputs:
814
738
  Tensor, has the same shape and data type as `input_x`.
815
739
 
816
740
  Raises:
817
741
  TypeError: If `alpha` is not a float.
818
- TypeError: If dtype of `input_x` is neither float16 nor float32.
742
+ TypeError: If dtype of `input_x` is neither float16, float32 nor float64.
819
743
  ValueError: If `alpha` is not equal to 1.0.
820
744
 
821
745
  Supported Platforms:
@@ -834,7 +758,7 @@ class Elu(Primitive):
834
758
  def __init__(self, alpha=1.0):
835
759
  """Initialize Elu"""
836
760
  validator.check_value_type("alpha", alpha, [float], self.name)
837
- validator.check_number("alpha", alpha, 1.0, Rel.EQ, self.name)
761
+ validator.check_number("alpha", alpha, 1.0, validator.EQ, self.name)
838
762
  self.init_prim_io_names(inputs=['x'], outputs=['output', 'mask'])
839
763
 
840
764
 
@@ -842,7 +766,7 @@ class HSwish(Primitive):
842
766
  r"""
843
767
  Hard swish activation function.
844
768
 
845
- Refer to :func:`mindspore.ops.hardswish` for more detail.
769
+ Refer to :func:`mindspore.ops.hardswish` for more details.
846
770
 
847
771
  Supported Platforms:
848
772
  ``Ascend`` ``GPU`` ``CPU``
@@ -887,25 +811,7 @@ class HSigmoid(Primitive):
887
811
  r"""
888
812
  Hard sigmoid activation function.
889
813
 
890
- Applies hard sigmoid activation element-wise. The input is a Tensor with any valid shape.
891
-
892
- Hard sigmoid is defined as:
893
-
894
- .. math::
895
-
896
- \text{hsigmoid}(x_{i}) = max(0, min(1, \frac{x_{i} + 3}{6})),
897
-
898
- where :math:`x_i` is an element of the input Tensor.
899
-
900
- Inputs:
901
- - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
902
- additional dimensions.
903
-
904
- Outputs:
905
- Tensor, with the same type and shape as the `input_x`.
906
-
907
- Raises:
908
- TypeError: If `input_x` is not a Tensor.
814
+ Refer to :func:`mindspore.ops.hardsigmoid` for more details.
909
815
 
910
816
  Supported Platforms:
911
817
  ``Ascend`` ``GPU`` ``CPU``
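The hard sigmoid definition dropped from the docstring above, :math:`\text{hsigmoid}(x_{i}) = \max(0, \min(1, \frac{x_{i} + 3}{6}))`, is now documented under :func:`mindspore.ops.hardsigmoid`. A one-line NumPy sketch of it (illustrative only):

    import numpy as np

    def hsigmoid(x):
        # hsigmoid(x) = max(0, min(1, (x + 3) / 6))
        return np.clip((x + 3.0) / 6.0, 0.0, 1.0)

    print(hsigmoid(np.array([-4.0, -1.0, 0.0, 2.0, 4.0])))
    # [0.         0.33333333 0.5        0.83333333 1.        ]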
@@ -928,7 +834,7 @@ class Tanh(Primitive):
928
834
  r"""
929
835
  Computes hyperbolic tangent of input element-wise.
930
836
 
931
- Refer to :func:`mindspore.ops.tanh` for more detail.
837
+ Refer to :func:`mindspore.ops.tanh` for more details.
932
838
 
933
839
  Supported Platforms:
934
840
  ``Ascend`` ``GPU`` ``CPU``
@@ -1048,12 +954,116 @@ class InstanceNorm(PrimitiveWithInfer):
1048
954
  """Initialize InstanceNorm."""
1049
955
  self.init_prim_io_names(inputs=['x', 'gamma', 'beta', 'mean', 'variance'],
1050
956
  outputs=['y', 'save_mean', 'save_variance'])
1051
- self.epsilon = validator.check_float_range(epsilon, 0, 1, Rel.INC_RIGHT, 'epsilon', self.name)
1052
- self.momentum = validator.check_float_range(momentum, 0, 1, Rel.INC_BOTH, 'momentum', self.name)
957
+ self.epsilon = validator.check_float_range(epsilon, 0, 1, validator.INC_RIGHT, 'epsilon', self.name)
958
+ self.momentum = validator.check_float_range(momentum, 0, 1, validator.INC_BOTH, 'momentum', self.name)
1053
959
  self._update_parameter = True
1054
960
  self.add_prim_attr('side_effect_mem', True)
1055
961
 
1056
962
 
963
+ class InstanceNormV2(Primitive):
964
+ r"""
965
+ Instance Normalization over a 4D or 5D input.
966
+
967
+ This operator applies Instance Normalization over a 4D or 5D input (a mini-batch of 2D inputs with
968
+ additional channel dimension) as described in the paper `Instance Normalization: The Missing Ingredient for
969
+ Fast Stylization <https://arxiv.org/abs/1607.08022>`_. It rescales and recenters the feature using a mini-batch
970
+ of data and the learned parameters which can be described in the following formula.
971
+
972
+ .. math::
973
+
974
+ y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta
975
+
976
+ where :math:`\gamma` is scale(gamma), :math:`\beta` is bias(beta), :math:`\epsilon` is epsilon.
977
+
978
+ Note:
979
+ The format of input `x` supports ``NCHW`` and ``NC1HWC0`` on the ``CPU`` and ``Ascend`` platforms.
980
+ When attr `is_training` is `False`, this module does not track the running mean and variance.
981
+ The output `batch_mean` and `batch_variance` would be all zero.
982
+
983
+ Args:
984
+ is_training(bool): An optional boolean value. Default: ``True``.
985
+ When set to ``True``, this module tracks the running mean and variance.
986
+ When set to ``False``, this module does not track such statistics and always uses batch
987
+ statistics in both training and eval modes.
988
+ momentum (float): The hyper parameter to compute moving average for running_mean and running_var
989
+ (e.g. :math:`new\_running\_mean = momentum * running\_mean + (1 - momentum) * current\_mean`).
990
+ Momentum value must be [0, 1]. Default: 0.1.
991
+ epsilon (float): A small value added to the denominator for numerical stability.
992
+ Epsilon value must be [0, 1). Default: 1e-5.
993
+
994
+ Inputs:
995
+ - **x** (Tensor) - The input of InstanceNormV2, Tensor of shape :math:`(N, C, H, W)`
996
+ or :math:`(N, C1, H, W, C0)`, data type: float16 or float32.
997
+ - **gamma** (Tensor) - Scale, Shape depends on the shape of input `x`, data type: float32.
998
+ If `x` shape is :math:`(N, C, H, W)`, shape of `gamma` is :math:`(N, C, 1, 1)`.
999
+ If `x` shape is :math:`(N, C1, H, W, C0)`, shape of `gamma` is :math:`(N, C1, 1, 1, C0)`.
1000
+ - **beta** (Tensor) - Bias, has the same shape and data type as `gamma`.
1001
+ - **mean** (Tensor) - Mean value, has the same shape and data type as `gamma`.
1002
+ - **variance** (Tensor) - Variance value, has the same shape and data type as `gamma`.
1003
+
1004
+ Outputs:
1005
+ Tuple of 3 Tensors, the normalized input, the mean and variance of batch input.
1006
+
1007
+ - **y** (Tensor) - The output of InstanceNormV2, same type and shape as the `x`.
1008
+ - **batch_mean** (Tensor) - The mean value of batch input, same type and shape as the input `mean`.
1009
+ - **batch_variance** (Tensor) - The variance value of batch input, same type and shape as the input `variance`.
1010
+
1011
+ Supported Platforms:
1012
+ ``Ascend`` ``CPU``
1013
+
1014
+ Raises:
1015
+ TypeError: If either item in the inputs is not Tensor.
1016
+ TypeError: If data type of `x` is neither float16 nor float32.
1017
+ TypeError: If data type of `gamma` is not a Tensor of float32.
1018
+ TypeError: If data type of `beta` is not a Tensor of float32.
1019
+ TypeError: If data type of `mean` is not a Tensor of float32.
1020
+ TypeError: If data type of `variance` is not a Tensor of float32.
1021
+ TypeError: If data type of attr `is_training` is not bool.
1022
+ TypeError: If data type of attr `momentum` is not float.
1023
+ TypeError: If data type of attr `epsilon` is not float.
1024
+ ValueError: If :math:`H * W <= 1` in input `x`.
1025
+ ValueError: If the shape of either item in the inputs is neither 4D nor 5D.
1026
+ ValueError: If `epsilon` is not in the range of [0, 1).
1027
+ ValueError: If `momentum` is not in the range of [0, 1].
1028
+
1029
+ Examples:
1030
+ >>> x = Tensor(input_data=np.random.randn(128, 48, 32, 64, 12), dtype=mindspore.float32)
1031
+ >>> gamma = Tensor(input_data=np.random.randn(128, 48, 1, 1, 12), dtype=mstype.float32)
1032
+ >>> beta = Tensor(input_data=np.random.randn(128, 48, 1, 1, 12), dtype=mstype.float32)
1033
+ >>> mean = Tensor(input_data=np.random.randn(128, 48, 1, 1, 12), dtype=mstype.float32)
1034
+ >>> var = Tensor(input_data=np.random.randn(128, 48, 1, 1, 12), dtype=mstype.float32)
1035
+ >>> ops = P.InstanceNormV2()
1036
+ >>> output = ops(x, gamma, beta, mean, var)
1037
+ >>> y_shape = output[0].shape
1038
+ >>> print(y_shape)
1039
+ (128, 48, 32, 64, 12)
1040
+ >>> batch_mean_shape = output[1].shape
1041
+ >>> print(batch_mean_shape)
1042
+ (128, 48, 1, 1, 12)
1043
+ >>> batch_var_shape = output[2].shape
1044
+ >>> print(batch_var_shape)
1045
+ (128, 48, 1, 1, 12)
1046
+ """
1047
+ __mindspore_signature__ = (
1048
+ sig.make_sig('x', dtype=sig.sig_dtype.T1),
1049
+ sig.make_sig('gamma', dtype=sig.sig_dtype.T),
1050
+ sig.make_sig('beta', dtype=sig.sig_dtype.T),
1051
+ sig.make_sig('mean', dtype=sig.sig_dtype.T),
1052
+ sig.make_sig('variance', dtype=sig.sig_dtype.T),
1053
+ )
1054
+
1055
+ @prim_attr_register
1056
+ def __init__(self, is_training=True, momentum=0.1, epsilon=1e-5):
1057
+ """Initialize InstanceNormV2."""
1058
+ self.init_prim_io_names(inputs=['x', 'gamma', 'beta', 'mean', 'variance'],
1059
+ outputs=['y', 'batch_mean', 'batch_variance'])
1060
+ validator.check_is_float(epsilon, 'epsilon', self.name)
1061
+ validator.check_is_float(momentum, 'momentum', self.name)
1062
+ validator.check_float_range(epsilon, 0, 1, validator.INC_RIGHT, 'epsilon', self.name)
1063
+ validator.check_float_range(momentum, 0, 1, validator.INC_BOTH, 'momentum', self.name)
1064
+ validator.check_bool(is_training, "is_training", self.name)
1065
+
1066
+
1057
1067
  class BNTrainingReduce(Primitive):
1058
1068
  """
1059
1069
  The BNTrainingReduce interface is deprecated, please use the :class:`mindspore.ops.BatchNorm` instead.
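The normalization described by the new `InstanceNormV2` docstring, :math:`y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta` with statistics taken per sample and per channel, can be sketched in NumPy for the plain NCHW layout (a sketch only; the operator additionally handles the NC1HWC0 layout and running statistics):

    import numpy as np

    def instance_norm_nchw(x, gamma, beta, eps=1e-5):
        # Mean and variance over the spatial axes (H, W), independently for every (N, C) pair.
        mean = x.mean(axis=(2, 3), keepdims=True)
        var = x.var(axis=(2, 3), keepdims=True)
        return (x - mean) / np.sqrt(var + eps) * gamma + beta

    x = np.random.randn(2, 3, 4, 4).astype(np.float32)
    gamma = np.ones((2, 3, 1, 1), dtype=np.float32)  # per the docstring, gamma/beta have shape (N, C, 1, 1)
    beta = np.zeros((2, 3, 1, 1), dtype=np.float32)
    print(instance_norm_nchw(x, gamma, beta).shape)  # (2, 3, 4, 4)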
@@ -1092,8 +1102,8 @@ class BNTrainingUpdate(Primitive):
1092
1102
  validator.check_value_type("isRef", isRef, [bool], self.name)
1093
1103
  validator.check_value_type("epsilon", epsilon, [float], self.name)
1094
1104
  validator.check_value_type("factor", factor, [float], self.name)
1095
- self.epsilon = validator.check_float_range(epsilon, 0, 1, Rel.INC_RIGHT, 'epsilon', 'BNTrainingUpdate')
1096
- self.factor = validator.check_float_range(factor, 0, 1, Rel.INC_BOTH, 'factor', 'BNTrainingUpdate')
1105
+ self.epsilon = validator.check_float_range(epsilon, 0, 1, validator.INC_RIGHT, 'epsilon', 'BNTrainingUpdate')
1106
+ self.factor = validator.check_float_range(factor, 0, 1, validator.INC_BOTH, 'factor', 'BNTrainingUpdate')
1097
1107
  self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
1098
1108
  if context.get_context("device_target") != "GPU" and self.format == "NHWC":
1099
1109
  raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
@@ -1102,7 +1112,7 @@ class BNTrainingUpdate(Primitive):
1102
1112
  self.add_prim_attr('data_format', self.format)
1103
1113
 
1104
1114
 
1105
- class BatchNorm(Primitive):
1115
+ class BatchNorm(PrimitiveWithInfer):
1106
1116
  r"""
1107
1117
  Batch Normalization for input data and updated parameters.
1108
1118
 
@@ -1117,8 +1127,9 @@ class BatchNorm(Primitive):
1117
1127
 
1118
1128
  y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta
1119
1129
 
1120
- where :math:`\gamma` is scale, :math:`\beta` is bias, :math:`\epsilon` is epsilon, :math:`mean` is the mean of x,
1121
- :math:`variance` is the variance of x.
1130
+ where :math:`\gamma` is scale, :math:`\beta` is bias, :math:`\epsilon` is epsilon,
1131
+ :math:`mean` is the mean of :math:`x`,
1132
+ :math:`variance` is the variance of :math:`x`.
1122
1133
 
1123
1134
  .. warning::
1124
1135
  - If the operation is used for inference, and outputs "reserve_space_1" and "reserve_space_2" are available,
@@ -1132,8 +1143,8 @@ class BatchNorm(Primitive):
1132
1143
  momentum (float): The hyper parameter to compute moving average for running_mean and running_var
1133
1144
  (e.g. :math:`new\_running\_mean = (1 - momentum) * running\_mean + momentum * current\_mean`).
1134
1145
  Momentum value must be [0, 1]. Default: 0.1.
1135
- data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'.
1136
- Default: "NCHW".
1146
+ data_format (str): The optional value for data format, is 'NHWC' or 'NCHW', and the 'NHWC' format
1147
+ is only supported in GPU target. Default: "NCHW".
1137
1148
 
1138
1149
  Inputs:
1139
1150
  If `is_training` is False, inputs are Tensors.
@@ -1169,7 +1180,7 @@ class BatchNorm(Primitive):
1169
1180
  TypeError: If dtype of `input_x`, `scale` is neither float16 nor float32.
1170
1181
 
1171
1182
  Supported Platforms:
1172
- ``Ascend`` ``CPU`` ``GPU``
1183
+ ``Ascend`` ``GPU`` ``CPU``
1173
1184
 
1174
1185
  Examples:
1175
1186
  >>> input_x = Tensor(np.ones([2, 2]), mindspore.float32)
@@ -1200,8 +1211,8 @@ class BatchNorm(Primitive):
1200
1211
  else:
1201
1212
  self.add_prim_attr('side_effect_mem', True)
1202
1213
  validator.check_value_type('is_training', is_training, (bool,), self.name)
1203
- validator.check_float_range(epsilon, 0, 1, Rel.INC_RIGHT, 'epsilon', self.name)
1204
- validator.check_float_range(momentum, 0, 1, Rel.INC_BOTH, 'momentum', self.name)
1214
+ validator.check_float_range(epsilon, 0, 1, validator.INC_RIGHT, 'epsilon', self.name)
1215
+ validator.check_float_range(momentum, 0, 1, validator.INC_BOTH, 'momentum', self.name)
1205
1216
  self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
1206
1217
  if context.get_context("device_target") != "GPU" and self.format == "NHWC":
1207
1218
  raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
@@ -1211,12 +1222,112 @@ class BatchNorm(Primitive):
1211
1222
  self.init_prim_io_names(inputs=['x', 'scale', 'offset', 'mean', 'variance'],
1212
1223
  outputs=['y', 'batch_mean', 'batch_variance', 'reserve_space_1', 'reserve_space_2'])
1213
1224
 
1225
+ def infer_shape(self, input_x, scale, bias, mean, variance):
1226
+ input_x_channel = input_x[-1] if self.format == "NHWC" else input_x[1]
1227
+ validator.check_equal_int(len(scale), 1, "scale rank", self.name)
1228
+ validator.check("scale shape", scale, "bias shape", bias, validator.EQ, self.name)
1229
+ validator.check("scale shape[0]", scale[0], "input_x channel", input_x_channel, validator.EQ, self.name)
1230
+ if not self.is_training:
1231
+ validator.check_equal_int(len(mean), 1, "mean rank", self.name)
1232
+ validator.check("mean shape", mean, "variance shape", variance, validator.EQ, self.name)
1233
+ validator.check("mean shape", mean, "scale shape", scale, validator.EQ, self.name)
1234
+ return input_x, scale, scale, scale, scale
1235
+
1236
+ def infer_dtype(self, input_x, scale, bias, mean, variance):
1237
+ validator.check_tensor_dtype_valid("input_x", input_x, [mstype.float16, mstype.float32], self.name)
1238
+ args = {"scale": scale, "bias": bias, "mean": mean, "variance": variance}
1239
+ validator.check_tensors_dtypes_same_and_valid(args, [mstype.float16, mstype.float32], self.name)
1240
+ return input_x, mstype.float32, mstype.float32, mstype.float32, mstype.float32
1241
+
1214
1242
 
1215
1243
  class Conv2D(Primitive):
1216
1244
  r"""
1217
1245
  2D convolution layer.
1218
1246
 
1219
- Refer to :func:`mindspore.ops.conv2d` for more detail.
1247
+ Applies a 2D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
1248
+ where :math:`N` is batch size, :math:`C` is channel number, :math:`H` is height, :math:`W` is width, :math:`X_i` is
1249
+ the :math:`i^{th}` input value and :math:`b_i` indicates the deviation value of the :math:`i^{th}` input value.
1250
+ For each batch of shape :math:`(C_{in}, H_{in}, W_{in})`, the formula is defined as:
1251
+
1252
+ .. math::
1253
+
1254
+ out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,
1255
+
1256
+ where :math:`ccor` is the cross correlation operator, :math:`C_{in}` is the input channel number, :math:`j` ranges
1257
+ from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
1258
+ filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
1259
+ of kernel and it has shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`,
1260
+ where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the
1261
+ convolution kernel. The full kernel has shape
1262
+ :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
1263
+ where group is the group number to split the input in the channel dimension.
1264
+
1265
+ If the 'pad_mode' is set to be "pad", the output height and width will be
1266
+ :math:`\left \lfloor{1 + \frac{H_{in} + \text{padding[0]} + \text{padding[1]} - \text{kernel_size[0]} -
1267
+ (\text{kernel_size[0]} - 1) \times (\text{dilation[0]} - 1) }{\text{stride[0]}}} \right \rfloor` and
1268
+ :math:`\left \lfloor{1 + \frac{W_{in} + \text{padding[2]} + \text{padding[3]} - \text{kernel_size[1]} -
1269
+ (\text{kernel_size[1]} - 1) \times (\text{dilation[1]} - 1) }{\text{stride[1]}}} \right \rfloor` respectively.
1270
+ Where :math:`dilation` is Spacing between kernel elements, :math:`stride` is The step length of each step,
1271
+ :math:`padding` is zero-padding added to both sides of the input.
1272
+
1273
+ The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
1274
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
1275
+
1276
+ Note:
1277
+ On Ascend platform, :math:`group = 1` must be satisfied.
1278
+
1279
+ Args:
1280
+ out_channel (int): The number of output channel :math:`C_{out}`.
1281
+ kernel_size (Union[int, tuple[int]]): The data type is int or a tuple of 2 integers. Specifies the height
1282
+ and width of the 2D convolution window. Single int means the value is for both the height and the width of
1283
+ the kernel. A tuple of 2 ints means the first value is for the height and the other is for the
1284
+ width of the kernel.
1285
+ mode (int): Modes for different convolutions. The value is currently not used. Default: 1.
1286
+ pad_mode (str): Specifies padding mode. The optional values are
1287
+ "same", "valid" and "pad". Default: "valid".
1288
+
1289
+ - same: Adopts the way of completion. The height and width of the output will be equal to
1290
+ the input `x` divided by stride. The padding will be evenly calculated in top and bottom,
1291
+ left and right possiblily.
1292
+ Otherwise, the last extra padding will be calculated from the bottom and the right side.
1293
+ If this mode is set, `pad` must be 0.
1294
+
1295
+ - valid: Adopts the way of discarding. The possible largest height and width of output will be returned
1296
+ without padding. Extra pixels will be discarded. If this mode is set, `pad` must be 0.
1297
+
1298
+ - pad: Implicit paddings on both sides of the input `x`. The number of `pad` will be padded to the input
1299
+ Tensor borders. `pad` must be greater than or equal to 0.
1300
+ pad (Union(int, tuple[int])): Implicit paddings on both sides of the input `x`. If `pad` is one integer,
1301
+ the paddings of top, bottom, left and right are the same, equal to pad. If `pad` is a tuple
1302
+ with four integers, the paddings of top, bottom, left and right will be equal to pad[0],
1303
+ pad[1], pad[2], and pad[3] accordingly. Default: 0.
1304
+ stride (Union(int, tuple[int])): The distance of kernel moving, an int number that represents
1305
+ the height and width of movement are both strides, or a tuple of two int numbers that
1306
+ represent height and width of movement respectively. Default: 1.
1307
+ dilation (Union(int, tuple[int])): The data type is int or a tuple of 2 integers. Specifies the dilation rate
1308
+ to use for dilated convolution. If set to be :math:`k > 1`, there will
1309
+ be :math:`k - 1` pixels skipped for each sampling location. Its value must
1310
+ be greater than or equal to 1 and bounded by the height and width of the
1311
+ input `x`. Default: 1.
1312
+ group (int): Splits input into groups. Default: 1.
1313
+ data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. Default: "NCHW".
1314
+
1315
+ Inputs:
1316
+ - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
1317
+ - **weight** (Tensor) - Set size of kernel is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`,
1318
+ then the shape is :math:`(C_{out}, C_{in}, \text{kernel_size[0]}, \text{kernel_size[1]})`.
1319
+
1320
+ Outputs:
1321
+ Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`.
1322
+
1323
+ Raises:
1324
+ TypeError: If `kernel_size`, `stride`, `pad` or `dilation` is neither an int nor a tuple.
1325
+ TypeError: If `out_channel` or `group` is not an int.
1326
+ ValueError: If `kernel_size`, `stride` or `dilation` is less than 1.
1327
+ ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
1328
+ ValueError: If `pad` is a tuple whose length is not equal to 4.
1329
+ ValueError: If `pad_mode` is not equal to 'pad' and `pad` is not equal to (0, 0, 0, 0).
1330
+ ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'.
1220
1331
 
1221
1332
  Supported Platforms:
1222
1333
  ``Ascend`` ``GPU`` ``CPU``
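The 'pad' mode output-size formula in the expanded Conv2D docstring above becomes concrete with a small helper (a sketch, not MindSpore code); `pad` is ordered (top, bottom, left, right) as in the `pad` argument:

    import math

    def conv2d_pad_output(h_in, w_in, kernel, stride, dilation, pad):
        # H_out = floor(1 + (H_in + pad_top + pad_bottom - k_h - (k_h - 1) * (d_h - 1)) / s_h), same for W.
        h_out = math.floor(1 + (h_in + pad[0] + pad[1] - kernel[0]
                                - (kernel[0] - 1) * (dilation[0] - 1)) / stride[0])
        w_out = math.floor(1 + (w_in + pad[2] + pad[3] - kernel[1]
                                - (kernel[1] - 1) * (dilation[1] - 1)) / stride[1])
        return h_out, w_out

    # A 32x32 input, 3x3 kernel, stride 1, dilation 1 and one pixel of padding on every side
    # keeps its spatial size:
    print(conv2d_pad_output(32, 32, (3, 3), (1, 1), (1, 1), (1, 1, 1, 1)))  # (32, 32)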
@@ -1279,16 +1390,15 @@ class Conv2D(Primitive):
1279
1390
 
1280
1391
  class DataFormatVecPermute(Primitive):
1281
1392
  r"""
1282
- Permute input tensor from src_format to dst_format.
1393
+ Converts the input tensor from the `src_format` to the `dst_format` by permuting its dimensions.
1283
1394
 
1284
1395
  Args:
1285
- src_format (str): An optional value for source data format. The format can be 'NHWC' and 'NCHW'.
1286
- Default: 'NHWC'.
1287
- dst_format (str): An optional value for destination data format. The format can be 'NHWC' and 'NCHW'.
1288
- Default: 'NCHW'.
1396
+ src_format (str, optional): the source data format, which can be 'NHWC' and 'NCHW'. Default: 'NHWC'.
1397
+ dst_format (str, optional): the target data format, which can be 'NHWC' and 'NCHW'. Default: 'NCHW'.
1289
1398
 
1290
1399
  Inputs:
1291
- - **input_x** (Tensor) - A Tensor of shape (4, ) or (4, 2) in source data format. Only supports int32 and int64.
1400
+ - **input_x** (Tensor) - A Tensor of shape :math:`(4, )` or :math:`(4, 2)` in source data format.
1401
+ Supports int32 and int64 datatype.
1292
1402
 
1293
1403
  Outputs:
1294
1404
  Tensor, has the same data type and shape as the `input_x`.
@@ -1297,7 +1407,7 @@ class DataFormatVecPermute(Primitive):
1297
1407
  TypeError: If `input_x` is not a Tensor.
1298
1408
  TypeError: If dtype of `input_x` is neither int32 nor int64.
1299
1409
  ValueError: If `src_format` or `dst_format` is not a str in ['NHWC', 'NCHW'].
1300
- ValueError: If input_x shape is not (4, ) or (4, 2).
1410
+ ValueError: If `input_x` shape is not :math:`(4, )` or :math:`(4, 2)`.
1301
1411
 
1302
1412
  Supported Platforms:
1303
1413
  ``Ascend`` ``GPU`` ``CPU``
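What DataFormatVecPermute does to a length-4 shape vector can be pictured with a tiny sketch: each position of the destination format takes the source entry with the matching axis letter. This reading is an assumption of the illustration for the :math:`(4, )` case only; the operator also accepts :math:`(4, 2)` input:

    import numpy as np

    def permute_format_vec(vec, src_format="NHWC", dst_format="NCHW"):
        # Hypothetical helper: position i of the result takes the source entry
        # whose axis letter is dst_format[i].
        return np.array([vec[src_format.index(axis)] for axis in dst_format], dtype=vec.dtype)

    shape_nhwc = np.array([8, 224, 224, 3], dtype=np.int32)  # (N, H, W, C)
    print(permute_format_vec(shape_nhwc))                    # [  8   3 224 224] -> (N, C, H, W)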
@@ -1384,8 +1494,8 @@ class DepthwiseConv2dNative(PrimitiveWithInfer):
1384
1494
  def infer_shape(self, x_shape, w_shape, b_shape=None):
1385
1495
  validator.check_equal_int(len(w_shape), 4, "weight rank", self.name)
1386
1496
  validator.check_equal_int(len(x_shape), 4, "x rank", self.name)
1387
- validator.check("x_shape[1]", x_shape[1], "w_shape[1]", w_shape[1], Rel.EQ, self.name)
1388
- validator.check('kernel_size', self.kernel_size, 'w_shape[2:4]', tuple(w_shape[2:4]), Rel.EQ, self.name)
1497
+ validator.check("x_shape[1]", x_shape[1], "w_shape[1]", w_shape[1], validator.EQ, self.name)
1498
+ validator.check('kernel_size', self.kernel_size, 'w_shape[2:4]', tuple(w_shape[2:4]), validator.EQ, self.name)
1389
1499
 
1390
1500
  kernel_size_n, _, kernel_size_h, kernel_size_w = w_shape
1391
1501
  _, _, stride_h, stride_w = self.stride
@@ -1529,7 +1639,7 @@ class MaxPool(_Pool):
1529
1639
 
1530
1640
  Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool outputs
1531
1641
  regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
1532
- :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows.
1642
+ :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows:
1533
1643
 
1534
1644
  .. math::
1535
1645
  \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
@@ -1683,55 +1793,11 @@ class MaxPoolV1(Primitive):
1683
1793
 
1684
1794
  class MaxPoolWithArgmax(Primitive):
1685
1795
  r"""
1686
- Performs max pooling on the input Tensor and returns both max values and indices.
1687
-
1688
- Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool outputs
1689
- regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
1690
- :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows.
1691
-
1692
- .. math::
1693
- \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
1694
- \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)
1695
-
1696
- Args:
1697
- kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value and argmax
1698
- value, is an int number that represents height and width of the kernel, or a tuple of
1699
- two int numbers that represent height and width respectively. Default: 1.
1700
- strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
1701
- not only the height of movement but also the width of movement, or a tuple of two int numbers that
1702
- represent height and width of movement respectively. Default: 1.
1703
- pad_mode (str): The optional value for pad mode, is "same" or "valid".
1704
- Default: "valid".
1705
-
1706
- - same: Adopts the way of completion. The height and width of the output will be the same as
1707
- the input. The total number of padding will be calculated in horizontal and vertical
1708
- directions and evenly distributed to top, bottom, left and right if possible.
1709
- Otherwise, the last extra padding will be done from the bottom and the right side.
1710
-
1711
- - valid: Adopts the way of discarding. The possible largest height and width of output
1712
- will be returned without padding. Extra pixels will be discarded.
1713
-
1714
- data_format (str) : The optional value for data format, is 'NHWC' or 'NCHW'.
1715
- Default: 'NCHW'.
1716
-
1717
- Inputs:
1718
- - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
1719
- Data type must be float16 or float32.
1720
-
1721
- Outputs:
1722
- Tuple of 2 Tensors, representing the maxpool result and where the max values are generated.
1723
-
1724
- - **output** (Tensor) - Maxpooling result, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
1725
- It has the same data type as `x`.
1726
- - **mask** (Tensor) - Max values' index represented by the mask. Data type is int32.
1727
-
1728
- Raises:
1729
- TypeError: If the data type of `x` is neither float16 nor float32.
1730
- TypeError: If `kernel_size` or `strides` is neither an int nor a tuple.
1731
- TypeError: If `x` is not a Tensor.
1796
+ `ops.MaxPoolWithArgmax` is deprecated since version 2.0 and will be removed in a future version;
1797
+ use `ops.MaxPoolWithArgmaxV2` instead.
1732
1798
 
1733
1799
  Supported Platforms:
1734
- ``Ascend`` ``GPU`` ``CPU``
1800
+ Deprecated
1735
1801
 
1736
1802
  Examples:
1737
1803
  >>> x = Tensor(np.arange(1 * 3 * 3 * 4).reshape((1, 3, 3, 4)), mindspore.float32)
@@ -1746,6 +1812,7 @@ class MaxPoolWithArgmax(Primitive):
1746
1812
  [33. 34. 35.]]]]
1747
1813
  """
1748
1814
 
1815
+ @deprecated("2.0", "ops.MaxPoolWithArgmaxV2", False)
1749
1816
  @prim_attr_register
1750
1817
  def __init__(self, kernel_size=1, strides=1, pad_mode="valid", data_format="NCHW"):
1751
1818
  """Initialize MaxPoolWithArgmax."""
@@ -1770,13 +1837,13 @@ class MaxPoolWithArgmax(Primitive):
1770
1837
  self.add_prim_attr("strides", self.strides)
1771
1838
 
1772
1839
 
1773
- class MaxPool3D(PrimitiveWithInfer):
1840
+ class MaxPool3D(Primitive):
1774
1841
  r"""
1775
1842
  Applies a 3D max pooling over an input Tensor which can be regarded as a composition of 3D planes.
1776
1843
 
1777
1844
  Typically the input is of shape :math:`(N_{in}, C_{in}, D_{in}, H_{in}, W_{in})`, MaxPool outputs
1778
1845
  regional maximum in the :math:`(D_{in}, H_{in}, W_{in})`-dimension. Given kernel size
1779
- :math:`ks = (d_{ker}, h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1, s_2)`, the operation is as follows.
1846
+ :math:`ks = (d_{ker}, h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1, s_2)`, the operation is as follows:
1780
1847
 
1781
1848
  .. math::
1782
1849
  \text{output}(N_i, C_j, d, h, w) =
@@ -1815,7 +1882,7 @@ class MaxPool3D(PrimitiveWithInfer):
1815
1882
 
1816
1883
  Inputs:
1817
1884
  - **x** (Tensor) - Tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`.
1818
- Data type must be float16 or float32.
1885
+ Data type must be float16, float32 or float64.
1819
1886
 
1820
1887
  Outputs:
1821
1888
  Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})`. Has the data type of `x`.
@@ -1882,56 +1949,16 @@ class MaxPool3D(PrimitiveWithInfer):
1882
1949
  validator.check_non_negative_int(item, 'pad_list item', self.name)
1883
1950
  self.add_prim_attr("pad_list", self.pad_list)
1884
1951
 
1885
- def infer_shape(self, x_shape):
1886
- validator.check_equal_int(len(x_shape), 5, "x rank", self.name)
1887
- batch, channel, input_d, input_h, input_w = x_shape
1888
- self.add_prim_attr("x_shape", x_shape)
1889
- _, _, kernel_d, kernel_h, kernel_w = self.kernel_size
1890
- _, _, stride_d, stride_h, stride_w = self.strides
1891
-
1892
- if self.pad_mode == "VALID":
1893
- out_d = math.ceil((input_d - (kernel_d - 1)) / stride_d)
1894
- out_h = math.ceil((input_h - (kernel_h - 1)) / stride_h)
1895
- out_w = math.ceil((input_w - (kernel_w - 1)) / stride_w)
1896
- elif self.pad_mode == "SAME":
1897
- out_d = math.ceil(input_d / stride_d)
1898
- out_h = math.ceil(input_h / stride_h)
1899
- out_w = math.ceil(input_w / stride_w)
1900
- else:
1901
- out_d = ((input_d + self.pad_list[0] + self.pad_list[1] -
1902
- (kernel_d - 1) - 1) / stride_d) + 1
1903
- out_h = ((input_h + self.pad_list[2] + self.pad_list[3] -
1904
- (kernel_h - 1) - 1) / stride_h) + 1
1905
- out_w = ((input_w + self.pad_list[4] + self.pad_list[5] -
1906
- (kernel_w - 1) - 1) / stride_w) + 1
1907
- if self.ceil_mode:
1908
- out_d = math.ceil(out_d)
1909
- out_h = math.ceil(out_h)
1910
- out_w = math.ceil(out_w)
1911
- else:
1912
- out_d = math.floor(out_d)
1913
- out_h = math.floor(out_h)
1914
- out_w = math.floor(out_w)
1915
- out_shape = [batch, channel, out_d, out_h, out_w]
1916
-
1917
- _check_shape('output', out_shape, self.name)
1918
- return out_shape
1919
-
1920
- def infer_dtype(self, x_dtype):
1921
- validator.check_tensor_dtype_valid("x", x_dtype, [mstype.float16, mstype.float32], self.name)
1922
- return x_dtype
1923
-
1924
1952
 
1925
1953
  class MaxUnpool2D(Primitive):
1926
1954
  r"""
1927
- Computes a partial inverse of MaxUnpool2D.
1928
-
1929
- MaxUnpool2D is not fully invertible, since the non-maximal values are lost.
1955
+ Calculates the partial inverse of MaxPool2D operation.
1930
1956
 
1931
- MaxUnpool2D takes in as input the output of MaxUnpool2D including the indices of the maximal values
1932
- and computes a partial inverse in which all non-maximal values are set to zero. Typically the input
1933
- is of shape :math:`(N, C, H_{in}, W_{in})`, the output is of shape :math:`(N, C, H_{out}, W_{out})`,
1934
- the operation is as follows.
1957
+ Since MaxPool2D loses non-maximal values, it is not fully invertible.
1958
+ Therefore, MaxUnpool2D takes the output of MaxPool2D, including the indices of
1959
+ the maximal values, and computes a partial inverse where all non-maximal values are set to zero.
1960
+ Typically the input is of shape :math:`(N, C, H_{in}, W_{in})` ,
1961
+ the output is of shape :math:`(N, C, H_{out}, W_{out})` , the operation is as follows:
1935
1962
 
1936
1963
  .. math::
1937
1964
  \begin{array}{ll} \\
@@ -1939,32 +1966,41 @@ class MaxUnpool2D(Primitive):
1939
1966
  W_{out} = (W{in} - 1) \times strides[1] - 2 \times pads[1] + ksize[1] \\
1940
1967
  \end{array}
1941
1968
 
1969
+ .. warning::
1970
+ This is an experimental API that is subject to change or deletion.
1971
+
1942
1972
  Args:
1943
1973
  ksize (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
1944
1974
  is an int number that represents height and width of the kernel, or a tuple
1945
1975
  of two int numbers that represent height and width respectively.
1946
- strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
1947
- the height and width of movement are both strides, or a tuple of two int numbers that
1948
- represent height and width of movement respectively.
1949
- If strides is 0 or (0, 0), then strides equal to ksize. Default: 0.
1950
- pads (Union[int, tuple[int]]): The pad value to be filled. Default: 0. If `pads` is an integer,
1951
- the paddings of height and width are the same, equal to pads. If `pads` is a tuple of two
1952
- integers, the padding of height and width equal to pads[0] and pads[1] correspondingly.
1953
- output_shape (tuple[int]) : The target output size is an optional input. Default: ().
1954
- If output_shape == (), then the shape of output computed by kszie, strides and pads.
1955
- If output_shape != (), then output_shape must be :math:`(N, C, H, W)` or
1956
- :math:`(N, H, W, C)` and output_shape must belong to
1957
- :math:`[(N, C, H_{out} - strides[0], W_{out} - strides[1]),
1958
- (N, C, H_{out} + strides[0], W_{out} + strides[1])]`.
1959
- data_format (str) : The optional value for data format.
1976
+ strides (Union[int, tuple[int]], optional): The strides of kernel moving.
1977
+ If `strides` is 0 or (0, 0), then `strides` is equal to `ksize` . Default: 0.
1978
+
1979
+ - An int number that represents the height and width of movement are both `strides` .
1980
+ - A tuple of two int numbers that represent height and width of movement respectively.
1981
+
1982
+ pads (Union[int, tuple[int]], optional): The pad value to be filled. Default: 0.
1983
+
1984
+ - If `pads` is an integer, the paddings of height and width are the same, equal to pads.
1985
+ - If `pads` is a tuple of two integers, the padding of height and width equal to pads[0]
1986
+ and pads[1] correspondingly.
1987
+
1988
+ output_shape (tuple[int], optional): The target output size is an optional input. Default: ().
1989
+
1990
+ - If :math:`output\_shape == ()` , then the shape of the output is computed by `ksize`, `strides` and `pads` .
1991
+ - If :math:`output\_shape != ()` , then `output_shape` must be :math:`(N, C, H, W)` or :math:`(N, H, W, C)`
1992
+ and `output_shape` must belong to :math:`[(N, C, H_{out} - strides[0], W_{out} - strides[1]),
1993
+ (N, C, H_{out} + strides[0], W_{out} + strides[1])]`.
1994
+
1995
+ data_format (str, optional): The optional value for data format.
1960
1996
  Currently support 'NCHW' and 'NHWC'. Default: 'NCHW'.
1961
1997
 
1962
1998
  Inputs:
1963
1999
  - **x** (Tensor) - The input Tensor to invert.
1964
2000
  Tensor of shape :math:`(N, C, H_{in}, W_{in})` or :math:`(N, H_{in}, W_{in}, C)`.
1965
- - **argmax** (Tensor) - Max values' index represented by the argmax.
1966
- Tensor of shape must be same with input 'x'.
1967
- Values of argmax must belong to :math:`[0, H_{in} \times W_{in} - 1]`.
2001
+ - **argmax** (Tensor) - Max values' index represented by the `argmax`.
2002
+ Its shape must be the same as the shape of the input `x`.
2003
+ Values of `argmax` must belong to :math:`[0, H_{in} \times W_{in} - 1]`.
1968
2004
  Data type must be in int32 or int64.
1969
2005
 
1970
2006
  Outputs:
@@ -1980,10 +2016,10 @@ class MaxUnpool2D(Primitive):
1980
2016
  ValueError: If `data_format` is not a str or is neither `NCHW` nor `NHWC`.
1981
2017
  ValueError: If `output_shape` whose length is neither 0 or 4.
1982
2018
  ValueError: If `output_shape` is not close to output size
1983
- computed by attr `ksize, strides, pads`.
2019
+ computed by attr `ksize`, `strides` and `pads`.
1984
2020
 
1985
2021
  Supported Platforms:
1986
- ``Ascend`` ``CPU``
2022
+ ``Ascend`` ``GPU`` ``CPU``
1987
2023
 
1988
2024
  Examples:
1989
2025
  >>> x = Tensor(np.array([[[[0, 1], [8, 9]]]]).astype(np.float32))
@@ -2021,12 +2057,9 @@ class MaxUnpool2D(Primitive):
2021
2057
 
2022
2058
  class MaxUnpool3D(Primitive):
2023
2059
  r"""
2024
- Computes a partial inverse of MaxUnpool3D.
2060
+ Computes the inverse of :class:`mindspore.ops.MaxPool3D`.
2025
2061
 
2026
- MaxUnpool3D is not fully invertible, since the non-maximal values are lost.
2027
-
2028
- MaxUnpool3D takes in as input the output of MaxUnpool3D including the indices of the maximal
2029
- values and computes a partial inverse in which all non-maximal values are set to zero.
2062
+ MaxUnpool3D keeps the maximal value and sets all positions of non-maximal values to zero.
2030
2063
  Typically the input is of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`, the output is of
2031
2064
  shape :math:`(N, C, D_{out}, H_{out}, W_{out})`, the operation is as follows.
2032
2065
 
@@ -2037,55 +2070,63 @@ class MaxUnpool3D(Primitive):
2037
2070
  W_{out} = (W{in} - 1) \times strides[2] - 2 \times pads[2] + ksize[2] \\
2038
2071
  \end{array}
2039
2072
 
2073
+ .. warning::
2074
+ This is an experimental API that is subject to change or deletion.
2075
+
2040
2076
  Args:
2041
2077
  ksize (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
2042
2078
  is an int number that represents depth, height and width of the kernel, or a tuple
2043
2079
  of three int numbers that represent depth, height and width respectively.
2044
- strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
2045
- the depth, height and width of movement are both strides, or a tuple of three int numbers that
2046
- represent depth, height and width of movement respectively.
2047
- If strides is 0 or (0, 0, 0), then strides equal to ksize. Default: 0.
2048
- pads (Union[int, tuple[int]]): The pad value to be filled. Default: 0. If `pads` is an integer,
2049
- the paddings of depth, height and width are the same, equal to pads. If `pads` is a tuple of three integers,
2050
- the padding of depth, height and width equal to pads[0], pads[1] and pads[2] correspondingly.
2051
- output_shape (tuple[int]) : The target output size is an optional input. Default: ().
2052
- If output_shape == (), then the shape of output computed by kszie, strides and pads.
2053
- If output_shape != (), then output_shape must be :math:`(N, C, D, H, W)` or
2054
- :math:`(N, D, H, W, C)` and output_shape must belong to
2080
+ strides (Union[int, tuple[int]], optional): The distance of kernel moving. Default: 0.
2081
+
2082
+ - If it is an int number, the depth, height and width of movement are all equal to `strides`.
2083
+ - If it is a tuple of three int numbers, they represent depth, height and width of movement respectively.
2084
+ - If `strides` is 0 or (0, 0, 0), then `strides` is equal to `ksize`.
2085
+
2086
+ pads (Union[int, tuple[int]], optional): The pad value to be filled. Default: 0.
2087
+
2088
+ - If `pads` is an integer, the paddings of depth, height and width are the same, equal to pads.
2089
+ - If `pads` is a tuple of three integers, the padding of depth, height and width equal to pads[0],
2090
+ pads[1] and pads[2] correspondingly.
2091
+
2092
+ output_shape (tuple[int], optional) : The target output size. Default: ().
2093
+ If :math:`output\_shape == ()`, then the shape of the output is computed by `ksize`, `strides` and `pads` as shown above.
2094
+ If :math:`output\_shape != ()`, then output_shape format must be :math:`(N, C, D, H, W)` or
2095
+ :math:`(N, D, H, W, C)` and output_shape must be in range
2055
2096
  :math:`[(N, C, D_{out} - strides[0], H_{out} - strides[1], W_{out} - strides[2]),
2056
2097
  (N, C, D_{out} + strides[0], H_{out} + strides[1], W_{out} + strides[2])]`.
2057
- data_format (str) : The optional value for data format. Currently support 'NCDHW' and 'NDHWC'. Default: 'NCDHW'.
2098
+ data_format (str, optional) : The format of input and output data. Currently
2099
+ supports 'NCDHW' and 'NDHWC'. Default: 'NCDHW'.
2058
2100
 
2059
2101
  Inputs:
2060
2102
  - **x** (Tensor) - The input Tensor to invert.
2061
2103
  Tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(N, D_{in}, H_{in}, W_{in}, C)`.
2062
- - **argmax** (Tensor) - Max values' index represented by the argmax.
2063
- Tensor of shape must be same with input 'x'.
2064
- Values of argmax must belong to :math:`[0, D_{in} \times H_{in} \times W_{in} - 1]`.
2065
- Data type must be in int32 or int64.
2104
+ - **argmax** (Tensor) - Max values' index. Tensor that has the same shape as `x`.
2105
+ Values of `argmax` must be in range :math:`[0, D_{in} \times H_{in} \times W_{in} - 1]`.
2106
+ Data type must be int32 or int64.
2066
2107
 
2067
2108
  Outputs:
2068
2109
  Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(N, D_{out}, H_{out}, W_{out}, C)`.
2069
2110
  Has the same data type as `x`.
2070
2111
 
2071
2112
  Raises:
2072
- TypeError: If data type of `x` or `argmax` is not supported.
2113
+ TypeError: If data type of `x` or `argmax` is Number.
2073
2114
  TypeError: If `ksize`, `strides` or `pads` is neither int nor tuple.
2074
- ValueError: If numbers in `strides` (also support 0 and (0, 0, 0)) or `ksize` is not positive.
2115
+ ValueError: If numbers in `strides` or `ksize` are negative.
2075
2116
  ValueError: If numbers in `pads` are negative.
2076
2117
  ValueError: If `ksize`, `strides` or `pads` is a tuple whose length is not equal to 3.
2077
2118
  ValueError: If `data_format` is not a str or is neither `NCDHW` nor `NDHWC`.
2078
2119
  ValueError: If the length of `output_shape` is neither 0 nor 5.
2079
- ValueError: If `output_shape` is not close to output size
2120
+ ValueError: If `output_shape` is not within the output size range
2080
2121
  computed by attr `ksize, strides, pads`.
2081
2122
 
2082
2123
  Supported Platforms:
2083
- ``Ascend`` ``CPU``
2124
+ ``Ascend`` ``GPU`` ``CPU``
2084
2125
 
2085
2126
  Examples:
2086
2127
  >>> x = Tensor(np.array([[[[[0, 1], [8, 9]]]]]).astype(np.float32))
2087
2128
  >>> argmax = Tensor(np.array([[[[[0, 1], [2, 3]]]]]).astype(np.int64))
2088
- >>> maxunpool3d = P.MaxUnpool3D(ksize=1, strides=1, pads=0)
2129
+ >>> maxunpool3d = ops.MaxUnpool3D(ksize=1, strides=1, pads=0)
2089
2130
  >>> output = maxunpool3d(x, argmax)
2090
2131
  >>> print(output.asnumpy())
2091
2132
  [[[[[0. 1.]
@@ -2115,11 +2156,11 @@ class MaxUnpool3D(Primitive):
2115
2156
  self.output_shape = output_shape
2116
2157
 
2117
2158
 
2118
- class AvgPool(_Pool):
2159
+ class AvgPool(Primitive):
2119
2160
  r"""
2120
2161
  Average pooling operation.
2121
2162
 
2122
- Refer to :func:`mindspore.ops.avg_pool2d` for more detail.
2163
+ Refer to :func:`mindspore.ops.avg_pool2d` for more details.
2123
2164
 
2124
2165
  Args:
2125
2166
  kernel_size (Union[int, tuple[int]]): The size of kernel used to take the average value,
@@ -2180,7 +2221,23 @@ class AvgPool(_Pool):
2180
2221
  @prim_attr_register
2181
2222
  def __init__(self, kernel_size=1, strides=1, pad_mode="valid", data_format="NCHW"):
2182
2223
  """Initialize AvgPool."""
2183
- super(AvgPool, self).__init__(kernel_size, strides, pad_mode, data_format)
2224
+ self.init_prim_io_names(inputs=['x'], outputs=['output'])
2225
+ validator.check_value_type('kernel_size', kernel_size, [int, tuple], self.name)
2226
+ validator.check_value_type('strides', strides, [int, tuple], self.name)
2227
+ validator.check_value_type('pad_mode', pad_mode, [str], self.name)
2228
+ self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.name)
2229
+ self.add_prim_attr("pad_mode", self.pad_mode)
2230
+ self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
2231
+ if context.get_context("device_target") != "GPU" and self.format == "NHWC":
2232
+ raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
2233
+ f"but got the 'data_format' is {self.format} and "
2234
+ f"the platform is {context.get_context('device_target')}.")
2235
+ self.add_prim_attr('data_format', self.format)
2236
+ self.kernel_size = _check_positive_int_or_tuple(
2237
+ "kernel_size", kernel_size, self.name, allow_four=False, ret_four=True)
2238
+ self.add_prim_attr("kernel_size", self.kernel_size)
2239
+ self.strides = _check_positive_int_or_tuple("strides", strides, self.name, allow_four=False, ret_four=True)
2240
+ self.add_prim_attr("strides", self.strides)
2184
2241
 
2185
2242
 
2186
2243
  class AvgPoolV1(Primitive):
@@ -2362,6 +2419,22 @@ class MaxPool3DWithArgmax(Primitive):
2362
2419
  \max_{l=0, \ldots, d_{ker}-1} \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
2363
2420
  \text{input}(N_i, C_j, s_0 \times d + l, s_1 \times h + m, s_2 \times w + n)
2364
2421
 
2422
+ The output is a Tensor with shape :math:`(N_{out}, C_{out}, D_{out}, H_{out}, W_{out})` and its depth, height and
2423
+ width are:
2424
+
2425
+ .. math::
2426
+ \begin{array}{ll} \\
2427
+ D_{out} = \frac{D_{in} + 2 \times \text{pads}[0] - \text{dilation}[0] \times (\text{ksize}[0] - 1) - 1}
2428
+ {\text{stride}[0]} + 1 \\
2429
+ H_{out} = \frac{H_{in} + 2 \times \text{pads}[1] - \text{dilation}[1] \times (\text{ksize}[1] - 1) - 1}
2430
+ {\text{stride}[1]} + 1 \\
2431
+ W_{out} = \frac{W_{in} + 2 \times \text{pads}[2] - \text{dilation}[2] \times (\text{ksize}[2] - 1) - 1}
2432
+ {\text{stride}[2]} + 1 \\
2433
+ \end{array}
2434
+
2435
+ .. warning::
2436
+ This is an experimental API that is subject to change or deletion.
2437
+
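To make the added formula concrete (illustrative numbers only): with :math:`D_{in} = 4`, pads = 0, dilation = 1, ksize = 2 and stride = 2, the output depth is floor((4 + 0 - 1*(2 - 1) - 1) / 2) + 1 = 2, and the same pattern applies to height and width. A small sanity check:

# Hypothetical check of the output-depth arithmetic shown above.
d_in, pad, dilation, ksize, stride = 4, 0, 1, 2, 2
d_out = (d_in + 2 * pad - dilation * (ksize - 1) - 1) // stride + 1
print(d_out)  # 2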
2365
2438
  Args:
2366
2439
  ksize (Union[int, tuple[int]]): The size of kernel used to take the maximum value and arg
2367
2440
  value, is an int number that represents depth, height and width of the kernel, or a tuple of
@@ -2397,7 +2470,7 @@ class MaxPool3DWithArgmax(Primitive):
2397
2470
  ValueError: If `argmax_type` is not mindspore.int64 or mindspore.int32.
2398
2471
 
2399
2472
  Supported Platforms:
2400
- ``GPU``
2473
+ ``Ascend`` ``GPU`` ``CPU``
2401
2474
 
2402
2475
  Examples:
2403
2476
  >>> x = Tensor(np.arange(2 * 1 * 2 * 2 * 2).reshape((2, 1, 2, 2, 2)), mindspore.float32)
@@ -2441,13 +2514,15 @@ class MaxPool3DWithArgmax(Primitive):
2441
2514
 
2442
2515
  class Conv2DTranspose(Conv2DBackpropInput):
2443
2516
  """
2444
- Compute a 2D transposed convolution, which is also known as a deconvolution
2445
- (although it is not an actual deconvolution).
2517
+ Calculates a 2D transposed convolution, which can be regarded as Conv2d for the gradient of the input,
2518
+ also called deconvolution, although it is not an actual deconvolution: it cannot completely restore
2519
+ the original input data, but it can restore the shape of the original input.
2446
2520
 
2447
2521
  Args:
2448
2522
  out_channel (int): The dimensionality of the output space.
2449
2523
  kernel_size (Union[int, tuple[int]]): The size of the convolution window.
2450
2524
  pad_mode (str): Modes to fill padding. It could be "valid", "same", or "pad". Default: "valid".
2525
+ Please refer to :class:`mindspore.nn.Conv2dTranspose` for more specifications about `pad_mode`.
2451
2526
  pad (Union[int, tuple[int]]): The pad value to be filled. Default: 0. If `pad` is an integer, the paddings of
2452
2527
  top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of four integers, the
2453
2528
  padding of top, bottom, left and right equal to pad[0], pad[1], pad[2], and pad[3] correspondingly.
@@ -2457,8 +2532,8 @@ class Conv2DTranspose(Conv2DBackpropInput):
2457
2532
  dilation (Union[int, tuple[int]]): Specifies the dilation rate to be used for the dilated convolution.
2458
2533
  Default: 1.
2459
2534
  group (int): Splits input into groups. Default: 1.
2460
- data_format (str): The format of input and output data. It should be 'NHWC' or 'NCHW',\
2461
- default is 'NCHW'.
2535
+ data_format (str): The format of input and output data. It should be 'NHWC' or 'NCHW'.
2536
+ Default is 'NCHW'.
2462
2537
 
2463
2538
  Inputs:
2464
2539
  - **dout** (Tensor) - the gradients with respect to the output of the convolution.
@@ -2512,9 +2587,8 @@ class BiasAdd(Primitive):
2512
2587
 
2513
2588
  Inputs:
2514
2589
  - **input_x** (Tensor) - The input tensor. The shape can be 2-5 dimensions.
2515
- The data type should be float16 or float32.
2516
2590
  - **bias** (Tensor) - The bias tensor, with shape :math:`(C)`. C must be the same as channel dimension C of
2517
- `input_x`. The data type should be float16 or float32.
2591
+ `input_x`.
2518
2592
 
2519
2593
  Outputs:
2520
2594
  Tensor, with the same shape and data type as `input_x`.
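A minimal usage sketch of this primitive (the shapes and values below are illustrative assumptions):

import numpy as np
import mindspore
from mindspore import Tensor, ops

input_x = Tensor(np.arange(6).reshape(2, 3), mindspore.float32)  # (N, C) = (2, 3)
bias = Tensor(np.ones(3), mindspore.float32)                     # shape (C,)
output = ops.BiasAdd()(input_x, bias)
print(output.shape)  # (2, 3); bias is broadcast along the channel dimension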
@@ -2544,10 +2618,6 @@ class BiasAdd(Primitive):
2544
2618
  """Initialize BiasAdd."""
2545
2619
  self.init_prim_io_names(inputs=['x', 'b'], outputs=['output'])
2546
2620
  self.format = validator.check_string(data_format, ['NCHW', 'NHWC', 'NCDHW'], 'format', self.name)
2547
- if context.get_context("device_target") != "GPU" and self.format == "NHWC":
2548
- raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
2549
- f"but got the 'data_format' is {self.format} and "
2550
- f"the platform is {context.get_context('device_target')}.")
2551
2621
  self.add_prim_attr('data_format', self.format)
2552
2622
 
2553
2623
 
@@ -2580,7 +2650,8 @@ class NLLLoss(Primitive):
2580
2650
 
2581
2651
  Inputs:
2582
2652
  - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type only supports float32 or float16.
2583
- - **labels** (Tensor) - Ground truth labels, with shape :math:`(N,)`. Data type only supports int32.
2653
+ - **labels** (Tensor) - Ground truth labels, with shape :math:`(N,)`, where each value belongs to
2654
+ :math:`[0, C-1]`. Data type only supports int32 or int64.
2584
2655
  - **weight** (Tensor) - The rescaling weight to each class, with shape :math:`(C,)` and data type only
2585
2656
  supports float32 or float16.
2586
2657
 
@@ -2592,13 +2663,15 @@ class NLLLoss(Primitive):
2592
2663
  - **total_weight** (Tensor) - The `total_weight` is a scalar. The data type is the same with `weight's`.
2593
2664
 
2594
2665
  Raises:
2595
- TypeError: If dtype of `logits` or `weight` is neither float16 nor float32, `labels` is not int32.
2666
+ TypeError: If dtype of `logits` or `weight` is neither float16 nor float32.
2667
+ TypeError: If dtype of `labels` is neither int32 nor int64.
2596
2668
  ValueError: If `logits` is not a one or two dimension tensor, `labels` and `weight` are not
2597
2669
  one dimension tensors.
2598
2670
  When `logits` is a two dimension tensor, the first dimension of `logits` is not equal to `labels`,
2599
2671
  and second dimension of `logits` is not equal to `weight`.
2600
2672
  When `logits` is a one dimension tensor, the dimensions of `logits`, `labels`
2601
2673
  and `weight` should be equal to each other.
2674
+ ValueError: If any value of `labels` is out of the range :math:`[0, C-1]`, where :math:`C` is the number of classes.
2602
2675
 
2603
2676
  Supported Platforms:
2604
2677
  ``Ascend`` ``GPU`` ``CPU``
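A short usage sketch consistent with the shapes described above (all concrete values are illustrative assumptions):

import numpy as np
import mindspore
from mindspore import Tensor, ops

logits = Tensor(np.random.randn(3, 5).astype(np.float32))  # (N, C)
labels = Tensor(np.array([1, 0, 4]), mindspore.int32)       # values in [0, C-1]
weight = Tensor(np.ones(5), mindspore.float32)              # (C,)
loss, total_weight = ops.NLLLoss(reduction="mean")(logits, labels, weight)
print(loss.shape, total_weight.shape)  # scalars when reduction is 'mean'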
@@ -2673,7 +2746,7 @@ class SoftmaxCrossEntropyWithLogits(Primitive):
2673
2746
  pass
2674
2747
 
2675
2748
 
2676
- class SparseSoftmaxCrossEntropyWithLogits(PrimitiveWithInfer):
2749
+ class SparseSoftmaxCrossEntropyWithLogits(Primitive):
2677
2750
  r"""
2678
2751
  Computes the softmax cross-entropy value between logits and sparse encoding labels.
2679
2752
 
@@ -2702,7 +2775,7 @@ class SparseSoftmaxCrossEntropyWithLogits(PrimitiveWithInfer):
2702
2775
  TypeError: If `is_grad` is not a bool.
2703
2776
  TypeError: If dtype of `logits` is neither float16 nor float32.
2704
2777
  TypeError: If dtype of `labels` is neither int32 nor int64.
2705
- ValueError: If logits.shape[0] != labels.shape[0].
2778
+ ValueError: If :math:`logits.shape[0] != labels.shape[0]`.
2706
2779
 
2707
2780
  Supported Platforms:
2708
2781
  ``GPU`` ``CPU``
@@ -2729,21 +2802,6 @@ class SparseSoftmaxCrossEntropyWithLogits(PrimitiveWithInfer):
2729
2802
  self.is_grad = is_grad
2730
2803
  self.add_prim_attr('sens', 1.0)
2731
2804
 
2732
- def infer_shape(self, logits_shape, labels_shape):
2733
- validator.check_non_negative_int_sequence(logits_shape, 'dims')
2734
- validator.check_non_negative_int_sequence(labels_shape, 'dims')
2735
- validator.check("logits_shape[0]", logits_shape[0], "labels_shape[0]", labels_shape[0], Rel.EQ, self.name)
2736
- loss_shape = []
2737
- if self.is_grad:
2738
- return logits_shape
2739
- return loss_shape
2740
-
2741
- def infer_dtype(self, logits_type, labels_type):
2742
- validator.check_tensor_dtype_valid("logits", logits_type, (mstype.float16, mstype.float32),
2743
- self.name)
2744
- validator.check_tensor_dtype_valid("labels", labels_type, (mstype.int32, mstype.int64), self.name)
2745
- return logits_type
2746
-
2747
2805
 
2748
2806
  class SparseSoftmaxCrossEntropyWithLogitsV2(Primitive):
2749
2807
  r"""
@@ -2813,14 +2871,19 @@ class ApplyMomentum(Primitive):
2813
2871
  gradient_scale (float): The scale of the gradient. Default: 1.0.
2814
2872
 
2815
2873
  Inputs:
2816
- - **variable** (Parameter) - Weights to be updated. Data type must be float.
2874
+ - **variable** (Parameter) - Weights to be updated. Data type must be float64, int64, float, float16,
2875
+ int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128.
2817
2876
  - **accumulation** (Parameter) - Accumulated gradient value by moment weight,
2818
2877
  has the same data type with `variable`.
2819
- - **learning_rate** (Union[Number, Tensor]) - The learning rate value, must be a float number or
2820
- a scalar tensor with float data type.
2878
+ - **learning_rate** (Union[Number, Tensor]) - The learning rate value, must be a float64, int64, float,
2879
+ float16, int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128 number or
2880
+ a scalar tensor with float64, int64, float, float16, int16, int32, int8, uint16, uint32, uint64, uint8,
2881
+ complex64, complex128 data type.
2821
2882
  - **gradient** (Tensor) - Gradient, has the same data type as `variable`.
2822
- - **momentum** (Union[Number, Tensor]) - Momentum, must be a float number or
2823
- a scalar tensor with float data type.
2883
+ - **momentum** (Union[Number, Tensor]) - Momentum, must be a float64, int64, float, float16, int16, int32,
2884
+ int8, uint16, uint32, uint64, uint8, complex64, complex128 number or
2885
+ a scalar tensor with float64, int64, float, float16, int16, int32, int8, uint16, uint32, uint64, uint8,
2886
+ complex64, complex128 data type.
2824
2887
 
2825
2888
  Outputs:
2826
2889
  Tensor, parameters to be updated.
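A bare-bones sketch of how the inputs line up (illustrative values; in practice this primitive usually runs inside an optimizer Cell):

import numpy as np
import mindspore
from mindspore import Tensor, Parameter, ops

var = Parameter(Tensor(np.ones((2, 2)), mindspore.float32), name="var")
accum = Parameter(Tensor(np.zeros((2, 2)), mindspore.float32), name="accum")
grad = Tensor(np.full((2, 2), 0.1), mindspore.float32)
apply_momentum = ops.ApplyMomentum()
# input order: variable, accumulation, learning_rate, gradient, momentum
out = apply_momentum(var, accum, 0.01, grad, 0.9)
print(out.shape)  # (2, 2)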
@@ -2874,7 +2937,9 @@ class ApplyMomentum(Primitive):
2874
2937
 
2875
2938
  class SmoothL1Loss(Primitive):
2876
2939
  r"""
2877
- Refer to :func:`mindspore.ops.smooth_l1_loss` for more detail.
2940
+ Calculates the smooth L1 loss, a loss function that is robust to outliers.
2941
+
2942
+ Refer to :func:`mindspore.ops.smooth_l1_loss` for more details.
2878
2943
 
2879
2944
  Supported Platforms:
2880
2945
  ``Ascend`` ``GPU`` ``CPU``
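A minimal usage sketch (illustrative values; with the default beta=1.0, an absolute error of 1.0 falls in the linear region and contributes |d| - 0.5 * beta = 0.5):

import numpy as np
import mindspore
from mindspore import Tensor, ops

loss = ops.SmoothL1Loss(beta=1.0, reduction="none")
logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
labels = Tensor(np.array([1, 2, 2]), mindspore.float32)
print(loss(logits, labels))  # expected [0.  0.  0.5]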
@@ -2892,62 +2957,47 @@ class SmoothL1Loss(Primitive):
2892
2957
  def __init__(self, beta=1.0, reduction='none'):
2893
2958
  """Initialize SmoothL1Loss."""
2894
2959
  validator.check_value_type('beta', beta, [float], self.name)
2895
- validator.check('beta', beta, '', 0, Rel.GT, self.name)
2960
+ validator.check('beta', beta, '', 0, validator.GT, self.name)
2896
2961
  validator.check_string(
2897
2962
  reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
2963
+ self.add_prim_attr('sigma', self.beta)
2898
2964
  self.init_prim_io_names(inputs=['prediction', 'target'], outputs=['output'])
2899
2965
 
2900
2966
 
2901
2967
  class MultiMarginLoss(Primitive):
2902
2968
  r"""
2903
- Creates a criterion that optimizes a multi-class classification hinge
2904
- loss (margin-based loss) between input :math:`x` (a 2D mini-batch `Tensor`) and
2905
- output :math:`y` (which is a 1D tensor of target class indices,
2906
- :math:`0 \leq y \leq \text{x.size}(1)-1`):
2907
-
2908
- For each mini-batch sample, the loss in terms of the 1D input :math:`x` and scalar
2909
- output :math:`y` is:
2969
+ Creates a loss function that minimizes the hinge loss
2970
+ for multi-class classification tasks.
2971
+ The loss is calculated by comparing the score of the target class with the scores of the other classes.
2910
2972
 
2911
- .. math::
2912
- \text{loss}(x, y) = \frac{\sum_i \max(0, w[y] * (\text{margin} - x[y] + x[i]))^p)}{\text{x.size}(0)}
2913
-
2914
- where :math:`x \in \left\{0, \; \cdots , \; \text{x.size}(0) - 1\right\}`
2915
- and :math:`i \neq y`.
2973
+ .. warning::
2974
+ This is an experimental API that is subject to change or deletion.
2916
2975
 
2917
- Optionally, you can give non-equal weighting on the classes by passing
2918
- a 1D input `weight` tensor w into the constructor.
2976
+ Refer to :func:`mindspore.ops.multi_margin_loss` for more details.
2919
2977
 
2920
2978
  Args:
2921
- p (int): Optional. The norm degree for pairwise distance. Should be 1 or 2. Default: 1.
2922
- margin (float): Optional. A parameter to change pairwise distance. Default: 1.0.
2923
- reduction (str): Apply specific reduction method to the output: 'none', 'mean', 'sum'. Default: "mean".
2979
+ p (int, optional): The norm degree for pairwise distance. Should be 1 or 2. Default: 1.
2980
+ margin (float, optional): A parameter to change pairwise distance. Default: 1.0.
2981
+ reduction (str, optional): Apply specific reduction method to the output: 'none', 'mean',
2982
+ 'sum'. Default: 'mean'.
2983
+
2984
+ - 'none': no reduction will be applied.
2985
+ - 'mean': the sum of the output will be divided by the number of elements in the output.
2986
+ - 'sum': the output will be summed.
2924
2987
 
2925
2988
  Inputs:
2926
- - **x** (Tensor) - Input x, with shape :math:`(N, C)`. Data type only support float32, float16 or float64.
2989
+ - **inputs** (Tensor) - Input , with shape :math:`(N, C)`. Data type only support float32, float16 or float64.
2927
2990
  - **target** (Tensor) - Ground truth labels, with shape :math:`(N,)`. Data type only support int64. The
2928
2991
  value of target should be non-negative, less than C.
2929
- - **weight** (Tensor, optional) - The rescaling weight to each class with shape :math:`(C,)`. Data type only
2930
- support float32, float16 or float64. Default: None.
2992
+ - **weight** (Tensor) - The rescaling weight to each class with shape :math:`(C,)`. Data type only
2993
+ supports float16, float32 or float64.
2931
2994
 
2932
2995
  Outputs:
2933
2996
  Tensor, When `reduction` is 'none', the shape is :math:`(N,)`.
2934
- Otherwise, it is a scalar. Has the same data type with `x`.
2935
-
2936
- Raises:
2937
- TypeError: If dtype of `p` or `target` is not int.
2938
- TypeError: If dtype of `margin` is not float.
2939
- TypeError: If dtype of `reduction` is not str.
2940
- TypeError: If dtype of `x` is not float16, float or float64.
2941
- TypeError: If dtype of `weight` and `x` is not the same.
2942
- ValueError: If 'p' is not 1 or 2.
2943
- ValueError: If 'reduction' is not one of {'none','sum','mean'}.
2944
- ValueError: If shape[0] of `x` is not equal to shape[0] of `target`.
2945
- ValueError: If shape[1] of `x` is not equal to shape[0] of `weight`.
2946
- ValueError: IF rank of `weight` is not 1.
2947
- ValueError: If rank of `x` is not 2 or rank of 'target' is not 1.
2997
+ Otherwise, it is a scalar. Has the same data type as `inputs`.
2948
2998
 
2949
2999
  Supported Platforms:
2950
- ``Ascend`` ``CPU``
3000
+ ``Ascend`` ``GPU`` ``CPU``
2951
3001
 
2952
3002
  Examples:
2953
3003
  >>> x = Tensor(np.ones(shape=[3, 3]), mindspore.float32)
@@ -2963,7 +3013,7 @@ class MultiMarginLoss(Primitive):
2963
3013
  def __init__(self, p=1, margin=1.0, reduction="mean"):
2964
3014
  """Initialize MultiMarginLoss"""
2965
3015
  self.p = validator.check_value_type('p', p, [int], self.name)
2966
- validator.check_int(p, {1, 2}, Rel.IN, 'p', self.name)
3016
+ validator.check_int(p, {1, 2}, validator.IN, 'p', self.name)
2967
3017
  self.margin = validator.check_value_type('margin', margin, [float], self.name)
2968
3018
  self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
2969
3019
  self.init_prim_io_names(inputs=['x', 'target', 'weight'], outputs=['y'])
@@ -3000,7 +3050,7 @@ class SoftMarginLoss(Primitive):
3000
3050
  ValueError: If `reduction` is not one of 'none', 'mean' or 'sum'.
3001
3051
 
3002
3052
  Supported Platforms:
3003
- ``Ascend``
3053
+ ``Ascend`` ``GPU``
3004
3054
 
3005
3055
  Examples:
3006
3056
  >>> loss = ops.SoftMarginLoss()
@@ -3147,10 +3197,13 @@ class RNNTLoss(PrimitiveWithInfer):
3147
3197
  validator.check_equal_int(len(labels_shape), 2, 'labels_rank', self.name)
3148
3198
  validator.check_equal_int(len(input_length_shape), 1, 'input_length_rank', self.name)
3149
3199
  validator.check_equal_int(len(label_length_shape), 1, 'label_length_rank', self.name)
3150
- validator.check('labels shape[0]', labels_shape[0], 'acts shape[0]', acts_shape[0], Rel.EQ, self.name)
3151
- validator.check('labels shape[1]', labels_shape[1], 'acts shape[2]-1', acts_shape[2] - 1, Rel.EQ, self.name)
3152
- validator.check('input_length size', input_length_shape[0], 'acts shape[0]', acts_shape[0], Rel.EQ, self.name)
3153
- validator.check('label_length size', label_length_shape[0], 'acts shape[0]', acts_shape[0], Rel.EQ, self.name)
3200
+ validator.check('labels shape[0]', labels_shape[0], 'acts shape[0]', acts_shape[0], validator.EQ, self.name)
3201
+ validator.check('labels shape[1]', labels_shape[1], 'acts shape[2]-1',
3202
+ acts_shape[2] - 1, validator.EQ, self.name)
3203
+ validator.check('input_length size', input_length_shape[0], 'acts shape[0]',
3204
+ acts_shape[0], validator.EQ, self.name)
3205
+ validator.check('label_length size', label_length_shape[0], 'acts shape[0]',
3206
+ acts_shape[0], validator.EQ, self.name)
3154
3207
  costs_shape = (acts_shape[0],)
3155
3208
  return costs_shape, acts_shape
3156
3209
 
@@ -3231,13 +3284,10 @@ class SGD(PrimitiveWithCheck):
3231
3284
 
3232
3285
  def check_shape(self, parameters_shape, gradient_shape, learning_rate_shape,
3233
3286
  accum_shape, momentum_shape, stat_shape):
3234
- validator.check_positive_int(len(parameters_shape), "parameters rank", self.name)
3235
- validator.check_int(len(gradient_shape), 0, Rel.GE, f'gradient rank', self.name)
3236
- validator.check_int(len(learning_rate_shape), 0, Rel.GE, f'learning rate rank', self.name)
3237
- validator.check_positive_int(len(accum_shape), "accumulation rank", self.name)
3238
- validator.check_int(len(momentum_shape), 0, Rel.GE, f'momentum rank', self.name)
3239
- validator.check_int(len(stat_shape), 0, Rel.GE, f'stat rank', self.name)
3240
- validator.check("gradient shape", gradient_shape, "stat shape", stat_shape, Rel.EQ, self.name)
3287
+ validator.check_int(len(gradient_shape), 0, validator.GE, f'gradient rank', self.name)
3288
+ validator.check_int(len(learning_rate_shape), 0, validator.GE, f'learning rate rank', self.name)
3289
+ validator.check_int(len(momentum_shape), 0, validator.GE, f'momentum rank', self.name)
3290
+ validator.check_int(len(stat_shape), 0, validator.GE, f'stat rank', self.name)
3241
3291
 
3242
3292
  def check_dtype(self, parameters_dtype, gradient_dtype, learning_rate_dtype,
3243
3293
  accum_dtype, momentum_dtype, stat_dtype):
@@ -3278,7 +3328,7 @@ class ApplyRMSProp(PrimitiveWithInfer):
3278
3328
  from being updated. Default: False.
3279
3329
 
3280
3330
  Inputs:
3281
- - **var** (Tensor) - Weights to be updated.
3331
+ - **var** (Parameter) - Weights to be updated.
3282
3332
  - **mean_square** (Tensor) - Mean square gradients, must be the same type as `var`.
3283
3333
  - **moment** (Tensor) - Delta of `var`, must be the same type as `var`.
3284
3334
  - **learning_rate** (Union[Number, Tensor]) - Learning rate. Must be a float number or
@@ -3372,7 +3422,7 @@ class ApplyCenteredRMSProp(Primitive):
3372
3422
  from being updated. Default: False.
3373
3423
 
3374
3424
  Inputs:
3375
- - **var** (Tensor) - Weights to be updated.
3425
+ - **var** (Parameter) - Weights to be updated.
3376
3426
  - **mean_gradient** (Tensor) - Mean gradients, must be the same type as `var`.
3377
3427
  - **mean_square** (Tensor) - Mean square gradients, must be the same type as `var`.
3378
3428
  - **moment** (Tensor) - Delta of `var`, must be the same type as `var`.
@@ -3449,9 +3499,9 @@ class LayerNorm(Primitive):
3449
3499
  - **input_x** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
3450
3500
  The input of LayerNorm.
3451
3501
  - **gamma** (Tensor) - Tensor of shape :math:`(P_0, \ldots, P_\text{begin_params_axis})`.
3452
- The learnable parameter `gamma` as the scale on norm.
3502
+ The learnable parameter :math:`\gamma` as the scale on norm.
3453
3503
  - **beta** (Tensor) - Tensor of shape :math:`(P_0, \ldots, P_\text{begin_params_axis})`.
3454
- The learnable parameter `beta` as the scale on norm.
3504
+ The learnable parameter :math:`\beta` as the scale on norm.
3455
3505
 
3456
3506
  Outputs:
3457
3507
  tuple[Tensor], tuple of 3 tensors, the normalized input and the updated parameters.
@@ -3506,13 +3556,17 @@ class L2Normalize(Primitive):
3506
3556
  where :math:`\epsilon` is epsilon and :math:`\sum_{i}^{}\left | x_i \right | ^2` calculates the sum of squares of
3507
3557
  the input `x` along the dimension `axis`.
3508
3558
 
3559
+ Note:
3560
+ On Ascend, input data type of float64 is currently not supported.
3561
+
3509
3562
  Args:
3510
3563
  axis (Union[list(int), tuple(int), int]): Specify the axis for calculating the L2 norm. Default: 0.
3511
3564
  epsilon (float): A small value added for numerical stability. Default: 1e-4.
3512
3565
 
3513
3566
  Inputs:
3514
- - **x** (Tensor) - Input to compute the normalization. Tensor of shape :math:`(N, \ldots)`.
3515
- Data type must be float16 or float32.
3567
+ - **x** (Tensor) - Input to compute the normalization. Tensor of shape :math:`(N, *)`,
3568
+ where :math:`*` means any number of additional dimensions.
3569
+ Data type must be float16, float32 or float64.
3516
3570
 
3517
3571
  Outputs:
3518
3572
  Tensor, with the same type and shape as the `x`.
@@ -3521,7 +3575,7 @@ class L2Normalize(Primitive):
3521
3575
  TypeError: If `axis` is not one of the following: list, tuple or int.
3522
3576
  TypeError: If `epsilon` is not a float.
3523
3577
  TypeError: If `x` is not a Tensor.
3524
- TypeError: If dtype of `x` is neither float16 nor float32.
3578
+ TypeError: If dtype of `x` is not in [float16, float32, float64].
3525
3579
  ValueError: If dimension of `x` is not greater than 0.
3526
3580
 
3527
3581
  Supported Platforms:
@@ -3583,57 +3637,14 @@ class DropoutDoMask(Primitive):
3583
3637
 
3584
3638
  class ResizeBilinear(PrimitiveWithInfer):
3585
3639
  r"""
3586
- Resizes an image to a certain size using the bilinear interpolation.
3587
-
3588
- The resizing only affects the lower two dimensions which represent the height and width. The input images
3589
- can be represented by different data types, but the data types of output images are always float32.
3640
+ This API is deprecated. Please use :class:`mindspore.ops.ResizeBilinearV2` instead.
3641
+ For general resizing with other interpolation methods, refer to :func:`mindspore.ops.interpolate` for more details.
3590
3642
 
3591
- For general resize, refer to :func:`mindspore.ops.interpolate` for more detail.
3592
-
3593
- .. warning::
3594
- This interface does not support dynamic shape and is subject to change or deletion,
3595
- use :func:`mindspore.ops.interpolate` instead.
3596
-
3597
- Args:
3598
- size (Union[tuple[int], list[int]]): A tuple or list of 2 int elements :math:`(new\_height, new\_width)`,
3599
- the new size of the images.
3600
- align_corners (bool): If true, rescale input by :math:`(new\_height - 1) / (height - 1)`,
3601
- which exactly aligns the 4 corners of images and resized images. If false,
3602
- rescale by :math:`new\_height / height`. Default: False.
3603
- half_pixel_centers (bool): Whether half pixel center. If set to True, `align_corners` should be False.
3604
- Default: False.
3605
-
3606
- Inputs:
3607
- - **x** (Tensor) - Image to be resized. Input images must be a 4-D tensor with shape
3608
- :math:`(batch, channels, height, width)`, with data type of float32 or float16.
3609
-
3610
- Outputs:
3611
- Tensor, resized image. 4-D with shape :math:`(batch, channels, new\_height, new\_width)`,
3612
- with the same data type as input `x`.
3613
-
3614
- Raises:
3615
- TypeError: If `size` is neither a tuple nor list.
3616
- TypeError: If `align_corners` is not a bool.
3617
- TypeError: If `half_pixel_centers` is not a bool.
3618
- TypeError: If `align_corners` and `half_pixel_centers` are all True.
3619
- TypeError: If `half_pixel_centers` is True and device_target not Ascend.
3620
- TypeError: If dtype of `x` is neither float16 nor float32.
3621
- TypeError: If `x` is not a Tensor.
3622
- ValueError: If length of shape of `x` is not equal to 4.
3643
+ Note:
3644
+ Dynamic shape feature is not supported for now.
3623
3645
 
3624
3646
  Supported Platforms:
3625
- ``Ascend`` ``CPU`` ``GPU``
3626
-
3627
- Examples:
3628
- >>> x = Tensor([[[[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]]], mindspore.float32)
3629
- >>> resize_bilinear = ops.ResizeBilinear((5, 5))
3630
- >>> output = resize_bilinear(x)
3631
- >>> print(output)
3632
- [[[[1. 2. 3. 4. 5.]
3633
- [1. 2. 3. 4. 5.]
3634
- [1. 2. 3. 4. 5.]
3635
- [1. 2. 3. 4. 5.]
3636
- [1. 2. 3. 4. 5.]]]]
3647
+ ``Ascend`` ``GPU`` ``CPU``
3637
3648
  """
3638
3649
 
3639
3650
  @prim_attr_register
@@ -3649,15 +3660,11 @@ class ResizeBilinear(PrimitiveWithInfer):
3649
3660
  half_pixel_centers, [bool], self.name)
3650
3661
  if half_pixel_centers and align_corners:
3651
3662
  raise ValueError(f"If half_pixel_centers is True, align_corners must be False, but got {align_corners}")
3652
- target = context.get_context("device_target")
3653
- if half_pixel_centers and target.lower() != "ascend":
3654
- raise ValueError(f"Currently `half_pixel_centers`=True only support in Ascend device_target, "
3655
- f"but got {target}")
3656
3663
  for i, value in enumerate(size):
3657
3664
  validator.check_positive_int(value, f'{i}th value of size', self.name)
3658
3665
 
3659
3666
  def infer_shape(self, input_shape):
3660
- validator.check("dimension of input", len(input_shape), "", 4, Rel.EQ, self.name)
3667
+ validator.check("dimension of input", len(input_shape), "", 4, validator.EQ, self.name)
3661
3668
  input_shape = list(input_shape)
3662
3669
  batch, channel, _, _ = input_shape
3663
3670
  out_shape = [batch, channel]
@@ -3673,7 +3680,7 @@ class ResizeBilinear(PrimitiveWithInfer):
3673
3680
 
3674
3681
  class UpsampleTrilinear3D(Primitive):
3675
3682
  r"""
3676
- Performs upsampling with trilinear interpolation across 3dims for 5dim inputs.
3683
+ Performs upsampling with trilinear interpolation across 3 dimensions for a 5-dimensional input Tensor.
3677
3684
 
3678
3685
  This operator scales up the volumetric input with specified `output_size` or `scales` factors,
3679
3686
  using trilinear upscaling algorithm.
@@ -3682,15 +3689,15 @@ class UpsampleTrilinear3D(Primitive):
3682
3689
  One of `scales` and `output_size` MUST be specified and it is an error if both are specified.
3683
3690
 
3684
3691
  Args:
3685
- output_size (Union[tuple[int], list[int]]): A tuple or list of 3 int
3692
+ output_size (Union[tuple[int], list[int]], optional): A tuple or list of 3 int
3686
3693
  elements :math:`(output\_depth, output\_height, output\_width)`.
3687
3694
  Defaults to None. Only one of `scales` and `output_size` can be specified.
3688
- scales (Union[tuple[float], list[float]]): A tuple or list of 3 float
3695
+ scales (Union[tuple[float], list[float]], optional): A tuple or list of 3 float
3689
3696
  elements :math:`(scale\_depth, scale\_height, scale\_width)`. Defaults to None.
3690
- align_corners (bool): An optional bool. Defaults to false.
3691
- If true, the input and output tensors are aligned by the center points of their corner pixels,
3697
+ align_corners (bool, optional): An optional bool. Defaults to false.
3698
+ If True, the input and output tensors are aligned by the center points of their corner pixels,
3692
3699
  preserving the values at the corner pixels.
3693
- If false, the input and output tensors are aligned by the corner points of their corner pixels,
3700
+ If False, the input and output tensors are aligned by the corner points of their corner pixels,
3694
3701
  and the interpolation use edge value padding for out of boundary values.
3695
3702
 
3696
3703
  Inputs:
@@ -3702,8 +3709,8 @@ class UpsampleTrilinear3D(Primitive):
3702
3709
  Tensor of shape :math:`(N, C, D_{out}, H_{out}, W_{out})`.
3703
3710
 
3704
3711
  Raises:
3705
- TypeError: When `output_size` is not none and `output_size` is not list[int] or tuple[int].
3706
- TypeError: When `scales` is not none and `scales` is not list[float] or tuple[float].
3712
+ TypeError: When `output_size` is not None and `output_size` is not list[int] or tuple[int].
3713
+ TypeError: When `scales` is not None and `scales` is not list[float] or tuple[float].
3707
3714
  TypeError: If dtype of `x` is not in [float16, float32, float64].
3708
3715
  TypeError: If type of `align_corners` is not bool.
3709
3716
  ValueError: If any value of `output_size` is negative or zero when `output_size` is not empty.
@@ -3714,22 +3721,24 @@ class UpsampleTrilinear3D(Primitive):
3714
3721
  ValueError: If size of `output_size` is not equal 3 when `output_size` is specified.
3715
3722
 
3716
3723
  Supported Platforms:
3717
- ``Ascend`` ``CPU`` ``GPU``
3724
+
3718
3725
 
3719
3726
  Examples:
3720
- >>> ops = ops.UpsampleTrilinear3D(output_size=[4, 64, 48])
3721
- >>> out = ops(Tensor(input_data=np.random.randn(2, 3, 4, 512, 256)))
3727
+ >>> net = ops.UpsampleTrilinear3D(output_size=[4, 64, 48])
3728
+ >>> in_x = Tensor(input_data=np.random.randn(2, 3, 4, 512, 256))
3729
+ >>> out = net(in_x)
3722
3730
  >>> print(out.shape)
3723
3731
  (2, 3, 4, 64, 48)
3724
- ...
3725
- >>> ops = ops.UpsampleTrilinear3D(output_size=[2, 4, 4])
3732
+ >>>
3733
+ >>> net = ops.UpsampleTrilinear3D(output_size=[2, 4, 4])
3726
3734
  >>> in_x = Tensor(np.arange(1, 5, dtype=np.float32).reshape((1, 1, 1, 2, 2)))
3727
- >>> out = ops(in_x)
3735
+ >>> out = net(in_x)
3728
3736
  >>> print(out)
3729
3737
  [[[[[1. 1.25 1.75 2. ]
3730
3738
  [1.5 1.75 2.25 2.5 ]
3731
3739
  [2.5 2.75 3.25 3.5 ]
3732
3740
  [3. 3.25 3.75 4. ]]
3741
+
3733
3742
  [[1. 1.25 1.75 2. ]
3734
3743
  [1.5 1.75 2.25 2.5 ]
3735
3744
  [2.5 2.75 3.25 3.5 ]
@@ -3900,7 +3909,7 @@ class FastGeLU(Primitive):
3900
3909
  r"""
3901
3910
  Fast Gaussian Error Linear Units activation function.
3902
3911
 
3903
- Refer to :func:`mindspore.ops.fast_gelu` for more detail.
3912
+ Refer to :func:`mindspore.ops.fast_gelu` for more details.
3904
3913
 
3905
3914
  Supported Platforms:
3906
3915
  ``Ascend`` ``GPU`` ``CPU``
@@ -3970,7 +3979,7 @@ class GetNext(Primitive):
3970
3979
  """Initialize GetNext."""
3971
3980
  validator.check_value_type("types", types, [list, tuple], self.name)
3972
3981
  validator.check_value_type("shapes", shapes, [list, tuple], self.name)
3973
- validator.check("types length", len(types), "shapes length", len(shapes), Rel.EQ, self.name)
3982
+ validator.check("types length", len(types), "shapes length", len(shapes), validator.EQ, self.name)
3974
3983
  validator.check_value_type("output_num", output_num, [int], self.name)
3975
3984
 
3976
3985
 
@@ -3981,7 +3990,7 @@ class PReLU(PrimitiveWithInfer):
3981
3990
  Refer to :func:`mindspore.ops.prelu` for more details.
3982
3991
 
3983
3992
  Supported Platforms:
3984
- ``Ascend`` ``GPU``
3993
+ ``Ascend`` ``GPU`` ``CPU``
3985
3994
 
3986
3995
  Examples:
3987
3996
  >>> class Net(nn.Cell):
@@ -4010,11 +4019,11 @@ class PReLU(PrimitiveWithInfer):
4010
4019
  self.init_prim_io_names(inputs=['x', 'weight'], outputs=['output'])
4011
4020
 
4012
4021
 
4013
- class LSTM(PrimitiveWithInfer):
4014
- """
4022
+ class LSTM(Primitive):
4023
+ r"""
4015
4024
  Performs the Long Short-Term Memory (LSTM) on the input.
4016
4025
 
4017
- For detailed information, please refer to :class:`mindspore.nn.LSTM`.
4026
+ For detailed information, please refer to :class:`mindspore.nn.LSTM`.
4018
4027
 
4019
4028
  Args:
4020
4029
  input_size (int): Number of features of input.
@@ -4026,20 +4035,20 @@ class LSTM(PrimitiveWithInfer):
4026
4035
  LSTM layer except the last layer. The range of dropout is [0.0, 1.0].
4027
4036
 
4028
4037
  Inputs:
4029
- - **input** (Tensor) - Tensor of shape (seq_len, batch_size, `input_size`) or
4030
- (batch_size, seq_len, `input_size`).
4031
- - **h** (tuple) - Tensor of shape (num_directions * `num_layers`, batch_size, `hidden_size`).
4032
- - **c** (tuple) - Tensor of shape (num_directions * `num_layers`, batch_size, `hidden_size`).
4038
+ - **input** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, input\_size)` or
4039
+ :math:`(batch\_size, seq\_len, input\_size)`.
4040
+ - **h** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
4041
+ - **c** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
4033
4042
  - **w** (Tensor) - A weight Tensor.
4034
4043
 
4035
4044
  Outputs:
4036
4045
  Tuple, a tuple contains (`output`, `h_n`, `c_n`, `reserve`, `state`).
4037
4046
 
4038
- - **output** (Tensor) - Tensor of shape (seq_len, batch_size, num_directions * `hidden_size`).
4039
- - **h_n** (Tensor) - Tensor of shape (num_directions * `num_layers`, batch_size, `hidden_size`).
4040
- - **c_n** (Tensor) - Tensor of shape (num_directions * `num_layers`, batch_size, `hidden_size`).
4041
- - **reserve** (Tensor) - Tensor of shape (r, 1).
4042
- - **state** (Tensor) - Random number generator state and its shape is (s, 1).
4047
+ - **output** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, num\_directions * hidden\_size)`.
4048
+ - **h_n** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
4049
+ - **c_n** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
4050
+ - **reserve** (Tensor) - Tensor of shape :math:`(r, 1)`.
4051
+ - **state** (Tensor) - Random number generator state and its shape is :math:`(s, 1)`.
4043
4052
 
4044
4053
  Raises:
4045
4054
  TypeError: If `input_size`, `hidden_size` or `num_layers` is not an int.
@@ -4085,37 +4094,13 @@ class LSTM(PrimitiveWithInfer):
4085
4094
  self.has_bias = validator.check_value_type("has_bias", has_bias, (bool,), self.name)
4086
4095
  self.bidirectional = validator.check_value_type("bidirectional", bidirectional, (bool,), self.name)
4087
4096
  self.dropout = validator.check_value_type("dropout", dropout, [float], self.name)
4088
- self.dropout = validator.check_float_range(dropout, 0, 1, Rel.INC_BOTH, 'dropout', self.name)
4097
+ self.dropout = validator.check_float_range(dropout, 0, 1, validator.INC_BOTH, 'dropout', self.name)
4089
4098
 
4090
4099
  if bidirectional:
4091
4100
  self.num_directions = 2
4092
4101
  else:
4093
4102
  self.num_directions = 1
4094
4103
 
4095
- def infer_shape(self, x_shape, h_shape, c_shape, w_shape):
4096
- validator.check_equal_int(len(x_shape), 3, "x rank", self.name)
4097
- validator.check_equal_int(x_shape[2], self.input_size, "x[2]", self.name)
4098
-
4099
- # h and c should be same shape
4100
- validator.check_equal_int(len(h_shape), 3, "h rank", self.name)
4101
- validator.check("h_shape", h_shape, "c_shape", c_shape, Rel.EQ, self.name)
4102
-
4103
- validator.check_int(h_shape[0], self.num_layers * self.num_directions, Rel.EQ, "h[0]", self.name)
4104
- validator.check_equal_int(h_shape[1], x_shape[1], "h[1]", self.name)
4105
- validator.check_int(h_shape[2], self.hidden_size, Rel.EQ, "h[2]", self.name)
4106
-
4107
- y_shape = (x_shape[0], x_shape[1], self.hidden_size * self.num_directions)
4108
-
4109
- # set arbitrary shape for reserved space
4110
- reserved_shape = (1, 1)
4111
- state_shape = (1, 1)
4112
- return y_shape, h_shape, c_shape, reserved_shape, state_shape
4113
-
4114
- def infer_dtype(self, x_dtype, h_dtype, c_dtype, w_dtype):
4115
- args = {'x': x_dtype, 'h': h_dtype, 'c': c_dtype, 'w': w_dtype}
4116
- validator.check_tensors_dtypes_same_and_valid(args, (mstype.float32, mstype.float16), self.name)
4117
- return x_dtype, x_dtype, x_dtype, x_dtype, x_dtype
4118
-
4119
4104
 
4120
4105
  class SigmoidCrossEntropyWithLogits(Primitive):
4121
4106
  r"""
@@ -4134,7 +4119,7 @@ class SigmoidCrossEntropyWithLogits(Primitive):
4134
4119
  \end{array}
4135
4120
 
4136
4121
  Inputs:
4137
- - **logits** (Tensor) - Input logits. Tensor of shape :math:`(N, *)` where :math:`*` means, any number
4122
+ - **logits** (Tensor) - Input logits. Tensor of shape :math:`(N, *)` where :math:`*` means any number
4138
4123
  of additional dimensions.
4139
4124
  - **label** (Tensor) - Ground truth label. With the same shape and type as `logits`.
4140
4125
 
@@ -4174,7 +4159,7 @@ class BCEWithLogitsLoss(PrimitiveWithInfer):
4174
4159
 
4175
4160
  \begin{array}{ll} \\
4176
4161
  p_{ij} = sigmoid(X_{ij}) = \frac{1}{1 + e^{-X_{ij}}} \\
4177
- L_{ij} = -[Y_{ij} * log(p_{ij}) + (1 - Y_{ij})log(1 - p_{ij})]
4162
+ L_{ij} = -[Y_{ij}log(p_{ij}) + (1 - Y_{ij})log(1 - p_{ij})]
4178
4163
  \end{array}
4179
4164
 
4180
4165
  :math:`i` indicates the :math:`i^{th}` sample, :math:`j` indicates the category. Then,
@@ -4192,8 +4177,8 @@ class BCEWithLogitsLoss(PrimitiveWithInfer):
4192
4177
  and the third method is to calculate the sum of all losses.
4193
4178
 
4194
4179
  This operator will multiply the output by the corresponding weight.
4195
- The tensor weight assigns different weights to each piece of data in the batch,
4196
- and the tensor pos_weight adds corresponding weights to the positive examples of each category.
4180
+ The tensor `weight` assigns different weights to each piece of data in the batch,
4181
+ and the tensor `pos_weight` adds corresponding weights to the positive examples of each category.
4197
4182
 
4198
4183
  In addition, it can trade off recall and precision by adding weights to positive examples.
4199
4184
  In the case of multi-label classification the loss can be described as:
@@ -4205,8 +4190,8 @@ class BCEWithLogitsLoss(PrimitiveWithInfer):
4205
4190
  \end{array}
4206
4191
 
4207
4192
  where c is the class number (c>1 for multi-label binary classification, c=1 for single-label binary classification),
4208
- n is the number of the sample in the batch and :math:`p_c` is the weight of the positive answer for the class c.
4209
- :math:`p_c>1` increases the recall, :math:`p_c<1` increases the precision.
4193
+ n is the number of the sample in the batch and :math:`P_c` is the weight of the positive answer for the class c.
4194
+ :math:`P_c>1` increases the recall, :math:`P_c<1` increases the precision.
4210
4195
 
4211
4196
  Args:
4212
4197
  reduction (str): Type of reduction to be applied to loss. The optional values are 'mean', 'sum', and 'none',
@@ -4259,7 +4244,7 @@ class Pad(Primitive):
4259
4244
  r"""
4260
4245
  Pads the input tensor according to the paddings.
4261
4246
 
4262
- Refer to :func:`mindspore.ops.pad` for more detail. Use :func:`mindspore.ops.pad` instead if `paddings` has
4247
+ Refer to :func:`mindspore.ops.pad` for more details. Use :func:`mindspore.ops.pad` instead if `paddings` has
4263
4248
  negative values.
4264
4249
 
4265
4250
  Args:
@@ -4269,8 +4254,8 @@ class Pad(Primitive):
4269
4254
  be extended behind the input tensor in the `D` th dimension.
4270
4255
 
4271
4256
  Inputs:
4272
- - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
4273
- additional dimensions.
4257
+ - **input_x** (Tensor) - Tensor to be padded. It has shape :math:`(N, *)`, where :math:`*` means
4258
+ any number of additional dimensions.
4274
4259
 
4275
4260
  Outputs:
4276
4261
  Tensor, the tensor after padding.
@@ -4306,20 +4291,34 @@ class Pad(Primitive):
4306
4291
 
4307
4292
  class PadV3(Primitive):
4308
4293
  """
4309
- Pads the input tensor according to the paddings, mode and paddings_contiguous.
4294
+ Pads the input Tensor according to the `paddings`, `mode` and `paddings_contiguous`.
4310
4295
 
4311
4296
  Args:
4312
- mode (str): An optional string, Defaults to "constant", indicates padding mode,
4313
- support "constant", "reflect", "edge", Defaults to "constant".
4314
- paddings_contiguous (bool): An optional bool value, Defaults to True.
4297
+ mode (str, optional): An optional string that indicates the padding mode;
4298
+ supports "constant", "reflect", "edge" and "circular". Default: "constant".
4299
+ The effects of various padding modes are as follows:
4300
+
4301
+ - "constant": Pads the input Tensor with value specified by `constant_value`.
4302
+ - "reflect": Pads the input Tensor by reflecting the values of the pixels at the
4303
+ boundary of the Tensor.
4304
+ - "edge": Pads the input Tensor with the values of the pixels on the border of the Tensor.
4305
+ - "circular": Circular padding mode. In this mode, the pixels from one edge of the image
4306
+ are wrapped around to the opposite edge, such that the pixel on the right edge of the
4307
+ image is replaced with the pixel on the left edge, and the pixel on the bottom edge
4308
+ is replaced with the pixel on the top edge.
4309
+
4310
+ paddings_contiguous (bool, optional): An optional bool value that indicates whether the paddings are arranged contiguously.
4315
4311
  If true, paddings is arranged as [begin0, end0, begin1, end1, ...]
4316
4312
  If false, paddings is arranged as [begin0, begin1, ..., end0, end1, ...]
4313
+ Default: True.
4317
4314
 
4318
4315
  Inputs:
4319
- - **x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
4320
- additional dimensions.
4321
- - **paddings** (Tensor) - Only constant value is allowed. A 1D tensor of type int32 or int64.
4322
- - **constant_value** (Tensor, optional) - A tensor with the same type as `x`, padding value in 'constant' mode.
4316
+ - **x** (Tensor) - Tensor to be padded. It has shape :math:`(N, *)`, where :math:`*` means
4317
+ any number of additional dimensions.
4318
+ - **paddings** (Tensor) - Specifies the number of padding values to be added before and after each
4319
+ dimension of the input Tensor `x`. It's a 1D Tensor of type int32 or int64.
4320
+ - **constant_value** (Tensor, optional) - Padding value to use in 'constant' mode,
4321
+ if not specified, 0 is used instead. It has the same type as `x`.
4323
4322
 
4324
4323
  Outputs:
4325
4324
  Tensor, the tensor after padding.
@@ -4328,16 +4327,19 @@ class PadV3(Primitive):
4328
4327
  TypeError: If `x` or `paddings` is not a Tensor.
4329
4328
  TypeError: If `padding_contiguous` is not a bool.
4330
4329
  ValueError: If `mode` is not a str or not in support modes.
4331
- ValueError: If `mode` is constant, the element's number of paddings not be even.
4332
- ValueError: If `mode` is constant, the element's number of paddings large than input dim * 2.
4333
- ValueError: If `mode` is edge or reflect, the element's number of paddings is not 2, 4 or 6.
4334
- ValueError: If `mode` is edge or reflect, x dims equal 3, the element's number of paddings is 2.
4335
- ValueError: If `mode` is edge or reflect, x dims equal 4, the element's number of paddings is 4.
4336
- ValueError: If `mode` is edge or reflect, x dims smaller than 3.
4337
- ValueError: If `mode` is edge, x dims bigger than 5.
4338
- ValueError: If `mode` is reflect, x dims bigger than 4.
4339
- ValueError: If `mode` is reflect, padding size bigger than the corresponding x dimension.
4340
- ValueError: After padding, output's shape number must be greater than 0.
4330
+ ValueError: If `mode` is "constant", the element's number of `paddings` not be even.
4331
+ ValueError: If `mode` is "constant", the element's number of `paddings` large than input dim * 2.
4332
+ ValueError: If `mode` is "edge" "reflect" or "circular", the element's number of `paddings` is not 2, 4 or 6.
4333
+ ValueError: If `mode` is "edge" "reflect" or "circular", `x` dims equals 3,
4334
+ the element's number of `paddings` is not 2.
4335
+ ValueError: If `mode` is "edge" "reflect" or "circular", `x` dims equals 4,
4336
+ the element's number of `paddings` is not 4.
4337
+ ValueError: If `mode` is "circular", `x` dims equals 5, the element's number of `paddings` is not 6.
4338
+ ValueError: If `mode` is "edge", "reflect" or "circular", `x` dims smaller than 3.
4339
+ ValueError: If `mode` is "edge" or "circular", x dims bigger than 5.
4340
+ ValueError: If `mode` is "reflect", x dims bigger than 4.
4341
+ ValueError: If `mode` is "reflect", padding size bigger than the corresponding `x` dimension.
4342
+ ValueError: After padding, output's shape number is not greater than 0.
4341
4343
 
4342
4344
  Supported Platforms:
4343
4345
  ``Ascend`` ``GPU`` ``CPU``
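A minimal constant-mode usage sketch (the tensor values and padding amounts are illustrative assumptions):

import numpy as np
import mindspore
from mindspore import Tensor, ops

x = Tensor(np.array([[[0., 1.]]]), mindspore.float32)  # shape (1, 1, 2)
paddings = Tensor([1, 1], mindspore.int32)             # pad the last dimension by 1 on each side
value = Tensor(6.0, mindspore.float32)
pad_op = ops.PadV3(mode="constant")
print(pad_op(x, paddings, value))  # expected [[[6. 0. 1. 6.]]]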
@@ -4378,10 +4380,9 @@ class PadV3(Primitive):
4378
4380
  def __init__(self, mode='constant', paddings_contiguous=True):
4379
4381
  """Initialize PadV3"""
4380
4382
  self.init_prim_io_names(inputs=['x', 'paddings', 'constant_value'], outputs=['y'])
4381
- validator.check_string(mode, ['constant', 'reflect', 'edge'], 'mode', self.name)
4383
+ validator.check_string(mode, ['constant', 'reflect', 'edge', 'circular'], 'mode', self.name)
4382
4384
  validator.check_bool(paddings_contiguous, "paddings_contiguous", self.name)
4383
4385
  self.mode = mode
4384
- self.set_const_input_indexes([1])
4385
4386
  self.paddings_contiguous = paddings_contiguous
4386
4387
 
4387
4388
 
@@ -4397,7 +4398,7 @@ class MirrorPad(Primitive):
4397
4398
  - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
4398
4399
  additional dimensions.
4399
4400
  - **paddings** (Tensor) - Paddings requires constant tensor. The value of `paddings` is a
4400
- matrix(list), and its shape is (N, 2). N is the rank of input data. All elements of paddings
4401
+ matrix(list), and its shape is :math:`(N, 2)`. N is the rank of input data. All elements of paddings
4401
4402
  are int type. For the input in the `D` th dimension, paddings[D, 0] indicates how many sizes
4402
4403
  to be extended ahead of the input tensor in the `D` th dimension, and paddings[D, 1]
4403
4404
  indicates how many sizes to be extended behind the input tensor in the `D` th dimension. Both
@@ -4463,7 +4464,6 @@ class MirrorPad(Primitive):
4463
4464
  self.init_prim_io_names(inputs=['x', 'paddings'], outputs=['y'])
4464
4465
  validator.check_string(mode, ['REFLECT', 'SYMMETRIC'], 'mode', self.name)
4465
4466
  self.mode = mode
4466
- self.set_const_input_indexes([1])
4467
4467
 
4468
4468
 
4469
4469
  class ComputeAccidentalHits(PrimitiveWithCheck):
@@ -4519,13 +4519,14 @@ class ComputeAccidentalHits(PrimitiveWithCheck):
4519
4519
  self.init_prim_io_names(inputs=['true_classes', 'sampled_candidates'],
4520
4520
  outputs=['indices', 'ids', 'weights'])
4521
4521
  validator.check_value_type("num_true", num_true, [int], self.name)
4522
- validator.check_number("num_true", num_true, 1, Rel.GE, self.name)
4522
+ validator.check_number("num_true", num_true, 1, validator.GE, self.name)
4523
4523
  self.num_true = num_true
4524
4524
 
4525
4525
  def check_shape(self, true_classes_shape, sampled_candidates_shape):
4526
- validator.check_int(len(true_classes_shape), 2, Rel.EQ, 'dim of true_classes', self.name)
4527
- validator.check_int(len(sampled_candidates_shape), 1, Rel.EQ, 'dim of sampled_candidates', self.name)
4528
- validator.check("true_classes shape[1]", true_classes_shape[1], "num_true", self.num_true, Rel.EQ, self.name)
4526
+ validator.check_int(len(true_classes_shape), 2, validator.EQ, 'dim of true_classes', self.name)
4527
+ validator.check_int(len(sampled_candidates_shape), 1, validator.EQ, 'dim of sampled_candidates', self.name)
4528
+ validator.check("true_classes shape[1]", true_classes_shape[1], "num_true",
4529
+ self.num_true, validator.EQ, self.name)
4529
4530
 
4530
4531
  indices_len = -1
4531
4532
  return (indices_len,), (indices_len,), (indices_len,)
@@ -4597,7 +4598,7 @@ class ROIAlign(Primitive):
4597
4598
  validator.check_value_type("spatial_scale", spatial_scale, [float], self.name)
4598
4599
  validator.check_value_type("sample_num", sample_num, [int], self.name)
4599
4600
  validator.check_value_type("roi_end_mode", roi_end_mode, [int], self.name)
4600
- validator.check_int_range(roi_end_mode, 0, 1, Rel.INC_BOTH, "roi_end_mode", self.name)
4601
+ validator.check_int_range(roi_end_mode, 0, 1, validator.INC_BOTH, "roi_end_mode", self.name)
4601
4602
  self.pooled_height = pooled_height
4602
4603
  self.pooled_width = pooled_width
4603
4604
  self.spatial_scale = spatial_scale
@@ -4639,22 +4640,19 @@ class Adam(Primitive):
4639
4640
  If false, update the gradients without using NAG. Default: False.
4640
4641
 
4641
4642
  Inputs:
4642
- - **var** (Tensor) - Weights to be updated. The shape is :math:`(N, *)` where :math:`*` means,
4643
+ - **var** (Parameter) - Weights to be updated. The shape is :math:`(N, *)` where :math:`*` means,
4643
4644
  any number of additional dimensions. The data type can be float16 or float32.
4644
- - **m** (Tensor) - The 1st moment vector in the updating formula,
4645
- the shape and data type value should be the same as `var`.
4646
- - **v** (Tensor) - the 2nd moment vector in the updating formula,
4647
- the shape and data type value should be the same as `var`. Mean square gradients with the same type as `var`.
4648
- - **beta1_power** (float) - :math:`beta_1^t(\beta_1^{t})` in the updating formula,
4649
- the data type value should be the same as `var`.
4650
- - **beta2_power** (float) - :math:`beta_2^t(\beta_2^{t})` in the updating formula,
4651
- the data type value should be the same as `var`.
4652
- - **lr** (float) - :math:`l` in the updating formula. The paper suggested value is :math:`10^{-8}`,
4653
- the data type value should be the same as `var`.
4654
- - **beta1** (float) - The exponential decay rate for the 1st moment estimations,
4655
- the data type value should be the same as `var`. The paper suggested value is :math:`0.9`.
4656
- - **beta2** (float) - The exponential decay rate for the 2nd moment estimations,
4657
- the data type value should be the same as `var`. The paper suggested value is :math:`0.999`.
4645
+ - **m** (Parameter) - The 1st moment vector in the updating formula,
4646
+ the shape should be the same as `var`.
4647
+ - **v** (Parameter) - the 2nd moment vector in the updating formula,
4648
+ the shape should be the same as `var`.
4649
+ - **beta1_power** (float) - :math:`beta_1^t(\beta_1^{t})` in the updating formula.
4650
+ - **beta2_power** (float) - :math:`beta_2^t(\beta_2^{t})` in the updating formula.
4651
+ - **lr** (float) - :math:`l` in the updating formula. The paper suggested value is :math:`10^{-8}`.
4652
+ - **beta1** (float) - The exponential decay rate for the 1st moment estimations.
4653
+ The paper suggested value is :math:`0.9`.
4654
+ - **beta2** (float) - The exponential decay rate for the 2nd moment estimations.
4655
+ The paper suggested value is :math:`0.999`.
4658
4656
  - **epsilon** (float) - Term added to the denominator to improve numerical stability.
4659
4657
  - **gradient** (Tensor) - Gradient, has the same shape and data type as `var`.
4660
4658
 
@@ -4667,7 +4665,7 @@ class Adam(Primitive):
4667
4665
 
4668
4666
  Raises:
4669
4667
  TypeError: If neither `use_locking` nor `use_nesterov` is a bool.
4670
- TypeError: If `var`, `m` or `v` is not a Tensor.
4668
+ TypeError: If `var`, `m` or `v` is not a Parameter.
4671
4669
  TypeError: If `beta1_power`, `beta2_power1`, `lr`, `beta1`, `beta2`, `epsilon` or `gradient` is not a Tensor.
4672
4670
 
4673
4671
  Supported Platforms:
@@ -4693,6 +4691,18 @@ class Adam(Primitive):
4693
4691
  [[0.9996838 0.9996838]
4694
4692
  [0.9996838 0.9996838]]
4695
4693
  """
4694
+ __mindspore_signature__ = (
4695
+ sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4696
+ sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T1),
4697
+ sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T2),
4698
+ sig.make_sig('beta1_power', dtype=sig.sig_dtype.T3),
4699
+ sig.make_sig('beta2_power', dtype=sig.sig_dtype.T4),
4700
+ sig.make_sig('lr', dtype=sig.sig_dtype.T5),
4701
+ sig.make_sig('beta1', dtype=sig.sig_dtype.T6),
4702
+ sig.make_sig('beta2', dtype=sig.sig_dtype.T7),
4703
+ sig.make_sig('epsilon', dtype=sig.sig_dtype.T8),
4704
+ sig.make_sig('gradient', dtype=sig.sig_dtype.T)
4705
+ )
4696
4706
 
4697
4707
  @prim_attr_register
4698
4708
  def __init__(self, use_locking=False, use_nesterov=False):
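A minimal usage sketch of the `Adam` primitive, following the input order declared in the `__mindspore_signature__` block above (shapes and hyper-parameter values here are hypothetical):

import numpy as np
import mindspore
from mindspore import Tensor, Parameter, nn, ops

class AdamNet(nn.Cell):
    def __init__(self):
        super(AdamNet, self).__init__()
        self.apply_adam = ops.Adam()
        # var, m and v are Parameters, matching the RW_WRITE entries in the signature.
        self.var = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="var")
        self.m = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="m")
        self.v = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="v")

    def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
        return self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power,
                               lr, beta1, beta2, epsilon, grad)

net = AdamNet()
grad = Tensor(np.ones([2, 2]).astype(np.float32))
output = net(0.9, 0.999, 0.001, 0.9, 0.999, 1e-8, grad)
print(net.var.shape)  # (2, 2); var, m and v have been updated in place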
@@ -4702,7 +4712,7 @@ class Adam(Primitive):
4702
4712
  self.add_prim_attr('side_effect_mem', True)
4703
4713
 
4704
4714
 
4705
- class AdamWeightDecay(PrimitiveWithInfer):
4715
+ class AdamWeightDecay(Primitive):
4706
4716
  r"""
4707
4717
  Updates gradients by the Adaptive Moment Estimation algorithm with weight decay (AdamWeightDecay).
4708
4718
 
@@ -4744,7 +4754,7 @@ class AdamWeightDecay(PrimitiveWithInfer):
4744
4754
  it should have the same shape as `var`. The data type can be float16 or float32.
4745
4755
  - **v** (Parameter) - The 2nd moment vector in the updating formula,
4746
4756
  it should have the same shape and dtype as `m`.
4747
- - **lr** (float) - :math:`l` in the updating formula. The paper suggested value is :math:`10^{-8}`,
4757
+ - **lr** (float) - :math:`lr` in the updating formula. The paper suggested value is :math:`10^{-8}`,
4748
4758
  the data type should be float32.
4749
4759
  - **beta1** (float) - The exponential decay rate for the 1st moment estimations,
4750
4760
  the data type should be float32. The paper suggested value is :math:`0.9`
@@ -4755,6 +4765,7 @@ class AdamWeightDecay(PrimitiveWithInfer):
4755
4765
  - **decay** (float) - The weight decay value, must be a scalar tensor with float32 data type.
4756
4766
  Default: 0.0.
4757
4767
  - **gradient** (Tensor) - Gradient, has the same shape and data type as `var`.
4768
+
4758
4769
  Outputs:
4759
4770
  Tuple of 3 Tensor, the updated parameters.
4760
4771
 
@@ -4762,6 +4773,15 @@ class AdamWeightDecay(PrimitiveWithInfer):
4762
4773
  - **m** (Tensor) - The same shape and data type as `m`.
4763
4774
  - **v** (Tensor) - The same shape and data type as `v`.
4764
4775
 
4776
+ Raises:
4777
+ TypeError: If `use_locking` is not a bool.
4778
+ TypeError: If `lr`, `beta1`, `beta2`, `epsilon` or `decay` is not a float32.
4779
+ TypeError: If `var`, `m` or `v` is not a Parameter with dtype float16 or float32.
4780
+ TypeError: If `gradient` is not a Tensor.
4781
+ ValueError: If `epsilon` <= 0.
4782
+ ValueError: If `beta1` or `beta2` is not in range (0.0, 1.0).
4783
+ ValueError: If `decay` < 0.
4784
+
4765
4785
  Supported Platforms:
4766
4786
  ``Ascend`` ``GPU`` ``CPU``
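For intuition, the relationship between the AdamWeightDecay inputs listed above can be sketched in plain NumPy roughly as follows. This is a simplification that ignores `use_locking`, dtype handling and the fused kernel; the values are hypothetical:

import numpy as np

def adam_weight_decay_step(var, m, v, lr, beta1, beta2, epsilon, decay, gradient):
    # 1st and 2nd moment estimates.
    m = beta1 * m + (1 - beta1) * gradient
    v = beta2 * v + (1 - beta2) * gradient ** 2
    # Adam direction plus decoupled weight decay on the current parameter value.
    update = m / (np.sqrt(v) + epsilon) + decay * var
    var = var - lr * update
    return var, m, v

var = np.ones([2, 2], np.float32)
m = np.zeros_like(var)
v = np.zeros_like(var)
grad = np.full_like(var, 0.1)
var, m, v = adam_weight_decay_step(var, m, v, lr=0.001, beta1=0.9, beta2=0.999,
                                   epsilon=1e-6, decay=0.01, gradient=grad)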
4767
4787
 
@@ -4805,28 +4825,8 @@ class AdamWeightDecay(PrimitiveWithInfer):
4805
4825
  self.add_prim_attr('side_effect_mem', True)
4806
4826
  validator.check_value_type("use_locking", use_locking, [bool], self.name)
4807
4827
 
4808
- def infer_shape(self, var_shape, m_shape, v_shape, lr_shape, beta1_shape, beta2_shape,
4809
- epsilon_shape, decay_shape, grad_shape):
4810
- validator.check("var_shape", var_shape, "m_shape", m_shape, Rel.EQ, self.name)
4811
- validator.check("var_shape", var_shape, "v_shape", v_shape, Rel.EQ, self.name)
4812
- validator.check("var_shape", var_shape, "grad_shape", grad_shape, Rel.EQ, self.name)
4813
- return var_shape, m_shape, v_shape
4814
-
4815
- def infer_dtype(self, var_dtype, m_dtype, v_dtype, lr_dtype, beta1_dtype, beta2_dtype,
4816
- epsilon_dtype, decay_dtype, grad_dtype):
4817
- args = {"var": var_dtype, "grad": grad_dtype}
4818
- validator.check_tensors_dtypes_same_and_valid(args, mstype.number_type, self.name)
4819
-
4820
- args = {"m": m_dtype, "v": v_dtype}
4821
- validator.check_tensors_dtypes_same_and_valid(args, mstype.number_type, self.name)
4822
-
4823
- args = {"lr": lr_dtype, "beta1": beta1_dtype, "beta2": beta2_dtype, "epsilon": epsilon_dtype,
4824
- "decay": decay_dtype}
4825
- validator.check_scalar_or_tensor_types_same(args, [mstype.float32], self.name, True)
4826
- return var_dtype, m_dtype, v_dtype
4827
4828
 
4828
-
4829
- class AdamNoUpdateParam(PrimitiveWithInfer):
4829
+ class AdamNoUpdateParam(Primitive):
4830
4830
  r"""
4831
4831
  Updates gradients by the Adaptive Moment Estimation (Adam) algorithm. This operator does not update the parameter, but
4832
4832
  calculates the value that should be added to the parameter instead.
@@ -4921,20 +4921,6 @@ class AdamNoUpdateParam(PrimitiveWithInfer):
4921
4921
  validator.check_value_type("use_locking", use_locking, [bool], self.name)
4922
4922
  validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name)
4923
4923
 
4924
- def infer_shape(self, m_shape, v_shape, beta1_power_shape, beta2_power_shape, lr_shape,
4925
- beta1_shape, beta2_shape, epsilon_shape, grad_shape):
4926
- validator.check("grad_shape", grad_shape, "m_shape", m_shape, Rel.EQ, self.name)
4927
- validator.check("grad_shape", grad_shape, "v_shape", v_shape, Rel.EQ, self.name)
4928
- return grad_shape
4929
-
4930
- def infer_dtype(self, m_dtype, v_dtype, beta1_power_dtype, beta2_power_dtype, lr_dtype,
4931
- beta1_dtype, beta2_dtype, epsilon_dtype, grad_dtype):
4932
- args = {"m": m_dtype, "v": v_dtype, "grad": grad_dtype,
4933
- "beta1_power": beta1_power_dtype, "beta2_power": beta2_power_dtype, 'lr': lr_dtype,
4934
- "beta1": beta1_dtype, "beta2": beta2_dtype, "epsilon": epsilon_dtype}
4935
- validator.check_tensors_dtypes_same_and_valid(args, [mstype.float32], self.name)
4936
- return grad_dtype
4937
-
4938
4924
 
4939
4925
  class FusedSparseAdam(Primitive):
4940
4926
  r"""
@@ -5279,7 +5265,7 @@ class FusedSparseFtrl(Primitive):
5279
5265
  self.lr = validator.check_positive_float(lr, "lr", self.name)
5280
5266
  self.l1 = validator.check_non_negative_float(l1, "l1", self.name)
5281
5267
  self.l2 = validator.check_non_negative_float(l2, "l2", self.name)
5282
- self.lr_power = validator.check_number("lr_power", lr_power, 0, Rel.LE, self.name)
5268
+ self.lr_power = validator.check_number("lr_power", lr_power, 0, validator.LE, self.name)
5283
5269
  self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
5284
5270
 
5285
5271
 
@@ -5393,14 +5379,14 @@ class KLDivLoss(Primitive):
5393
5379
  \operatorname{sum}(L(x, target)), & \text{if reduction} = \text{'sum'.}
5394
5380
  \end{cases}
5395
5381
 
5396
- where :math:`x` represents `logits`.
5397
- :math:`target` represents `labels`.
5382
+ where :math:`x` represents `logits`,
5383
+ :math:`target` represents `labels`, and
5398
5384
  :math:`\ell(x, target)` represents `output`.
5399
5385
 
5400
5386
  Note:
5401
- On Ascend, float64 dtype is not currently supported.
5402
- The output aligns with the mathematical definition of KL divergence
5403
- only when `reduction` is set to 'batchmean'.
5387
+ - On Ascend, float64 dtype is not currently supported.
5388
+ - The output aligns with the mathematical definition of Kullback-Leibler divergence
5389
+ only when `reduction` is set to 'batchmean'.
5404
5390
 
5405
5391
  Args:
5406
5392
  reduction (str): Specifies the reduction to be applied to the output.
@@ -5454,7 +5440,7 @@ class KLDivLoss(Primitive):
5454
5440
  elif device_target == "GPU":
5455
5441
  support_mode = ['none', 'mean', 'sum']
5456
5442
  elif device_target == "Ascend":
5457
- support_mode = ['none', 'batchmean', 'sum']
5443
+ support_mode = ['none', 'batchmean', 'sum', 'mean']
5458
5444
  else:
5459
5445
  raise ValueError(f"'{self.name}' unknown device target: '{device_target}'")
5460
5446
 
@@ -5473,7 +5459,8 @@ class BinaryCrossEntropy(Primitive):
5473
5459
  l_n = - w_n \left[ y_n \cdot \log x_n + (1 - y_n) \cdot \log (1 - x_n) \right]
5474
5460
 
5475
5461
  In which, :math:`L` indicates the loss of all batch_sizes, :math:`l` indicates the loss of one batch_size,
5476
- and n indicates one batch_size in the 1-N range. Then,
5462
+ and n indicates one batch_size in the 1-N range, and :math:`w_n` indicates the
5463
+ weight of the :math:`n`-th batch of binary cross entropy. Then,
5477
5464
 
5478
5465
  .. math::
5479
5466
  \ell(x, y) = \begin{cases}
@@ -5483,23 +5470,22 @@ class BinaryCrossEntropy(Primitive):
5483
5470
  \end{cases}
5484
5471
 
5485
5472
  .. warning::
5486
- - The value of "x" must range from 0 to 1.
5487
- - The value of "y" must be "0" or "1".
5473
+ - The value of :math:`x` must range from 0 to 1.
5488
5474
 
5489
5475
  Args:
5490
5476
  reduction (str): Specifies the reduction to be applied to the output.
5491
5477
  Its value must be one of 'none', 'mean' or 'sum'. Default: 'mean'.
5492
5478
 
5493
5479
  Inputs:
5494
- - **logits** (Tensor) - The input Tensor. The data type must be float16 or float32,
5480
+ - **logits** (Tensor) - The predictive value whose data type must be float16 or float32,
5495
5481
  The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
5496
- - **labels** (Tensor) - The label Tensor which has the same shape and data type as `logits`.
5482
+ - **labels** (Tensor) - The target value which has the same shape and data type as `logits`.
5497
5483
  - **weight** (Tensor, optional) - A rescaling weight applied to the loss of each batch element.
5498
5484
  And it must have the same shape and data type as `logits`. Default: None.
5499
5485
 
5500
5486
  Outputs:
5501
- Tensor, has the same dtype as `logits`. if `reduction` is 'none', then it has the same shape as `logits`.
5502
- Otherwise, it is a scalar Tensor.
5487
+ Tensor or Scalar. Returns a Tensor that has the same dtype and shape as `logits` if `reduction` is 'none'.
5488
+ Otherwise, returns a scalar Tensor.
5503
5489
 
5504
5490
  Raises:
5505
5491
  TypeError: If dtype of `logits`, `labels` or `weight` (if given) is neither float16 nor float32.
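A small NumPy check of the per-element formula above, with hypothetical values and the optional `weight` treated as all ones:

import numpy as np

x = np.array([0.8, 0.2, 0.6], dtype=np.float32)   # logits, each value in (0, 1)
y = np.array([1.0, 0.0, 1.0], dtype=np.float32)   # labels
w = np.ones_like(x)                                # weight

l = -w * (y * np.log(x) + (1 - y) * np.log(1 - x))
print(l)          # reduction='none'
print(l.mean())   # reduction='mean'
print(l.sum())    # reduction='sum'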
@@ -5594,7 +5580,7 @@ class ApplyAdaMax(Primitive):
5594
5580
  RuntimeError: If the data type of `var`, `m`, `v` and `grad` conversion of Parameter is not supported.
5595
5581
 
5596
5582
  Supported Platforms:
5597
- ``Ascend`` ``GPU``
5583
+ ``Ascend`` ``GPU`` ``CPU``
5598
5584
 
5599
5585
  Examples:
5600
5586
  >>> class Net(nn.Cell):
@@ -5773,10 +5759,10 @@ class ApplyAdagrad(Primitive):
5773
5759
  update_slots (bool): If `True`, `accum` will be updated. Default: True.
5774
5760
 
5775
5761
  Inputs:
5776
- - **var** (Parameter) - Variable to be updated. With float32 or float16 data type.
5762
+ - **var** (Parameter) - Variable to be updated. With float or complex data type.
5777
5763
  The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
5778
5764
  - **accum** (Parameter) - Accumulation to be updated. The shape and data type must be the same as `var`.
5779
- - **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. With float32 or float16 data type.
5765
+ - **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. With float or complex data type.
5780
5766
  - **grad** (Tensor) - A tensor for gradient. The shape and data type must be the same as `var`.
5781
5767
 
5782
5768
  Outputs:
@@ -5786,12 +5772,12 @@ class ApplyAdagrad(Primitive):
5786
5772
  - **accum** (Tensor) - The same shape and data type as `accum`.
5787
5773
 
5788
5774
  Raises:
5789
- TypeError: If dtype of `var`, `accum`, `lr` or `grad` is neither float16 nor float32.
5775
+ TypeError: If dtype of `var`, `accum`, `lr` or `grad` is neither float nor complex.
5790
5776
  TypeError: If `lr` is neither a Number nor a Tensor.
5791
5777
  RuntimeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported.
5792
5778
 
5793
5779
  Supported Platforms:
5794
- ``Ascend`` ``CPU`` ``GPU``
5780
+ ``Ascend`` ``GPU`` ``CPU``
5795
5781
 
5796
5782
  Examples:
5797
5783
  >>> class Net(nn.Cell):
@@ -5850,7 +5836,7 @@ class ApplyAdagradV2(Primitive):
5850
5836
  the relatively highest priority data type.
5851
5837
 
5852
5838
  Note:
5853
- The difference is that `ApplyAdagradV2` has one more small constant value than `ApplyAdagrad`.
5839
+ The difference is that `ApplyAdagradV2` has an additional small constant value :math:`\epsilon` compared with `ApplyAdagrad`.
5854
5840
 
5855
5841
  Args:
5856
5842
  epsilon (float): A small value added for numerical stability.
@@ -5971,7 +5957,7 @@ class SparseApplyAdagradV2(Primitive):
5971
5957
  - **grad** (Tensor) - Gradients has the same data type as `var` and
5972
5958
  :math:`grad.shape[1:] = var.shape[1:]` if var.shape > 1.
5973
5959
  - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
5974
- The type must be int32 and indices.shape[0] = grad.shape[0].
5960
+ The type must be int32 and :math:`indices.shape[0] = grad.shape[0]`.
5975
5961
 
5976
5962
  Outputs:
5977
5963
  Tuple of 2 tensors, the updated parameters.
@@ -5987,7 +5973,7 @@ class SparseApplyAdagradV2(Primitive):
5987
5973
  RuntimeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported.
5988
5974
 
5989
5975
  Supported Platforms:
5990
- ``Ascend`` ``CPU`` ``GPU``
5976
+ ``Ascend`` ``GPU`` ``CPU``
5991
5977
 
5992
5978
  Examples:
5993
5979
  >>> class Net(nn.Cell):
@@ -6123,9 +6109,10 @@ class ApplyProximalAdagrad(Primitive):
6123
6109
  self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
6124
6110
 
6125
6111
 
6126
- class SparseApplyProximalAdagrad(PrimitiveWithCheck):
6112
+ class SparseApplyProximalAdagrad(Primitive):
6127
6113
  r"""
6128
- Updates relevant entries according to the proximal adagrad algorithm. Compared with ApplyProximalAdagrad,
6114
+ Updates relevant entries according to the proximal adagrad algorithm.
6115
+ Compared with :class:`mindspore.ops.ApplyProximalAdagrad`,
6129
6116
  an additional index tensor is input.
6130
6117
 
6131
6118
  .. math::
@@ -6149,16 +6136,16 @@ class SparseApplyProximalAdagrad(PrimitiveWithCheck):
6149
6136
  The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
6150
6137
  - **accum** (Parameter) - Variable tensor to be updated, has the same shape and dtype as `var`.
6151
6138
  - **lr** (Union[Number, Tensor]) - The learning rate value, must be a float number or
6152
- a scalar tensor with float16 or float32 data type.
6139
+ a scalar tensor with float16 or float32 data type. It must be positive.
6153
6140
  - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a float number or
6154
- a scalar tensor with float16 or float32 data type.
6141
+ a scalar tensor with float16 or float32 data type. It must be non-negative.
6155
6142
  - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be a float number or
6156
- a scalar tensor with float16 or float32 data type.
6143
+ a scalar tensor with float16 or float32 data type. It must be non-negative.
6157
6144
  - **grad** (Tensor) - A tensor of the same type as `var` and
6158
- grad.shape[1:] = var.shape[1:] if var.shape > 1.
6145
+ :math:`grad.shape[1:] = var.shape[1:]` if var.shape > 1.
6159
6146
  - **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
6160
6147
  If there are duplicates in `indices`, the behavior is undefined. Must be one of the
6161
- following types: int32, int64 and indices.shape[0] = grad.shape[0].
6148
+ following types: int32, int64 and :math:`indices.shape[0] = grad.shape[0]`.
6162
6149
 
6163
6150
  Outputs:
6164
6151
  Tuple of 2 tensors, the updated parameters.
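A minimal sketch of how the shape constraints above fit together when the operator is called (hypothetical values; `lr`, `l1` and `l2` are passed here as plain floats):

import numpy as np
import mindspore
from mindspore import Tensor, Parameter, nn, ops

class ProxAdagradNet(nn.Cell):
    def __init__(self):
        super(ProxAdagradNet, self).__init__()
        self.op = ops.SparseApplyProximalAdagrad()
        self.var = Parameter(Tensor(np.ones([3, 2]).astype(np.float32)), name="var")
        self.accum = Parameter(Tensor(np.ones([3, 2]).astype(np.float32)), name="accum")

    def construct(self, grad, indices):
        # grad.shape[1:] == var.shape[1:] and indices.shape[0] == grad.shape[0]
        return self.op(self.var, self.accum, 1.0, 0.0, 0.0, grad, indices)

net = ProxAdagradNet()
grad = Tensor(np.ones([2, 2]).astype(np.float32))
indices = Tensor(np.array([0, 2]), mindspore.int32)
var, accum = net(grad, indices)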
@@ -6170,6 +6157,7 @@ class SparseApplyProximalAdagrad(PrimitiveWithCheck):
6170
6157
  TypeError: If `use_locking` is not a bool.
6171
6158
  TypeError: If dtype of `var`, `accum`, `lr`, `l1`, `l2` or `grad` is neither float16 nor float32.
6172
6159
  TypeError: If dtype of `indices` is neither int32 nor int64.
6160
+ ValueError: If `lr` <= 0 or `l1` < 0 or `l2` < 0.
6173
6161
  RuntimeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported.
6174
6162
 
6175
6163
  Supported Platforms:
@@ -6220,20 +6208,6 @@ class SparseApplyProximalAdagrad(PrimitiveWithCheck):
6220
6208
  self.add_prim_attr('side_effect_mem', True)
6221
6209
  self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
6222
6210
 
6223
- def check_shape(self, var_shape, accum_shape, lr_shape, l1_shape, l2_shape,
6224
- grad_shape, indices_shape):
6225
- validator.check_int(len(indices_shape), 1, Rel.EQ, "indices rank", self.name)
6226
-
6227
- def check_dtype(self, var_dtype, accum_dtype, lr_dtype, l1_dtype, l2_dtype,
6228
- grad_dtype, indices_dtype):
6229
- args = {'var': var_dtype, 'accum': accum_dtype, 'grad': grad_dtype}
6230
- validator.check_tensors_dtypes_same_and_valid(args, [mstype.float16, mstype.float32], self.name)
6231
- validator.check_scalar_or_tensor_types_same({"lr": lr_dtype}, [mstype.float16, mstype.float32], self.name)
6232
- validator.check_scalar_or_tensor_types_same({"l1": l1_dtype}, [mstype.float16, mstype.float32], self.name)
6233
- validator.check_scalar_or_tensor_types_same({"l2": l2_dtype}, [mstype.float16, mstype.float32], self.name)
6234
- valid_dtypes = [mstype.int32, mstype.int64]
6235
- validator.check_tensor_dtype_valid('indices', indices_dtype, valid_dtypes, self.name)
6236
-
6237
6211
 
6238
6212
  class ApplyAddSign(Primitive):
6239
6213
  r"""
@@ -6496,7 +6470,7 @@ class ApplyProximalGradientDescent(Primitive):
6496
6470
  r"""
6497
6471
  Updates relevant entries according to the FOBOS(Forward Backward Splitting) algorithm.
6498
6472
  Refer to the paper `Efficient Learning using Forward-Backward Splitting
6499
- <http://papers.nips.cc//paper/3793-efficient-learning-using-forward-backward-splitting.pdf>`_ for more detail.
6473
+ <http://papers.nips.cc//paper/3793-efficient-learning-using-forward-backward-splitting.pdf>`_ for more details.
6500
6474
 
6501
6475
  .. math::
6502
6476
  \begin{array}{ll} \\
@@ -6636,6 +6610,10 @@ class ApplyFtrl(Primitive):
6636
6610
 
6637
6611
  For more details, please refer to :class:`mindspore.nn.FTRL`.
6638
6612
 
6613
+ Note:
6614
+ Currently, only positive numbers are supported on the Ascend platform,
6615
+ and the calculation results for other scenarios are not defined.
6616
+
6639
6617
  Args:
6640
6618
  use_locking (bool): Use locks for updating operation if true . Default: False.
6641
6619
 
@@ -6664,6 +6642,9 @@ class ApplyFtrl(Primitive):
6664
6642
  TypeError: If dtype of `var`, `grad`, `lr`, `l1`, `l2` or `lr_power` is neither float16 nor float32.
6665
6643
  TypeError: If `lr`, `l1`, `l2` or `lr_power` is neither a Number nor a Tensor.
6666
6644
  TypeError: If `grad` is not a Tensor.
6645
+ RuntimeError: If the parameter types of `var`, `accum` and `linear` are inconsistent.
6646
+ RuntimeError: If the parameter types of `grad`, `lr`, `l1`, `l2`, `lr_power` are inconsistent with `var`
6647
+ and their precision is greater than that of `var`.
6667
6648
 
6668
6649
  Supported Platforms:
6669
6650
  ``Ascend`` ``GPU`` ``CPU``
@@ -6697,6 +6678,17 @@ class ApplyFtrl(Primitive):
6697
6678
  [ 0.00066425 0.15075898]]
6698
6679
  """
6699
6680
 
6681
+ __mindspore_signature__ = (
6682
+ sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
6683
+ sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
6684
+ sig.make_sig('linear', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
6685
+ sig.make_sig('grad', dtype=sig.sig_dtype.T),
6686
+ sig.make_sig('lr', dtype=sig.sig_dtype.T),
6687
+ sig.make_sig('l1', dtype=sig.sig_dtype.T),
6688
+ sig.make_sig('l2', dtype=sig.sig_dtype.T),
6689
+ sig.make_sig('lr_power', dtype=sig.sig_dtype.T)
6690
+ )
6691
+
6700
6692
  @prim_attr_register
6701
6693
  def __init__(self, use_locking=False):
6702
6694
  """Initialize ApplyFtrl."""
@@ -6721,17 +6713,18 @@ class SparseApplyFtrl(Primitive):
6721
6713
  l2 (float): l2 regularization strength, must be greater than or equal to zero.
6722
6714
  lr_power (float): Learning rate power controls how the learning rate decreases during training,
6723
6715
  must be less than or equal to zero. Use fixed learning rate if `lr_power` is zero.
6724
- use_locking (bool): Use locks for updating operation if true . Default: False.
6716
+ use_locking (bool, optional): Use locks for updating operation if true. Default: False.
6725
6717
 
6726
6718
  Inputs:
6727
6719
  - **var** (Parameter) - The variable to be updated. The data type must be float16 or float32.
6728
6720
  The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
6729
6721
  - **accum** (Parameter) - The accumulation to be updated, must be same data type and shape as `var`.
6730
6722
  - **linear** (Parameter) - The linear coefficient to be updated, must be the same data type and shape as `var`.
6731
- - **grad** (Tensor) - A tensor of the same type as `var` and grad.shape[1:] = var.shape[1:] if var.shape > 1.
6723
+ - **grad** (Tensor) - A tensor of the same type as `var` and :math:`grad.shape[1:] = var.shape[1:]`
6724
+ if var.shape > 1.
6732
6725
  - **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
6733
6726
  If there are duplicates in `indices`, the behavior is undefined.
6734
- The type must be int32 or int64 and indices.shape[0] = grad.shape[0].
6727
+ The type must be int32 or int64 and :math:`indices.shape[0] = grad.shape[0]`.
6735
6728
 
6736
6729
  Outputs:
6737
6730
  - **var** (Tensor) - Tensor, has the same shape and data type as `var`.
@@ -6790,7 +6783,7 @@ class SparseApplyFtrl(Primitive):
6790
6783
  self.lr = validator.check_positive_float(lr, "lr", self.name)
6791
6784
  self.l1 = validator.check_non_negative_float(l1, "l1", self.name)
6792
6785
  self.l2 = validator.check_non_negative_float(l2, "l2", self.name)
6793
- self.lr_power = validator.check_number("lr_power", lr_power, 0, Rel.LE, self.name)
6786
+ self.lr_power = validator.check_number("lr_power", lr_power, 0, validator.LE, self.name)
6794
6787
  self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
6795
6788
  self.init_prim_io_names(inputs=['var', 'accum', 'linear', 'grad', 'indices'],
6796
6789
  outputs=['var', 'accum', 'linear'])
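Since `SparseApplyFtrlV2` is deprecated below in favour of this operator, a minimal `SparseApplyFtrl` sketch with hypothetical values (note that `indices.shape[0]` matches `grad.shape[0]`):

import numpy as np
import mindspore
from mindspore import Tensor, Parameter, nn, ops

class SparseApplyFtrlNet(nn.Cell):
    def __init__(self):
        super(SparseApplyFtrlNet, self).__init__()
        self.sparse_apply_ftrl = ops.SparseApplyFtrl(lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
        self.var = Parameter(Tensor(np.ones([3, 2]).astype(np.float32)), name="var")
        self.accum = Parameter(Tensor(np.ones([3, 2]).astype(np.float32)), name="accum")
        self.linear = Parameter(Tensor(np.ones([3, 2]).astype(np.float32)), name="linear")

    def construct(self, grad, indices):
        return self.sparse_apply_ftrl(self.var, self.accum, self.linear, grad, indices)

net = SparseApplyFtrlNet()
grad = Tensor(np.ones([2, 2]).astype(np.float32))
indices = Tensor(np.array([0, 2]), mindspore.int32)
var, accum, linear = net(grad, indices)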
@@ -6799,74 +6792,10 @@ class SparseApplyFtrl(Primitive):
6799
6792
 
6800
6793
  class SparseApplyFtrlV2(PrimitiveWithInfer):
6801
6794
  """
6802
- Updates relevant entries according to the FTRL-proximal scheme. This class has one more attribute, named
6803
- l2_shrinkage, than class SparseApplyFtrl.
6804
-
6805
- All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
6806
- If they have different data types, the lower priority data type will be converted to
6807
- the relatively highest priority data type.
6808
-
6809
-
6810
- Args:
6811
- lr (float): The learning rate value, must be positive.
6812
- l1 (float): l1 regularization strength, must be greater than or equal to zero.
6813
- l2 (float): l2 regularization strength, must be greater than or equal to zero.
6814
- l2_shrinkage (float): L2 shrinkage regularization.
6815
- lr_power (float): Learning rate power controls how the learning rate decreases during training,
6816
- must be less than or equal to zero. Use fixed learning rate if `lr_power` is zero.
6817
- use_locking (bool): If `True`, the var and accumulation tensors will be protected from being updated.
6818
- Default: False.
6819
-
6820
- Inputs:
6821
- - **var** (Parameter) - The variable to be updated. The data type must be float16 or float32.
6822
- The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
6823
- - **accum** (Parameter) - The accumulation to be updated, must be same data type and shape as `var`.
6824
- - **linear** (Parameter) - the linear coefficient to be updated, must be same data type and shape as `var`.
6825
- - **grad** (Tensor) - A tensor of the same type as `var` and
6826
- :math:`grad.shape[1:] = var.shape[1:]` if var.shape > 1.
6827
- - **indices** (Tensor) - A vector of indices in the first dimension of `var` and `accum`.
6828
- The type must be int32 and indices.shape[0] = grad.shape[0].
6829
-
6830
- Outputs:
6831
- Tuple of 3 Tensor, the updated parameters.
6832
-
6833
- - **var** (Tensor) - Tensor, has the same shape and data type as `var`.
6834
- - **accum** (Tensor) - Tensor, has the same shape and data type as `accum`.
6835
- - **linear** (Tensor) - Tensor, has the same shape and data type as `linear`.
6836
-
6837
- Raises:
6838
- TypeError: If `lr`, `l1`, `l2`, `lr_power` or `use_locking` is not a float.
6839
- TypeError: If `use_locking` is not a bool.
6840
- TypeError: If dtype of `var`, `accum`, `linear` or `grad` is neither float16 nor float32.
6841
- TypeError: If dtype of `indices` is not int32.
6842
- RuntimeError: If the data type of all of inputs except `indices` conversion of Parameter is not supported.
6795
+ The SparseApplyFtrlV2 interface is deprecated. Please use :class:`mindspore.ops.SparseApplyFtrl` instead.
6843
6796
 
6844
6797
  Supported Platforms:
6845
- ``Ascend``
6846
-
6847
- Examples:
6848
- >>> class SparseApplyFtrlV2Net(nn.Cell):
6849
- ... def __init__(self):
6850
- ... super(SparseApplyFtrlV2Net, self).__init__()
6851
- ... self.sparse_apply_ftrl_v2 = ops.SparseApplyFtrlV2(lr=0.01, l1=0.0, l2=0.0,
6852
- ... l2_shrinkage=0.0, lr_power=-0.5)
6853
- ... self.var = Parameter(Tensor(np.array([[0.2, 0.3]]).astype(np.float32)), name="var")
6854
- ... self.accum = Parameter(Tensor(np.array([[0.5, 0.9]]).astype(np.float32)), name="accum")
6855
- ... self.linear = Parameter(Tensor(np.array([[0.7, 0.5]]).astype(np.float32)), name="linear")
6856
- ...
6857
- ... def construct(self, grad, indices):
6858
- ... out = self.sparse_apply_ftrl_v2(self.var, self.accum, self.linear, grad, indices)
6859
- ... return out
6860
- ...
6861
- >>> net = SparseApplyFtrlV2Net()
6862
- >>> grad = Tensor(np.array([[0.8, 0.5]]).astype(np.float32))
6863
- >>> indices = Tensor(np.ones([1]), mindspore.int32)
6864
- >>> output = net(grad, indices)
6865
- >>> print(output)
6866
- (Tensor(shape=[1, 2], dtype=Float32, value=
6867
- [[ 2.00000003e-01, 3.00000012e-01]]), Tensor(shape=[1, 2], dtype=Float32, value=
6868
- [[ 5.00000000e-01, 8.99999976e-01]]), Tensor(shape=[1, 2], dtype=Float32, value=
6869
- [[ 6.99999988e-01, 5.00000000e-01]]))
6798
+ Deprecated
6870
6799
  """
6871
6800
 
6872
6801
  __mindspore_signature__ = (
@@ -6877,6 +6806,7 @@ class SparseApplyFtrlV2(PrimitiveWithInfer):
6877
6806
  sig.make_sig('indices', dtype=sig.sig_dtype.T1)
6878
6807
  )
6879
6808
 
6809
+ @deprecated("2.1", "ops.SparseApplyFtrl", False)
6880
6810
  @prim_attr_register
6881
6811
  def __init__(self, lr, l1, l2, l2_shrinkage, lr_power, use_locking=False):
6882
6812
  """Initialize SparseApplyFtrlV2."""
@@ -6887,18 +6817,18 @@ class SparseApplyFtrlV2(PrimitiveWithInfer):
6887
6817
  self.lr = validator.check_positive_float(lr, "lr", self.name)
6888
6818
  self.l1 = validator.check_non_negative_float(l1, "l1", self.name)
6889
6819
  self.l2 = validator.check_non_negative_float(l2, "l2", self.name)
6890
- self.lr_power = validator.check_number("lr_power", lr_power, 0, Rel.LE, self.name)
6820
+ self.lr_power = validator.check_number("lr_power", lr_power, 0, validator.LE, self.name)
6891
6821
  self.l2_shrinkage = validator.check_value_type("l2_shrinkage", l2_shrinkage, [float], self.name)
6892
6822
  self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
6893
6823
  self.add_prim_attr('side_effect_mem', True)
6894
6824
 
6895
6825
  def infer_shape(self, var_shape, accum_shape, linear_shape, grad_shape, indices_shape):
6896
- validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name)
6897
- validator.check('var shape', var_shape, 'linear shape', linear_shape, Rel.EQ, self.name)
6826
+ validator.check('var shape', var_shape, 'accum shape', accum_shape, validator.EQ, self.name)
6827
+ validator.check('var shape', var_shape, 'linear shape', linear_shape, validator.EQ, self.name)
6898
6828
  if len(var_shape) > 1:
6899
- validator.check('var_shape[1:]', var_shape[1:], 'grad_shape[1:]', grad_shape[1:], Rel.EQ, self.name)
6900
- validator.check_int(len(indices_shape), 1, Rel.EQ, "indices rank", self.name)
6901
- validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0], Rel.EQ, self.name)
6829
+ validator.check('var_shape[1:]', var_shape[1:], 'grad_shape[1:]', grad_shape[1:], validator.EQ, self.name)
6830
+ validator.check_int(len(indices_shape), 1, validator.EQ, "indices rank", self.name)
6831
+ validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0], validator.EQ, self.name)
6902
6832
  return var_shape, accum_shape, linear_shape
6903
6833
 
6904
6834
  def infer_dtype(self, var_dtype, accum_dtype, linear_dtype, grad_dtype, indices_dtype):
@@ -6912,12 +6842,20 @@ class SparseApplyFtrlV2(PrimitiveWithInfer):
6912
6842
  class Dropout(PrimitiveWithCheck):
6913
6843
  """
6914
6844
  During training, randomly zeroes some of the elements of the input tensor
6915
- with probability 1-`keep_prob` from a Bernoulli distribution.
6845
+ with probability 1-`keep_prob` from a Bernoulli distribution. It plays the
6846
+ role of reducing neuron correlation and avoiding overfitting.
6916
6847
 
6917
- Refer to :func:`mindspore.ops.dropout` for more detail.
6848
+ Refer to :func:`mindspore.ops.dropout` for more details.
6918
6849
 
6919
6850
  Supported Platforms:
6920
6851
  ``Ascend`` ``GPU`` ``CPU``
6852
+
6853
+ Examples:
6854
+ >>> dropout = ops.Dropout(keep_prob=0.5)
6855
+ >>> x = Tensor(np.ones([1, 2, 3, 4, 5]), mindspore.float32)
6856
+ >>> output, mask = dropout(x)
6857
+ >>> print(output.shape, mask.shape, mask.dtype)
6858
+ (1, 2, 3, 4, 5) (16,) UInt8
6921
6859
  """
6922
6860
 
6923
6861
  @prim_attr_register
@@ -6925,10 +6863,10 @@ class Dropout(PrimitiveWithCheck):
6925
6863
  """Initialize Dropout."""
6926
6864
  self.seed0 = validator.check_value_type("Seed0", Seed0, [int], self.name)
6927
6865
  self.seed1 = validator.check_value_type("Seed1", Seed1, [int], self.name)
6928
- self.keep_prob = validator.check_float_range(keep_prob, 0, 1, Rel.INC_RIGHT, "keep_prob", self.name)
6866
+ self.keep_prob = validator.check_float_range(keep_prob, 0, 1, validator.INC_RIGHT, "keep_prob", self.name)
6929
6867
 
6930
6868
  def check_shape(self, x_shape):
6931
- validator.check_int(len(x_shape), 1, Rel.GE, "x_shape", self.name)
6869
+ validator.check_int(len(x_shape), 1, validator.GE, "x_shape", self.name)
6932
6870
 
6933
6871
  def check_dtype(self, x_dtype):
6934
6872
  valid_dtypes = (mstype.float16, mstype.float32, mstype.float64)
@@ -6946,7 +6884,25 @@ class Dropout2D(PrimitiveWithInfer):
6946
6884
  Note:
6947
6885
  The keep probability :math:`keep\_prob` is equal to :math:`1 - p` in :func:`mindspore.ops.dropout2d`.
6948
6886
 
6949
- Refer to :func:`mindspore.ops.dropout2d` for more detail.
6887
+ Args:
6888
+ keep_prob (float, optional): The keep probability of a channel, between 0 and 1, e.g. `keep_prob` = 0.8,
6889
+ means dropping out 20% of channels. Default: 0.5.
6890
+
6891
+ Inputs:
6892
+ - **x** (Tensor) - A 4-D tensor with shape :math:`(N, C, H, W)`, where N is the batch size, C is the number
6893
+ of channels, H is the feature height, and W is the feature width. The data type should be int8, int16, int32,
6894
+ int64, float16 or float32.
6895
+
6896
+ Outputs:
6897
+ - **output** (Tensor) - With the same shape and data type as `x`.
6898
+ - **mask** (Tensor) - With the same shape as `x` and the data type is bool.
6899
+
6900
+ Raises:
6901
+ TypeError: If `x` is not a Tensor.
6902
+ TypeError: If dtype of `x` is not int8, int16, int32, int64, float16, float32 or float64.
6903
+ TypeError: If the data type of `keep_prob` is not float.
6904
+ ValueError: If `keep_prob` is out of the range `[0.0, 1.0]`.
6905
+ ValueError: If `x` shape is not `4D`.
6950
6906
 
6951
6907
  Supported Platforms:
6952
6908
  ``Ascend`` ``GPU`` ``CPU``
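A minimal usage sketch consistent with the inputs and outputs described above (hypothetical shape; `Dropout3D` below behaves the same way on 5-D :math:`(N, C, D, H, W)` inputs):

import numpy as np
import mindspore
from mindspore import Tensor, ops

dropout2d = ops.Dropout2D(keep_prob=0.8)               # each channel is dropped with probability 0.2
x = Tensor(np.ones([2, 3, 4, 5]), mindspore.float32)   # (N, C, H, W)
output, mask = dropout2d(x)
print(output.shape, mask.shape)                        # (2, 3, 4, 5) (2, 3, 4, 5); mask dtype is bool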
@@ -6964,7 +6920,7 @@ class Dropout2D(PrimitiveWithInfer):
6964
6920
  """Initialize Dropout2D."""
6965
6921
  super().__init__("Dropout2D")
6966
6922
  self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name)
6967
- self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, Rel.INC_BOTH, "keep_prob", self.name)
6923
+ self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, validator.INC_BOTH, "keep_prob", self.name)
6968
6924
 
6969
6925
 
6970
6926
  class Dropout3D(PrimitiveWithInfer):
@@ -6973,13 +6929,28 @@ class Dropout3D(PrimitiveWithInfer):
6973
6929
  with probability 1-`keep_prob` from a Bernoulli distribution(For a 5-dimensional tensor with a shape of NCDHW,
6974
6930
  the channel feature map refers to a 3-dimensional feature map with a shape of DHW).
6975
6931
 
6932
+ Note:
6933
+ The keep probability :math:`keep\_prob` is equal to :math:`1 - p` in :func:`mindspore.ops.dropout3d`.
6934
+
6976
6935
  Dropout3D can improve the independence between channel feature maps.
6977
6936
 
6937
+ Args:
6938
+ keep_prob (float): The keep probability of a channel, between 0 and 1, e.g. `keep_prob` = 0.8,
6939
+ means dropping out 20% of channels. Default: 0.5.
6978
6940
 
6979
- Note:
6980
- The keep probability :math:`keep\_prob` is equal to :math:`1 - p` in :func:`mindspore.ops.dropout2d`.
6941
+ Inputs:
6942
+ - **x** (Tensor) - A 5-D tensor with shape :math:`(N, C, D, H, W)`, where N is the batch size, C is the number
6943
+ of channels, D is the feature depth, H is the feature height, and W is the feature width.
6944
+ The data type should be int8, int16, int32, int64, float16 or float32.
6981
6945
 
6982
- Refer to :func:`mindspore.ops.dropout3d` for more detail.
6946
+ Outputs:
6947
+ - **output** (Tensor) - With the same shape and data type as `x`.
6948
+ - **mask** (Tensor) - With the same shape as `x` and the data type is bool.
6949
+
6950
+ Raises:
6951
+ TypeError: If the data type of `keep_prob` is not float.
6952
+ ValueError: If `keep_prob` is out of the range [0.0, 1.0],
6953
+ or if the input `x` is not a 5-D Tensor.
6983
6954
 
6984
6955
  Supported Platforms:
6985
6956
  ``Ascend`` ``GPU`` ``CPU``
@@ -6997,7 +6968,7 @@ class Dropout3D(PrimitiveWithInfer):
6997
6968
  """Initialize Dropout3D."""
6998
6969
  super().__init__("Dropout3D")
6999
6970
  self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name)
7000
- self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, Rel.INC_BOTH, "keep_prob", self.name)
6971
+ self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, validator.INC_BOTH, "keep_prob", self.name)
7001
6972
 
7002
6973
 
7003
6974
  class CTCLoss(Primitive):
@@ -7090,10 +7061,28 @@ class CTCGreedyDecoder(Primitive):
7090
7061
  r"""
7091
7062
  Performs greedy decoding on the logits given in inputs.
7092
7063
 
7093
- Refer to :func:`mindspore.ops.ctc_greedy_decoder` for more detail.
7064
+ Refer to :func:`mindspore.ops.ctc_greedy_decoder` for more details.
7094
7065
 
7095
7066
  Supported Platforms:
7096
- ``Ascend`` ``CPU``
7067
+ ``Ascend`` ``GPU`` ``CPU``
7068
+
7069
+ Examples:
7070
+ >>> inputs = Tensor(np.array([[[0.6, 0.4, 0.2], [0.8, 0.6, 0.3]],
7071
+ ... [[0.0, 0.6, 0.0], [0.5, 0.4, 0.5]]]), mindspore.float32)
7072
+ >>> sequence_length = Tensor(np.array([2, 2]), mindspore.int32)
7073
+ >>> decoded_indices, decoded_values, decoded_shape, log_probability = ops.CTCGreedyDecoder()(inputs,
7074
+ ... sequence_length)
7075
+ >>> print(decoded_indices)
7076
+ [[0 0]
7077
+ [0 1]
7078
+ [1 0]]
7079
+ >>> print(decoded_values)
7080
+ [0 1 0]
7081
+ >>> print(decoded_shape)
7082
+ [2 2]
7083
+ >>> print(log_probability)
7084
+ [[-1.2]
7085
+ [-1.3]]
7097
7086
  """
7098
7087
 
7099
7088
  @prim_attr_register
@@ -7115,23 +7104,24 @@ class BasicLSTMCell(PrimitiveWithInfer):
7115
7104
  def __init__(self, keep_prob=1.0, forget_bias=1.0, state_is_tuple=True, activation='tanh'):
7116
7105
  """Initialize BasicLSTMCell."""
7117
7106
  self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name)
7118
- self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, Rel.INC_BOTH, "keep_prob", self.name)
7107
+ self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, validator.INC_BOTH, "keep_prob", self.name)
7119
7108
  self.forget_bias = validator.check_value_type("forget_bias", forget_bias, [float], self.name)
7120
7109
  self.state_is_tuple = validator.check_value_type("state_is_tuple", state_is_tuple, [bool], self.name)
7121
7110
  self.activation = validator.check_string(activation, ['tanh'], "activation", self.name)
7122
7111
 
7123
7112
  def infer_shape(self, x_shape, h_shape, c_shape, w_shape, b_shape):
7124
- validator.check_int(len(x_shape), 2, Rel.EQ, "x rank", self.name)
7125
- validator.check_int(len(h_shape), 2, Rel.EQ, "h rank", self.name)
7126
- validator.check_int(len(c_shape), 2, Rel.EQ, "c rank", self.name)
7127
- validator.check_int(len(w_shape), 2, Rel.EQ, "w rank", self.name)
7128
- validator.check_int(len(b_shape), 1, Rel.EQ, "b rank", self.name)
7129
- validator.check("x_shape[0]", x_shape[0], "h_shape[0]", h_shape[0], Rel.EQ, self.name)
7130
- validator.check("c_shape[0]", c_shape[0], "h_shape[0]", h_shape[0], Rel.EQ, self.name)
7131
- validator.check("c_shape[1]", c_shape[1], "h_shape[1]", h_shape[1], Rel.EQ, self.name)
7132
- validator.check("w_shape[1]", w_shape[1], "4*h_shape[1]", 4 * h_shape[1], Rel.EQ, self.name)
7133
- validator.check("w_shape[0]", w_shape[0], "x_shape[1]+h_shape[1]", x_shape[1] + h_shape[1], Rel.EQ, self.name)
7134
- validator.check("b_shape[0]", b_shape[0], "4*h_shape[1]", 4 * h_shape[1], Rel.EQ, self.name)
7113
+ validator.check_int(len(x_shape), 2, validator.EQ, "x rank", self.name)
7114
+ validator.check_int(len(h_shape), 2, validator.EQ, "h rank", self.name)
7115
+ validator.check_int(len(c_shape), 2, validator.EQ, "c rank", self.name)
7116
+ validator.check_int(len(w_shape), 2, validator.EQ, "w rank", self.name)
7117
+ validator.check_int(len(b_shape), 1, validator.EQ, "b rank", self.name)
7118
+ validator.check("x_shape[0]", x_shape[0], "h_shape[0]", h_shape[0], validator.EQ, self.name)
7119
+ validator.check("c_shape[0]", c_shape[0], "h_shape[0]", h_shape[0], validator.EQ, self.name)
7120
+ validator.check("c_shape[1]", c_shape[1], "h_shape[1]", h_shape[1], validator.EQ, self.name)
7121
+ validator.check("w_shape[1]", w_shape[1], "4*h_shape[1]", 4 * h_shape[1], validator.EQ, self.name)
7122
+ validator.check("w_shape[0]", w_shape[0], "x_shape[1]+h_shape[1]", x_shape[1] + h_shape[1],
7123
+ validator.EQ, self.name)
7124
+ validator.check("b_shape[0]", b_shape[0], "4*h_shape[1]", 4 * h_shape[1], validator.EQ, self.name)
7135
7125
  ct_shape = c_shape
7136
7126
  ht_shape = c_shape
7137
7127
  it_shape = c_shape
@@ -7152,7 +7142,7 @@ class BasicLSTMCell(PrimitiveWithInfer):
7152
7142
  return c_dtype, mstype.float16, c_dtype, c_dtype, c_dtype, c_dtype, c_dtype
7153
7143
 
7154
7144
 
7155
- class DynamicRNN(PrimitiveWithInfer):
7145
+ class DynamicRNN(Primitive):
7156
7146
  r"""
7157
7147
  Applies a recurrent neural network to the input.
7158
7148
  Only long short-term memory (LSTM) is supported currently.
@@ -7167,9 +7157,9 @@ class DynamicRNN(PrimitiveWithInfer):
7167
7157
  h_{t+1} = o_{t+1} * \tanh(c_{t+1}) \\
7168
7158
  \end{array}
7169
7159
 
7170
- where :math:`h_{t+1}` is the hidden state at time `t+1`, :math:`x_{t+1}` is the input
7171
- at time `t+1`, :math:`h_{t}` is the hidden state of the layer
7172
- at time `t` or the initial hidden state at time `0`,
7160
+ :math:`h_{t+1}` is the hidden state at time `t+1`. :math:`x_{t+1}` is the input
7161
+ at time `t+1`. :math:`h_{t}` is the hidden state of the layer
7162
+ at time `t` or the initial hidden state at time `0`.
7173
7163
  :math:`\sigma` is the sigmoid function, and :math:`*` is the Hadamard product. :math:`W, b`
7174
7164
  are learnable weights between the output and the input in the formula. For instance,
7175
7165
  :math:`W_{ix}, b_{ix}` are the weight and bias used to transform from input :math:`x` to :math:`i`.
@@ -7184,8 +7174,10 @@ class DynamicRNN(PrimitiveWithInfer):
7184
7174
  keep_prob (float): A float identifying the keep prob in the operator. Default: 1.0.
7185
7175
  cell_clip (float): A float identifying the cell clip in the operator. Default: -1.0.
7186
7176
  num_proj (int): An integer identifying the number projection in the operator. Default: 0.
7187
- time_major (bool): A bool identifying the time major in the operator. Default: True.
7188
- Only `True` is currently supported.
7177
+ time_major (bool): A bool that specifies the data format of `x`. If it is set to True, the format is
7178
+ :math:`(num\_step, batch\_size, input\_size)`, if it is set to False, the format is
7179
+ :math:`(batch\_size, num\_step, input\_size)`.
7180
+ Default: True. Only supports True at present.
7189
7181
  activation (str): A string identifying the type of activation function in the operator. Default: 'tanh'.
7190
7182
  Only 'tanh' is currently supported.
7191
7183
  forget_bias (float): A float identifying the forget bias in the operator. Default: 0.0.
@@ -7277,45 +7269,8 @@ class DynamicRNN(PrimitiveWithInfer):
7277
7269
  validator.check_value_type("activation", activation, [str], self.name)
7278
7270
  self.activation = validator.check_string(activation, ['tanh'], "activation", self.name)
7279
7271
 
7280
- def infer_shape(self, x_shape, w_shape, b_shape, seq_shape, h_shape, c_shape):
7281
- validator.check_int(len(x_shape), 3, Rel.EQ, "x_shape", self.name)
7282
- validator.check_int(len(w_shape), 2, Rel.EQ, "w rank", self.name)
7283
- validator.check_int(len(b_shape), 1, Rel.EQ, "b rank", self.name)
7284
- validator.check_int(len(h_shape), 3, Rel.EQ, "h_shape", self.name)
7285
- validator.check_int(len(c_shape), 3, Rel.EQ, "c_shape", self.name)
7286
- if seq_shape is not None:
7287
- raise ValueError(f"For '{self.name}', the 'seq_length' must be None.")
7288
-
7289
- num_step, batch_size, input_size = x_shape
7290
- hidden_size = w_shape[-1] // 4
7291
-
7292
- validator.check("b_shape[-1]", b_shape[-1], "w_shape[-1]", w_shape[-1], Rel.EQ, self.name)
7293
- if w_shape[-1] % 4 != 0:
7294
- raise ValueError(f"For '{self.name}', the last dimension of 'w' must be a multiple of 4, "
7295
- f"but got {w_shape[-1]}.")
7296
- validator.check("w_shape[0]", w_shape[0], "input_size + hidden_size",
7297
- input_size + hidden_size, Rel.EQ, self.name)
7298
- validator.check("b_shape[0]", b_shape[0], "w_shape[1]", w_shape[1], Rel.EQ, self.name)
7299
- validator.check_int(h_shape[0], 1, Rel.EQ, "h_shape[0]", self.name)
7300
- validator.check("h_shape[1]", h_shape[1], "batch_size", batch_size, Rel.EQ, self.name)
7301
- validator.check("h_shape[2]", h_shape[2], "hidden_size", hidden_size, Rel.EQ, self.name)
7302
- validator.check("c_shape", c_shape, "h_shape", h_shape, Rel.EQ, self.name)
7303
- self.placeholder_index = [3]
7304
- self.add_prim_attr("placeholder_index", self.placeholder_index)
7305
- self.add_prim_attr("input_size", input_size)
7306
- self.add_prim_attr("hidden_size", hidden_size)
7307
- y_shape = (num_step, batch_size, hidden_size)
7308
- return y_shape, y_shape, y_shape, y_shape, y_shape, y_shape, y_shape, y_shape
7309
-
7310
- def infer_dtype(self, x_dtype, w_dtype, b_dtype, seq_dtype, h_dtype, c_dtype):
7311
- tuple(map(partial(validator.check_tensor_dtype_valid, valid_dtypes=[mstype.float16], prim_name=self.name),
7312
- ("x", "w", "h", "c"),
7313
- (x_dtype, w_dtype, h_dtype, c_dtype)))
7314
- validator.check_tensor_dtype_valid("b", b_dtype, (mstype.float16, mstype.float32), self.name)
7315
- return b_dtype, x_dtype, b_dtype, b_dtype, b_dtype, b_dtype, b_dtype, b_dtype
7316
-
7317
-
7318
- class DynamicGRUV2(PrimitiveWithInfer):
7272
+
7273
+ class DynamicGRUV2(Primitive):
7319
7274
  r"""
7320
7275
  Applies a single-layer gated recurrent unit (GRU) to an input sequence.
7321
7276
 
@@ -7330,7 +7285,7 @@ class DynamicGRUV2(PrimitiveWithInfer):
7330
7285
 
7331
7286
  where :math:`h_{t+1}` is the hidden state at time `t+1`, :math:`x_{t+1}` is the input
7332
7287
  at time `t+1`, :math:`h_{t}` is the hidden state of the layer
7333
- at time `t` or the initial hidden state at time `0`, and :math:`r_{t+1}`,
7288
+ at time `t` or the initial hidden state at time `0`. :math:`r_{t+1}`,
7334
7289
  :math:`z_{t+1}`, :math:`n_{t+1}` are the reset, update, and new gates, respectively.
7335
7290
  :math:`W`, :math:`b` are the weight parameter and the deviation parameter respectively.
7336
7291
  :math:`\sigma` is the sigmoid function, and :math:`*` is the Hadamard product.
@@ -7445,72 +7400,20 @@ class DynamicGRUV2(PrimitiveWithInfer):
7445
7400
  self.direction = validator.check_string(direction, ['UNIDIRECTIONAL'], "direction", self.name)
7446
7401
  self.activation = validator.check_string(activation, ['tanh'], "activation", self.name)
7447
7402
  self.gate_order = validator.check_string(gate_order, ['zrh', 'rzh'], "gate_order", self.name)
7448
- self.reset_after = validator.check_value_type("reset_after", reset_after, [bool], self.name)
7449
-
7450
- def infer_shape(self, x_shape, winput_shape, whidden_shape, binput_shape, bhidden_shape, seq_shape, h_shape):
7451
- validator.check_int(len(x_shape), 3, Rel.EQ, "x shape", self.name)
7452
- validator.check_int(len(winput_shape), 2, Rel.EQ, "weight input shape rank", self.name)
7453
- validator.check_int(len(whidden_shape), 2, Rel.EQ, "weight hidden shape rank", self.name)
7454
-
7455
- num_step, batch_size, input_size = x_shape
7456
- hidden_size = winput_shape[-1] // 3
7457
- if winput_shape[-1] % 3 != 0:
7458
- raise ValueError(f"For '{self.name}', the last dimension of 'w' must be a multiple of 3, "
7459
- f"but got {winput_shape[-1]}.")
7460
-
7461
- self.placeholder_index = [3, 4, 5]
7462
- if binput_shape is not None:
7463
- validator.check_int(len(binput_shape), 1, Rel.EQ, "bias input shape rank", self.name)
7464
- validator.check("bias_input_shape", binput_shape, "3 * hidden_shape", [3 * hidden_size], Rel.EQ, self.name)
7465
- self.placeholder_index.remove(3)
7466
- if bhidden_shape is not None:
7467
- validator.check_int(len(bhidden_shape), 1, Rel.EQ, "bias hidden shape rank", self.name)
7468
- validator.check("bias_hidden_shape", bhidden_shape,
7469
- "3 * hidden_shape", [3 * hidden_size], Rel.EQ, self.name)
7470
- self.placeholder_index.remove(4)
7471
- if seq_shape is not None:
7472
- raise ValueError(f"For '{self.name}', the dimension of 'seq_length' must be None, "
7473
- f"but got {seq_shape}.")
7474
-
7475
- validator.check_int(len(h_shape), 2, Rel.EQ, "init_h shape rank", self.name)
7476
- validator.check("init_h_shape[0]", h_shape[0], "batch_size", batch_size, Rel.EQ, self.name)
7477
- validator.check("init_h_shape[1]", h_shape[1], "hidden_size", hidden_size, Rel.EQ, self.name)
7478
- validator.check("weight_input_shape[-1]", winput_shape[-1], "weight_hidden_shape[-1]",
7479
- whidden_shape[-1], Rel.EQ, self.name)
7480
- validator.check("weight_input_shape[0]", winput_shape[0], "input_size", input_size, Rel.EQ, self.name)
7481
- validator.check("weight_hidden_shape[0]", whidden_shape[0], "hidden_size", hidden_size, Rel.EQ, self.name)
7482
- if self.num_proj > 0:
7483
- y_shape = (num_step, batch_size, min(hidden_size, self.num_proj))
7484
- else:
7485
- y_shape = (num_step, batch_size, hidden_size)
7486
- out_shape = (num_step, batch_size, hidden_size)
7487
- self.add_prim_attr("placeholder_index", self.placeholder_index)
7488
- return y_shape, out_shape, out_shape, out_shape, out_shape, out_shape
7489
-
7490
- def infer_dtype(self, x_dtype, winput_dtype, whidden_dtype, binput_dtype, bhidden_dtype, seq_dtype, h_dtype):
7491
- validator.check_tensor_dtype_valid("x dtype", x_dtype, [mstype.float16], self.name)
7492
- validator.check_tensor_dtype_valid("weight input dtype", winput_dtype, [mstype.float16], self.name)
7493
- validator.check_tensor_dtype_valid("weight hidden dtype", whidden_dtype, [mstype.float16], self.name)
7494
- valid_dtypes = [mstype.float16, mstype.float32]
7495
- validator.check_tensor_dtype_valid("init_h dtype", h_dtype, valid_dtypes, self.name)
7496
- b_dtype = h_dtype
7497
- if binput_dtype is not None:
7498
- args = {'init_h': h_dtype, 'bias_input': binput_dtype}
7499
- validator.check_tensors_dtypes_same_and_valid(args, valid_dtypes, self.name)
7500
- b_dtype = binput_dtype
7501
- if bhidden_dtype is not None:
7502
- args = {'init_h': h_dtype, 'bias_hidden': bhidden_dtype}
7503
- validator.check_tensors_dtypes_same_and_valid(args, valid_dtypes, self.name)
7504
- b_dtype = bhidden_dtype
7505
-
7506
- return b_dtype, b_dtype, b_dtype, b_dtype, b_dtype, b_dtype
7403
+ self.reset_after = validator.check_value_type("reset_after", reset_after, [bool], self.name)
7404
+ self.init_prim_io_names(
7405
+ inputs=[
7406
+ "x", "weight_input", "weight_hidden", "bias_input",
7407
+ "bias_hidden", "seq_length", "init_h"
7408
+ ],
7409
+ outputs=["y", "output_h", "update", "reset", "new", "hidden_new"])
7507
7410
 
7508
7411
 
7509
7412
  class InTopK(Primitive):
7510
7413
  r"""
7511
7414
  Determines whether the targets are in the top `k` predictions.
7512
7415
 
7513
- Refer to :func:`mindspore.ops.intopk` for more detail.
7416
+ Refer to :func:`mindspore.ops.intopk` for more details.
7514
7417
 
7515
7418
  Supported Platforms:
7516
7419
  ``Ascend`` ``GPU`` ``CPU``
@@ -7531,7 +7434,7 @@ class InTopK(Primitive):
7531
7434
  validator.check_value_type("k", k, [int], self.name)
7532
7435
 
7533
7436
 
7534
- class LRN(PrimitiveWithInfer):
7437
+ class LRN(Primitive):
7535
7438
  r"""
7536
7439
  Local Response Normalization.
7537
7440
 
@@ -7540,7 +7443,7 @@ class LRN(PrimitiveWithInfer):
7540
7443
  b_{c} = a_{c}\left(k + \frac{\alpha}{n}
7541
7444
  \sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}
7542
7445
 
7543
- where the :math:`a_{c}` indicates the specific value of the pixel corresponding to c in feature map;
7446
+ where the :math:`a_{c}` indicates the specific value of the pixel corresponding to :math:`c` in feature map;
7544
7447
  where the :math:`n/2` indicates the `depth_radius`; where the :math:`k` indicates the `bias`;
7545
7448
  where the :math:`\alpha` indicates the `alpha`; where the :math:`\beta` indicates the `beta`.
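A literal NumPy transcription of the formula above for the channel vector at one spatial position (hypothetical values; this follows the displayed equation rather than any particular kernel implementation):

import numpy as np

def lrn_at_position(a, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5):
    # a: 1-D array of channel values a_c at a single pixel position.
    n = 2 * depth_radius
    b = np.empty_like(a)
    for c in range(a.shape[0]):
        lo = max(0, c - depth_radius)
        hi = min(a.shape[0] - 1, c + depth_radius)
        b[c] = a[c] * (bias + alpha / n * np.sum(a[lo:hi + 1] ** 2)) ** (-beta)
    return b

print(lrn_at_position(np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32), depth_radius=1))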
7546
7449
 
@@ -7630,10 +7533,10 @@ class AvgPool3D(Primitive):
7630
7533
 
7631
7534
  - pad: Implicit paddings on both sides of the input in depth, height, width. The number of `pad` will
7632
7535
  be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
7633
- pad (Union(int, tuple[int])): The pad value to be filled. Default: 0. If `pad` is an integer, the paddings of
7634
- head, tail, top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of six
7635
- integers, the padding of head, tail, top, bottom, left and right equal to pad[0], pad[1], pad[2],
7636
- pad[3], pad[4] and pad[5] correspondingly.
7536
+ pad (Union(int, tuple[int], list[int])): The pad value to be filled. Default: 0. If `pad` is an integer,
7537
+ the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
7538
+ If `pad` is a tuple/list of six integers, the padding of head, tail, top, bottom, left and right are equal to
7539
+ pad[0], pad[1], pad[2], pad[3], pad[4] and pad[5] correspondingly.
7637
7540
  ceil_mode (bool): If True, ceil instead of floor to compute the output shape. Default: False.
7638
7541
  count_include_pad (bool): If True, averaging calculation will include the zero-padding. Default: True.
7639
7542
  divisor_override (int): If specified, it will be used as divisor in the averaging calculation,
@@ -7661,7 +7564,7 @@ class AvgPool3D(Primitive):
7661
7564
  ValueError: If `data_format` is not 'NCDHW'.
7662
7565
 
7663
7566
  Supported Platforms:
7664
- ``Ascend`` ``CPU``
7567
+ ``Ascend`` ``GPU`` ``CPU``
7665
7568
 
7666
7569
  Examples:
7667
7570
  >>> x = Tensor(np.arange(1 * 2 * 2 * 2 * 3).reshape((1, 2, 2, 2, 3)), mindspore.float16)
@@ -7681,7 +7584,7 @@ class AvgPool3D(Primitive):
7681
7584
  self.add_prim_attr('kernel_size', self.kernel_size)
7682
7585
  self.strides = _check_3d_int_or_tuple('strides', strides, self.name, ret_five=True)
7683
7586
  self.add_prim_attr('strides', self.strides)
7684
- validator.check_value_type('pad', pad, (int, tuple), self.name)
7587
+ validator.check_value_type('pad', pad, (int, tuple, list), self.name)
7685
7588
  if isinstance(pad, int):
7686
7589
  pad = (pad,) * 6
7687
7590
  if len(pad) != 6:
@@ -7711,9 +7614,9 @@ class Conv3D(Primitive):
7711
7614
 
7712
7615
  Applies a 3D convolution over an input tensor which is typically of shape
7713
7616
  :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` and output shape
7714
- :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`. Where :math:`N` is batch size, :math:`C` is channel number,
7715
- :math:`D` is depth, :math:`H` is height, :math:`W` is width.
7716
- the formula is defined as:
7617
+ :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`, where :math:`N` is batch size, :math:`C` is channel number,
7618
+ :math:`D` is depth, and :math:`H` and :math:`W` are the feature height and width respectively.
7619
+ The output value of a layer is calculated as:
7717
7620
 
7718
7621
  .. math::
7719
7622
 
@@ -7721,7 +7624,21 @@ class Conv3D(Primitive):
7721
7624
  \sum_{k=0}^{C_{in}-1} ccor(\text {weight}\left(C_{\text {out}_j}, k\right),
7722
7625
  \operatorname{input}\left(N_{i}, k\right))
7723
7626
 
7724
- where :math:`k` is kernel, :math:`ccor` is the cross-correlation operator.
7627
+ where :math:`k` is kernel,
7628
+ :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_ ,
7629
+ :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of
7630
+ the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)`
7631
+ is a convolution kernel slice with shape
7632
+ :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
7633
+ where :math:`\text{kernel_size[0]}`, :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are
7634
+ the depth, height and width of the convolution kernel respectively. :math:`\text{bias}` is the bias parameter
7635
+ and :math:`\text{X}` is the input tensor.
7636
+ The shape of full convolution kernel is
7637
+ :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
7638
+ where `groups` is the number of groups to split `input` in the channel dimension.
7639
+
7640
+ For more details, please refer to the paper `Gradient Based Learning Applied to Document
7641
+ Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ .
7725
7642
 
7726
7643
  If the 'pad_mode' is set to be "valid", the output depth, height and width will be
7727
7644
  :math:`\left \lfloor{1 + \frac{D_{in} + 2 \times \text{padding} - \text{ks_d} -
@@ -7735,15 +7652,15 @@ class Conv3D(Primitive):
7735
7652
 
7736
7653
  Args:
7737
7654
  out_channel (int): The number of output channel :math:`C_{out}`.
7738
- kernel_size (Union[int, tuple[int]]): The data type is int or a tuple of 3 integers. Specifies the depth, height
7739
- and width of the 3D convolution window. Single int means the value is for the depth, height and width
7740
- of the kernel. A tuple of 3 ints means the first value is for the depth, height and the other is for the
7741
- width of the kernel.
7655
+ kernel_size (Union[int, tuple[int]]): Specifies the depth, height
7656
+ and width of the 3D convolution window. It can be a single int or a tuple of 3 integers.
7657
+ Single int means the value is for the depth, height and width
7658
+ of the kernel. A tuple of 3 ints corresponds to the depth, height and width of the kernel respectively.
7742
7659
  mode (int): Modes for different convolutions. It is currently not used. Default: 1.
7743
- stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
7744
- the depth, height and width of movement are both strides, or a tuple of three int numbers that
7745
- represent depth, height and width of movement respectively. Default: 1.
7746
- pad_mode (str): Specifies padding mode. The optional values are
7660
+ stride (Union[int, tuple[int]], optional): The distance of kernel moving, it can be an int number
7661
+ that represents the depth, height and width of movement or a tuple of three int numbers that
7662
+ represent depth, height and width movement respectively. Default: 1.
7663
+ pad_mode (str, optional): Specifies padding mode. The optional values are
7747
7664
  "same", "valid" and "pad". Default: "valid".
7748
7665
 
7749
7666
  - same: Adopts the way of completion. The depth, height and width of the output will be equal to
@@ -7763,15 +7680,14 @@ class Conv3D(Primitive):
7763
7680
  head, tail, top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of six
7764
7681
  integers, the padding of head, tail, top, bottom, left and right equal to pad[0], pad[1], pad[2],
7765
7682
  pad[3], pad[4] and pad[5] correspondingly.
7766
- dilation (Union[int, tuple[int]]): The data type is int or a tuple of 3 integers
7767
- :math:`(dilation_d, dilation_h, dilation_w)`.
7768
- Currently, dilation on depth only supports the case of 1.
7769
- Specifies the dilation rate to use for dilated convolution.
7770
- If set :math:`k > 1`, there will be :math:`k - 1` pixels skipped
7771
- for each sampling location. Its value must be greater than or equal to 1 and
7772
- bounded by the height and width of the input. Default: 1.
7773
- group (int): Splits filter into groups, `in_channels` and `out_channels` must be
7774
- divisible by the number of groups. Default: 1. Only 1 is currently supported.
7683
+ dilation (Union[int, tuple[int]], optional): The data type is int or a tuple of 3 integers
7684
+ :math:`(dilation_d, dilation_h, dilation_w)`. Currently, dilation on depth only supports the case of 1
7685
+ on Ascend backend. Specifies the dilation rate to use for dilated convolution. If set :math:`k > 1`,
7686
+ there will be :math:`k - 1` pixels skipped for each sampling location.
7687
+ The value ranges for the depth, height, and width dimensions are [1, D], [1, H], and [1, W],
7688
+ respectively. Default: 1.
7689
+ group (int, optional): The number of groups into which the filter is divided. `in_channels`
7690
+ and `out_channels` must be divisible by `group`. Default: 1.
7775
7691
  data_format (str): The optional value for data format. Currently only support "NCDHW".
7776
7692
 
7777
7693
  Inputs:
@@ -7823,8 +7739,13 @@ class Conv3D(Primitive):
7823
7739
  self.kernel_size = _check_3d_int_or_tuple('kernel_size', kernel_size, self.name)
7824
7740
  self.stride = _check_3d_int_or_tuple('stride', stride, self.name, allow_five=False, ret_five=True)
7825
7741
  self.add_prim_attr('strides', self.stride)
7826
- self.dilation = _check_3d_int_or_tuple('dilation', dilation, self.name, allow_five=False,
7827
- ret_five=True, third_one=True)
7742
+ target = context.get_context("device_target")
7743
+ if target.lower() == "ascend":
7744
+ self.dilation = _check_3d_int_or_tuple('dilation', dilation, self.name, allow_five=False,
7745
+ ret_five=True, third_one=True)
7746
+ else:
7747
+ self.dilation = _check_3d_int_or_tuple('dilation', dilation, self.name, allow_five=False,
7748
+ ret_five=True, third_one=False)
7828
7749
  self.add_prim_attr('dilations', self.dilation)
7829
7750
  validator.check_value_type('pad', pad, (int, tuple), self.name)
7830
7751
  if isinstance(pad, int):
@@ -7850,7 +7771,13 @@ class Conv3D(Primitive):
7850
7771
  self.format = validator.check_string(data_format, ['NCDHW'], 'data_format', self.name)
7851
7772
  self.add_prim_attr('data_format', self.format)
7852
7773
  self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name)
7853
- self.group = validator.check_equal_int(group, 1, 'group', self.name)
7774
+ validator.check_value_type("group", group, (int,), self.name)
7775
+ validator.check_int_range(group, 1, out_channel, validator.INC_BOTH, "group", self.name)
7776
+ device_target = context.get_context("device_target")
7777
+ if device_target == "Ascend" and group != 1:
7778
+ raise ValueError("On Ascend platform, group = 1 must be satisfied.")
7779
+
7780
+ self.group = group
7854
7781
  self.add_prim_attr('groups', self.group)
7855
7782
  self.add_prim_attr('offset_x', 0)
7856
7783
 
@@ -8071,7 +7998,7 @@ class SparseApplyAdadelta(Primitive):
8071
7998
  def __init__(self, epsilon, use_locking=False):
8072
7999
  """Initialize SparseApplyAdadelta"""
8073
8000
  validator.check_value_type("epsilon", epsilon, [float], self.name)
8074
- validator.check_number("epsilon", epsilon, 0.0, Rel.GE, self.name)
8001
+ validator.check_number("epsilon", epsilon, 0.0, validator.GE, self.name)
8075
8002
  validator.check_value_type("use_locking", use_locking, [bool], self.name)
8076
8003
 
8077
8004
 
@@ -8082,46 +8009,54 @@ class CTCLossV2(Primitive):
8082
8009
  The CTC algorithm is proposed in `Connectionist Temporal Classification: Labeling Unsegmented Sequence Data with
8083
8010
  Recurrent Neural Networks <http://www.cs.toronto.edu/~graves/icml_2006.pdf>`_.
8084
8011
 
8012
+ .. warning::
8013
+ This is an experimental API that is subject to change or deletion.
8014
+
8085
8015
  Args:
8086
- blank (int): The blank label. Default: 0.
8087
- reduction (string): Apply specific reduction method to the output. Currently only support 'none',
8016
+ blank (int, optional): The blank label. Default: 0.
8017
+ reduction (str, optional): Apply specific reduction method to the output. Currently only support 'none',
8088
8018
  not case sensitive. Default: "none".
8089
- zero_infinity (bool): Whether to set infinite loss and correlation gradient to zero. Default: False.
8019
+ zero_infinity (bool, optional): If loss is infinite, this parameter determines whether to set that loss
8020
+ and its correlated gradient to zero. Default: False.
8090
8021
 
8091
8022
  Inputs:
8092
- - **log_probs** (Tensor) - A tensor of shape (T, N, C), where T is input length, N is batch size and C is number
8093
- of classes (including blank).
8094
- - **targets** (Tensor) - A tensor of shape (N, S), where S is max target length, means the target sequences.
8095
- - **input_lengths** (Union(Tuple, Tensor)) - A tuple or Tensor of shape(N). It means the lengths of the input.
8096
- - **target_lengths** (Union(Tuple, Tensor)) - A tuple or Tensor of shape(N). It means the lengths of the target.
8023
+ - **log_probs** (Tensor) - A tensor of shape :math:`(T, N, C)`, where :math:`T` is input length, :math:`N` is
8024
+ batch size and :math:`C` is number of classes (including blank).
8025
+ - **targets** (Tensor) - A tensor of shape :math:`(N, S)`, where :math:`S` is max target length,
8026
+ means the target sequences.
8027
+ - **input_lengths** (Union(Tuple, Tensor)) - A tuple or Tensor of shape :math:`(N)`.
8028
+ It means the lengths of the input.
8029
+ - **target_lengths** (Union(Tuple, Tensor)) - A tuple or Tensor of shape :math:`(N)`.
8030
+ It means the lengths of the target.
8097
8031
 
8098
8032
  Outputs:
8099
8033
  - **neg_log_likelihood** (Tensor) - A loss value which is differentiable with respect to each input node.
8100
8034
  - **log_alpha** (Tensor) - The probability of possible trace of input to target.
8101
8035
 
8102
8036
  Raises:
8103
- TypeError: If `zero_infinity` is not a bool, reduction is not string.
8037
+ TypeError: If `zero_infinity` is not a bool.
8038
+ TypeError: If `reduction` is not string.
8104
8039
  TypeError: If the dtype of `log_probs` is not float or double.
8105
8040
  TypeError: If the dtype of `targets`, `input_lengths` or `target_lengths` is not int32 or int64.
8106
- RuntimeError: If the rank of `log_probs` is not 3.
8107
- RuntimeError: If the rank of `targets` is not 2.
8108
- RuntimeError: If the shape of `input_lengths` does not match {batch_size|N}.
8109
- RuntimeError: If the shape of `target_lengths` does not match {batch_size|N}.
8110
- RuntimeError: If the types of `targets`, `input_lengths` or `target_lengths` are different.
8111
- RuntimeError: If the value of `blank` is not in range [0, num_labels|C).
8112
- RuntimeError: If any value of `input_lengths` is larger than (time_series|T).
8113
- RuntimeError: If any target_lengths[i] is not in range [0, input_length[i]].
8041
+ ValueError: If the rank of `log_probs` is not 3.
8042
+ ValueError: If the rank of `targets` is not 2.
8043
+ ValueError: If the shape of `input_lengths` does not match batch_size :math:`N`.
8044
+ ValueError: If the shape of `target_lengths` does not match batch_size :math:`N`.
8045
+ TypeError: If the types of `targets`, `input_lengths` or `target_lengths` are different.
8046
+ ValueError: If the value of `blank` is not in range [0, num_labels|C).
8047
+ RuntimeError: If any value of `input_lengths` is larger than (time_series|T).
8048
+ RuntimeError: If any `target_lengths[i]` is not in range [0, `input_length[i]`].
8114
8049
 
8115
8050
  Supported Platforms:
8116
- ``Ascend`` ``CPU``
8051
+ ``Ascend`` ``GPU`` ``CPU``
8117
8052
 
8118
8053
  Examples:
8119
8054
  >>> log_probs = Tensor(np.array([[[0.3, 0.6, 0.6]],
8120
- [[0.9, 0.4, 0.2]]]).astype(np.float32))
8055
+ ... [[0.9, 0.4, 0.2]]]).astype(np.float32))
8121
8056
  >>> targets = Tensor(np.array([[0, 1]]), mstype.int32)
8122
8057
  >>> input_lengths = Tensor(np.array([2]), mstype.int32)
8123
8058
  >>> target_lengths = Tensor(np.array([1]), mstype.int32)
8124
- >>> CTCLossV2 = op.CTCLossV2(blank=0, reduction='none', zero_infinity=False)
8059
+ >>> CTCLossV2 = ops.CTCLossV2(blank=0, reduction='none', zero_infinity=False)
8125
8060
  >>> neg_log_hood, log_alpha = CTCLossV2(
8126
8061
  ... log_probs, targets, input_lengths, target_lengths)
8127
8062
  >>> print(neg_log_hood)
@@ -8132,7 +8067,7 @@ class CTCLossV2(Primitive):
8132
8067
  """
8133
8068
 
8134
8069
  @prim_attr_register
8135
- def __init__(self, blank, reduction="none", zero_infinity=False):
8070
+ def __init__(self, blank=0, reduction="none", zero_infinity=False):
8136
8071
  """Initialize CTCLossV2"""
8137
8072
  self.init_prim_io_names(inputs=["log_probs", "targets", "input_lengths", "target_lengths"],
8138
8073
  outputs=["neg_log_likelihood", "log_alpha"])
@@ -8362,17 +8297,19 @@ class Conv3DTranspose(Primitive):
8362
8297
  f"when 'pad_mode' is not \"pad\", but got 'output_padding' is "
8363
8298
  f"{output_padding} and 'pad_mode' is {pad_mode}.")
8364
8299
  self.add_prim_attr('output_padding', self.output_padding)
8365
- validator.check_int_range(self.kernel_size[0] * self.kernel_size[1] * self.kernel_size[2], 1, 343, Rel.INC_BOTH,
8366
- 'The product of height, width and depth of kernel_size belonging [1, 343]', self.name)
8367
- validator.check_int_range(self.stride[0] * self.stride[1] * self.stride[2], 1, 343, Rel.INC_BOTH,
8300
+ validator.check_int_range(self.kernel_size[0] * self.kernel_size[1] * self.kernel_size[2],
8301
+ 1, 343, validator.INC_BOTH,
8302
+ 'The product of height, width and depth of kernel_size belonging [1, 343]',
8303
+ self.name)
8304
+ validator.check_int_range(self.stride[0] * self.stride[1] * self.stride[2], 1, 343, validator.INC_BOTH,
8368
8305
  'The product of height, width and depth of stride belonging [1, 343]', self.name)
8369
- validator.check_int_range(self.stride[1] * self.stride[2], 1, 256, Rel.INC_BOTH,
8306
+ validator.check_int_range(self.stride[1] * self.stride[2], 1, 256, validator.INC_BOTH,
8370
8307
  'The product of height, width and depth of stride belonging [1, 256]', self.name)
8371
- validator.check_int_range(self.output_padding[2], 0, max(self.dilation[2], self.stride[2]), Rel.INC_LEFT,
8308
+ validator.check_int_range(self.output_padding[2], 0, max(self.dilation[2], self.stride[2]), validator.INC_LEFT,
8372
8309
  'output_padding_d belonging [0, max(stride_d, dilation_d))', self.name)
8373
- validator.check_int_range(self.output_padding[3], 0, max(self.dilation[3], self.stride[3]), Rel.INC_LEFT,
8310
+ validator.check_int_range(self.output_padding[3], 0, max(self.dilation[3], self.stride[3]), validator.INC_LEFT,
8374
8311
  'output_padding_h belonging [0, max(stride_h,dilation_h))', self.name)
8375
- validator.check_int_range(self.output_padding[4], 0, max(self.dilation[4], self.stride[4]), Rel.INC_LEFT,
8312
+ validator.check_int_range(self.output_padding[4], 0, max(self.dilation[4], self.stride[4]), validator.INC_LEFT,
8376
8313
  'output_padding_w belonging [0, max(stride_w,dilation_w))', self.name)
8377
8314
 
8378
8315
 
@@ -8390,7 +8327,9 @@ class Dilation2D(Primitive):
8390
8327
  \text{input}(N_i, C_j, s_0 \times h + d_0 \times m, s_1 \times w + d_1 \times n) + \text{filter}(C_j, m, n)
8391
8328
 
8392
8329
  .. warning::
8393
- This operator is an experimental operator, which has some accuracy problems for some inputs.
8330
+ This is an experimental API that is subject to change or deletion.
8331
+
8332
+ Note:
8394
8333
  If the input data type is float32, this operator is still executed in float16 mode.
8395
8334
 
8396
8335
  Args:
@@ -8405,7 +8344,7 @@ class Dilation2D(Primitive):
8405
8344
  each sampling location. Its value must be greater or equal to 1 and bounded by
8406
8345
  the height and width of the input `x`.
8407
8346
 
8408
- pad_mode (str): Specifies padding mode. The optional values are
8347
+ pad_mode (str, optional): Specifies padding mode. The optional values are
8409
8348
  "same", "valid". Default: "same". Both upper and lower case are supported.
8410
8349
 
8411
8350
  - same: Adopts the way of completion. The height and width of the output will be the same as
@@ -8413,10 +8352,10 @@ class Dilation2D(Primitive):
8413
8352
 
8414
8353
  - valid: Adopts the way of discarding. The possible largest height and width of output will be returned
8415
8354
  without padding. Extra pixels will be discarded.
8416
- data_format (str): The value for data format, only 'NCHW' is supported at present. Default: "NCHW".
8355
+ data_format (str, optional): The value for data format, only 'NCHW' is supported at present. Default: "NCHW".
8417
8356
 
8418
8357
  Inputs:
8419
- - **x** (Tensor) - Input data. A four dimension tensor with float16 or float32 data type. The shape must be
8358
+ - **x** (Tensor) - Input data. A 4-D Tensor, its shape must be
8420
8359
  :math:`(N, C_{in}, H_{in}, W_{in})`.
8421
8360
  - **filter** (Tensor) - A three dimension tensor with the same type as input. The shape must be
8422
8361
  :math:`(C_{in}, H_{filter}, W_{filter})`.
@@ -8426,7 +8365,7 @@ class Dilation2D(Primitive):
8426
8365
  is not necessarily the same as the input x, the type is the same as the input x.
8427
8366
 
8428
8367
  Raises:
8429
- TypeError: If type of `x` or `filter` is not the tpye in [uint8, uint16, uint32, uint64, int8, int16,
8368
+ TypeError: If type of `x` or `filter` is not the type in [uint8, uint16, uint32, uint64, int8, int16,
8430
8369
  int32, int64, float16, float32, float64].
8431
8370
  TypeError: If `stride` or `dilation` is not an int number or a tuple of two or four int numbers.
8432
8371
  ValueError: If the length of `stride` or `dilation` is neither two nor four when they are tuple.
@@ -8437,7 +8376,7 @@ class Dilation2D(Primitive):
8437
8376
  ValueError: If `data_format` is not the str of 'NCHW'.
8438
8377
 
8439
8378
  Supported Platforms:
8440
- ``Ascend`` ``GPU``
8379
+ ``GPU`` ``CPU``
8441
8380
 
8442
8381
  Examples:
8443
8382
  >>> x = Tensor(np.ones([10, 5, 32, 32]), mindspore.float16)
@@ -8502,10 +8441,10 @@ class SoftShrink(Primitive):
8502
8441
  r"""
8503
8442
  Applies the SoftShrink function element-wise.
8504
8443
 
8505
- Refer to :func:`mindspore.ops.soft_shrink` for more detail.
8444
+ Refer to :func:`mindspore.ops.softshrink` for more details.
8506
8445
 
8507
8446
  Supported Platforms:
8508
- ``Ascend`` ``CPU`` ``GPU``
8447
+ ``Ascend`` ``GPU`` ``CPU``
8509
8448
 
8510
8449
  Examples:
8511
8450
  >>> import mindspore
@@ -8523,17 +8462,17 @@ class SoftShrink(Primitive):
8523
8462
  def __init__(self, lambd=0.5):
8524
8463
  """Initialize SoftShrink"""
8525
8464
  validator.check_value_type("lambd", lambd, [float], self.name)
8526
- validator.check_number("lambd", lambd, 0, Rel.GE, self.name)
8465
+ validator.check_number("lambd", lambd, 0, validator.GE, self.name)
8527
8466
 
8528
8467
 
8529
8468
  class HShrink(Primitive):
8530
8469
  r"""
8531
8470
  Hard Shrink activation function.
8532
8471
 
8533
- Refer to :func:`mindspore.ops.hardshrink` for more detail.
8472
+ Refer to :func:`mindspore.ops.hardshrink` for more details.
8534
8473
 
8535
8474
  Supported Platforms:
8536
- ``Ascend`` ``CPU`` ``GPU``
8475
+ ``Ascend`` ``GPU`` ``CPU``
8537
8476
 
8538
8477
  Examples:
8539
8478
  >>> import mindspore as ms
@@ -8590,9 +8529,9 @@ class ApplyAdagradDA(Primitive):
8590
8529
  Inputs:
8591
8530
  - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
8592
8531
  The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
8593
- - **gradient_accumulator** (Parameter) - The dict of mutable tensor gradient_accumulator. Must have the same
8532
+ - **gradient_accumulator** (Parameter) - The dict of mutable tensor :math:`grad\_accum`. Must have the same
8594
8533
  shape and dtype as `var`.
8595
- - **gradient_squared_accumulator** (Parameter) - The dict of mutable tensor gradient_squared_accumulator.
8534
+ - **gradient_squared_accumulator** (Parameter) - The dict of mutable tensor :math:`grad\_squared\_accum`.
8596
8535
  Must have the same shape and dtype as `var`.
8597
8536
  - **grad** (Tensor) - A tensor for gradient. Must have the same shape and dtype as `var`.
8598
8537
  - **lr** ([Number, Tensor]) - Scaling factor. Must be a scalar. With float32 or float16 data type.
@@ -8695,11 +8634,11 @@ class SparseApplyRMSProp(Primitive):
8695
8634
  the relatively highest priority data type.
8696
8635
 
8697
8636
  Args:
8698
- rho (float): Decay rate. The value should between 0 and 1, otherwise the behavior is undefined.
8637
+ rho (float): Decay rate. The value should be between 0 and 1, otherwise the behavior is undefined.
8699
8638
  momentum (float): Momentum. The value should be greater or equal to 0, otherwise the behavior is undefined.
8700
8639
  epsilon (float): A small value added for numerical stability. The value should be greater than 0,
8701
8640
  otherwise the behavior is undefined.
8702
- use_locking (bool): If `True`, updating of the var, ms, and mom tensors is protected by a lock;
8641
+ use_locking (bool): If `True`, updating of the var, ms, and mom tensors are protected by a lock;
8703
8642
  otherwise the behavior is undefined, but may exhibit less contention. Default: False.
8704
8643
 
8705
8644
  Inputs:
@@ -8738,7 +8677,7 @@ class SparseApplyRMSProp(Primitive):
8738
8677
  RuntimeError: If the data type of `var`, `ms`, `mom` and `grad` conversion of Parameter is not supported.
8739
8678
 
8740
8679
  Supported Platforms:
8741
- ``Ascend``
8680
+ ``Ascend`` ``GPU`` ``CPU``
8742
8681
 
8743
8682
  Examples:
8744
8683
  >>> class SparseApplyRMSPropNet(nn.Cell):
@@ -8786,9 +8725,9 @@ class SparseApplyRMSProp(Primitive):
8786
8725
  validator.check_value_type("momentum", momentum, [float], self.name)
8787
8726
  validator.check_value_type("epsilon", epsilon, [float], self.name)
8788
8727
  validator.check_value_type("use_locking", use_locking, [bool], self.name)
8789
- self.epsilon = validator.check_number("epsilon", epsilon, 0.0, Rel.GT, self.name)
8790
- self.momentum = validator.check_number("momentum", momentum, 0.0, Rel.GE, self.name)
8791
- self.rho = validator.check_float_range(rho, 0.0, 1.0, Rel.INC_BOTH, "rho", self.name)
8728
+ self.epsilon = validator.check_number("epsilon", epsilon, 0.0, validator.GT, self.name)
8729
+ self.momentum = validator.check_number("momentum", momentum, 0.0, validator.GE, self.name)
8730
+ self.rho = validator.check_float_range(rho, 0.0, 1.0, validator.INC_BOTH, "rho", self.name)
8792
8731
 
8793
8732
 
8794
8733
  class SparseApplyCenteredRMSProp(Primitive):
@@ -8855,13 +8794,9 @@ class SparseApplyCenteredRMSProp(Primitive):
8855
8794
  ValueError: If shape of `grad` is not same as shape of `var` except first dimension.
8856
8795
 
8857
8796
  Supported Platforms:
8858
- ``Ascend`` ``CPU``
8797
+ ``Ascend`` ``GPU`` ``CPU``
8859
8798
 
8860
8799
  Examples:
8861
- >>> import numpy as np
8862
- >>> from mindspore import Tensor
8863
- >>> import mindspore.common.dtype as mstype
8864
- >>> import mindspore.ops.operations.nn_ops as nn_ops
8865
8800
  >>> var = Tensor(np.array([[0.6, 0.4], [0.1, 0.5]]).astype(np.float32))
8866
8801
  >>> mg = Tensor(np.array([[0.1, 0.3], [0.1, 0.5]]).astype(np.float32))
8867
8802
  >>> ms = Tensor(np.array([[0.2, 0.1], [0.1, 0.2]]).astype(np.float32))
@@ -8880,10 +8815,10 @@ class SparseApplyCenteredRMSProp(Primitive):
8880
8815
  """
8881
8816
 
8882
8817
  __mindspore_signature__ = (
8883
- sig.make_sig('var', dtype=sig.sig_dtype.T),
8884
- sig.make_sig('mg', dtype=sig.sig_dtype.T),
8885
- sig.make_sig('ms', dtype=sig.sig_dtype.T),
8886
- sig.make_sig('mom', dtype=sig.sig_dtype.T),
8818
+ sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
8819
+ sig.make_sig('mg', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
8820
+ sig.make_sig('ms', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
8821
+ sig.make_sig('mom', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
8887
8822
  sig.make_sig('lr', dtype=sig.sig_dtype.T),
8888
8823
  sig.make_sig('rho', dtype=sig.sig_dtype.T),
8889
8824
  sig.make_sig('momentum', dtype=sig.sig_dtype.T),
@@ -8998,53 +8933,15 @@ class ApplyKerasMomentum(Primitive):
8998
8933
 
8999
8934
  class MultilabelMarginLoss(Primitive):
9000
8935
  r"""
9001
- MultilabelMarginLoss operation.
9002
-
9003
- Creates a criterion that optimizes a multi-class multi-classification
9004
- hinge loss (margin-based loss) between input :math:`x` (a 2D mini-batch `Tensor`)
9005
- and output :math:`y` (which is a 2D `Tensor` of target class indices).
9006
- For each sample in the mini-batch:
9007
-
9008
- .. math::
9009
- \text{loss}(x, y) = \sum_{ij}\frac{\max(0, 1 - (x[y[j]] - x[i]))}{\text{x.size}(0)}
9010
-
9011
- where :math:`x \in \left\{0, \; \cdots , \; \text{x.size}(0) - 1\right\}`, \
9012
- :math:`y \in \left\{0, \; \cdots , \; \text{y.size}(0) - 1\right\}`, \
9013
- :math:`0 \leq y[j] \leq \text{x.size}(0)-1`, \
9014
- and :math:`i \neq y[j]` for all :math:`i` and :math:`j`.
9015
-
9016
- :math:`y` and :math:`x` must have the same size.
9017
-
9018
- The criterion only considers a contiguous block of non-negative targets that
9019
- starts at the front.
9020
-
9021
- This allows for different samples to have variable amounts of target classes.
9022
-
9023
- Args:
9024
- reduction (str): Apply specific reduction method to the output: 'none', 'mean', 'sum'. Default: "mean".
9025
-
9026
- Inputs:
9027
- - **x** (Tensor) - Predict data. Tensor of shape :math:`(C)` or :math:`(N, C)`, where :math:`N`
9028
- is the batch size and :math:`C` is the number of classes. Data type must be float16 or float32.
9029
- - **target** (Tensor) - Ground truth data, with the same shape as `x`, data type must be int32 and
9030
- label targets padded by -1.
9031
-
9032
- Outputs:
9033
- - **y** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss. If `reduction` is "none", its shape
9034
- is :math:`(N)`. Otherwise, a scalar value will be returned.
9035
- - **is_target** (Tensor) - Output tensor for backward input, with the same shape as `target`,
9036
- data type must be int32.
8936
+ Creates a loss criterion that minimizes the hinge loss for multi-class
8937
+ classification tasks.
8938
+ It takes a 2D mini-batch Tensor :math:`x` as input and a 2D
8939
+ Tensor :math:`y` containing target class indices as output.
9037
8940
 
9038
- Raises:
9039
- TypeError: If `x` or `target` is not a Tensor.
9040
- TypeError: If dtype of `x` is neither float16 nor float32.
9041
- TypeError: If dtype of `target` is not int32.
9042
- ValueError: If length of shape of `x` is neither 1 nor 2.
9043
- ValueError: If shape of `x` is not the same as `target`.
9044
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
8941
+ Refer to :func:`mindspore.ops.multilabel_margin_loss` for more details.
9045
8942
 
9046
8943
  Supported Platforms:
9047
- ``Ascend``
8944
+ ``Ascend`` ``GPU``
9048
8945
 
9049
8946
  Examples:
9050
8947
  >>> loss = ops.MultilabelMarginLoss()
@@ -9117,7 +9014,7 @@ class ApplyAdamWithAmsgrad(Primitive):
9117
9014
  ValueError: If the shape of `beta1_power`, `beta2_power`, `lr` is not 0.
9118
9015
 
9119
9016
  Supported Platforms:
9120
- ``Ascend`` ``CPU`` ``GPU``
9017
+ ``Ascend`` ``GPU`` ``CPU``
9121
9018
 
9122
9019
  Examples:
9123
9020
  >>> class ApplyAdamWithAmsgradNet(nn.Cell):
@@ -9163,57 +9060,16 @@ class ApplyAdamWithAmsgrad(Primitive):
9163
9060
 
9164
9061
  class GridSampler3D(Primitive):
9165
9062
  """
9166
- Given an `input_x` and a flow-field `grid`, computes the `output` using `input_x` values and pixel locations from
9167
- `grid`. Only volumetric (5-D) `input_x` is supported.
9168
-
9169
- For `input_x` with shape :math:`(N, C, D_{in}, H_{in}, W_{in})` and `grid` with shape :math:`(N, D_{out}, H_{out},
9170
- W_{out}, 3)`, the `output` will have shape :math:`(N, C, D_{out}, H_{out}, W_{out})`.
9171
-
9172
- For each output location `output[n, :, d, h, w]`, the size-3 vector `grid[n, d, h, w]` specifies `input_x` pixel
9173
- locations x, y, z, which are used to interpolate the output value `output[n, :, d, h, w]`. And `interpolation_mode`
9174
- argument specifies "nearest" or "bilinear" interpolation method to sample the input pixels.
9175
-
9176
- `grid` specifies the sampling pixel locations normalized by the `input_x` spatial dimensions. Therefore, it should
9177
- have most values in the range of :math:`[-1, 1]`.
9178
-
9179
- If `grid` has values outside the range of :math:`[-1, 1]`, the corresponding outputs are handled as defined by
9180
- `padding_mode`. If `padding_mode` is set to be "zeros", use :math:`0` for out-of-bound grid locations. If
9181
- `padding_mode` is set to be "border", use border values for out-of-bound grid locations. If `padding_mode` is set
9182
- to be "reflection", use values at locations reflected by the border for out-of-bound grid locations. For location
9183
- far away from the border, it will keep being reflected until becoming in bound.
9184
-
9185
- Args:
9186
- interpolation_mode (str): An optional string specifying the interpolation method. The optional values are
9187
- "bilinear" or "nearest". Default: "bilinear".
9188
- padding_mode (str): An optional string specifying the pad method. The optional values are "zeros", "border" or
9189
- "reflection". Default: "zeros".
9190
- align_corners (bool): An optional bool. If set to `True`, the extrema (-1 and 1) are considered as referring to
9191
- the center points of the input’s corner pixels. If set to `False`, they are instead considered as referring
9192
- to the corner points of the input’s corner pixels, making the sampling more resolution agnostic. Default:
9193
- `False`.
9194
-
9195
- Inputs:
9196
- - **input_x** (Tensor) - A 5-D tensor with dtype of float32 or float64 and shape of :math:`(N, C, D_{in},
9197
- H_{in}, W_{in})`.
9198
- - **grid** (Tensor) - A 5-D tensor whose dtype is the same as `input_x` and whose shape is :math:`(N, D_{out},
9199
- H_{out}, W_{out}, 3)`.
9063
+ Given an input and a grid, the output is calculated using the input values
9064
+ and pixel positions in the grid. Only volume (5-D) input is supported.
9200
9065
 
9201
- Outputs:
9202
- A 5-D Tensor whose dtype is the same as `input_x` and whose shape is :math:`(N, C, D_{out}, H_{out}, W_{out})`.
9066
+ .. warning::
9067
+ This is an experimental API that is subject to change or deletion.
9203
9068
 
9204
- Raises:
9205
- TypeError: If `input_x` or `grid` is not a Tensor.
9206
- TypeError: If the dtypes of `input_x` and `grid` are inconsistent.
9207
- TypeError: If the dtype of `input_x` or `grid` is not a valid type.
9208
- TypeError: If `align_corners` is not a boolean value.
9209
- ValueError: If the rank of `input_x` or `grid` is not equal to 5.
9210
- ValueError: If the first dimension of `input_x` is not equal to that of `grid`.
9211
- ValueError: If the last dimension of `grid` is not equal to 3.
9212
- ValueError: If `interpolation_mode` is not "bilinear", "nearest" or a string value.
9213
- ValueError: If `padding_mode` is not "zeros", "border", "reflection" or a string value.
9069
+ Refer to :func:`mindspore.ops.grid_sample` for more details.
9214
9070
 
9215
9071
  Supported Platforms:
9216
- ``Ascend`` ``CPU`` ``GPU``
9072
+ ``Ascend`` ``GPU`` ``CPU``
9217
9073
 
9218
9074
  Examples:
9219
9075
  >>> gridsampler = ops.GridSampler3D(interpolation_mode='bilinear', padding_mode='zeros', align_corners=True)
@@ -9247,33 +9103,40 @@ class FractionalMaxPool(Primitive):
9247
9103
  r"""
9248
9104
  Performs fractional max pooling on the input.
9249
9105
 
9250
- Fractional max pooling is similar to regular max pooling, In regular max pooling, you downsize an
9251
- input set by taking the maximum value of smaller N x N subsections of the set (often 2x2), and try
9252
- to reduce the set by a factor of N, where N is an integer. Fractional max pooling, means that the
9253
- overall reduction ratio N does not have to be an integer.
9254
- The sizes of the pooling regions are generated randomly but are fairly uniform.
9106
+ Fractional max pooling is similar to regular max pooling, but with the added flexibility of
9107
+ allowing the overall reduction ratio `N` to be a non-integer value. In regular max pooling,
9108
+ an input set is reduced in size by taking the maximum value of `N x N` (usually 2x2)
9109
+ subsections of the set, with the goal of reducing the set by a factor of `N`, where `N` is an integer.
9110
+
9111
+ In contrast, fractional max pooling uses randomly generated pool sizes that are fairly uniform in size.
9255
9112
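For intuition, a minimal sketch (the input values and the resulting shape are assumed) that pools a 4x4 NHWC input with a non-integer ratio of 1.5 on the row and col dimensions:

>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> import mindspore.common.dtype as mstype
>>> x = Tensor(np.arange(1, 17).reshape([1, 4, 4, 1]), mstype.float32)  # NHWC input
>>> pool = ops.FractionalMaxPool(pooling_ratio=[1.0, 1.5, 1.5, 1.0])
>>> y, row_pooling_sequence, col_pooling_sequence = pool(x)
>>> print(y.shape)
(1, 2, 2, 1)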
 
9256
9113
  .. warning::
9257
9114
  "pooling_ratio", currently only supports row and col dimension and should be >= 1.0, the first
9258
- and last elements must be 1.0 because we don't allow pooling on batch and channels dimensions.
9115
+ and last elements must be 1.0 because pooling on batch and channels dimensions is not allowed.
9259
9116
 
9260
9117
  Args:
9261
- pooling_ratio (list(float)): Decide the shape of output, is a list of floats that has length >= 4.
9262
- Pooling ratio for each dimension of value should be >=0, currently only support for row and col
9263
- dimension. The first and last elements must be 1.0 because we don't allow pooling on batch and
9264
- channels dimensions.
9265
- pseudo_random(bool): An optional bool. Defaults to False. When set to True, generates the pooling
9266
- sequence in a pseudo random fashion, otherwise, in a random fashion.
9267
- Check paper Benjamin Graham, Fractional Max-Pooling for difference between pseudo_random and
9268
- random.
9269
- overlapping(bool): An optional bool. Defaults to False. When set to True, it means when pooling,
9270
- the values at the boundary of adjacent pooling cells are used by both cells.
9271
- deterministic(bool): An optional bool. Defaults to False. When set to True, a fixed pooling region
9272
- will be used when iterating over a FractionalMaxPool node in the computation graph. Mainly
9273
- used in unit test to make FractionalMaxPool deterministic.
9274
- seed(int): An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero, the
9275
- random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
9276
- seed2(int): An optional int. Defaults to 0. An second seed to avoid seed collision.
9118
+ pooling_ratio (list(float)): Decides the shape of the output. It is a list of floats with length >= 4.
9119
+ The pooling ratio for each dimension should not be less than 0; currently only the row and col
9120
+ dimensions are supported.
9121
+ pseudo_random(bool, optional): Generate the pooling sequence either randomly or pseudo-randomly.
9122
+ If the pseudo_random parameter is set to True, the sequence will be generated in a
9123
+ pseudo-random fashion, otherwise it will be generated randomly.
9124
+ Refer to `Fractional Max-Pooling <https://arxiv.org/pdf/1412.6071>`_
9125
+ by Benjamin Graham to understand the distinction between the two.
9126
+ Default: False.
9127
+ overlapping(bool, optional): When set to True, the values at the boundary of adjacent pooling cells
9128
+ will be shared by both cells during the pooling process. When set to False, the values are not reused.
9129
+ Default: False.
9130
+ deterministic(bool, optional): If deterministic is set to True, a fixed pooling region will be used
9131
+ in the computation graph, ensuring that the FractionalMaxPool is deterministic.
9132
+ This is often used in unit tests. When set to False, fixed pool regions will not be used.
9133
+ Default: False.
9134
+ seed(int, optional): If either seed or seed2 are set to a non-zero value, the random number
9135
+ generator will be seeded using the specified seed. If neither seed nor seed2 are set,
9136
+ the generator will be seeded by a random seed.
9137
+ Default: 0.
9138
+ seed2(int, optional): The second seed to avoid seed collision.
9139
+ Default: 0.
9277
9140
 
9278
9141
  Inputs:
9279
9142
  - **x** (Tensor) -The data type must be one of the following types: float32, float64, int32, int64.
@@ -9326,9 +9189,10 @@ class FractionalMaxPool(Primitive):
9326
9189
 
9327
9190
  class FractionalMaxPool3DWithFixedKsize(Primitive):
9328
9191
  r"""
9329
- This operator applies a 3D fractional max pooling over an input signal composed of several input planes.
9330
- The max-pooling operation is applied in kD x kH x kW regions by a stochastic step size determined
9331
- by the target output size.
9192
+ Applies a 3D fractional max pooling to an input signal composed of multiple input planes.
9193
+ The max-pooling operation is applied in :math:`(kD, kH, kW)` regions by a stochastic step size determined by
9194
+ the target output size `output_shape`.
9195
+
9332
9196
  The number of output features is equal to the number of input planes.
9333
9197
 
9334
9198
  Refer to the paper `Fractional MaxPooling by Ben Graham <https://arxiv.org/abs/1412.6071>`_ for more details.
@@ -9337,34 +9201,27 @@ class FractionalMaxPool3DWithFixedKsize(Primitive):
9337
9201
  D the feature depth, H is the feature height, and W is the feature width.
9338
9202
 
9339
9203
  Args:
9340
- ksize (Union[float, tuple]): The target ksize is D x H x W.
9341
- ksize can be a tuple, or a single K for K x K x K.
9342
- specifying the window size (D, H, W) of the input tensor.
9343
-
9344
- output_shape (Union[int, tuple]): The target output_shape is D x H x W.
9345
- output_shape can be a tuple, or a single H for H x H x H.
9346
- specifying the size (D, H, W) of the output tensor.
9347
-
9348
- data_format (str) : The optional value for data format.
9204
+ ksize (Union[int, tuple]): Size of the pooling window. `ksize` can be a tuple of three values specifying a
9205
+ shape :math:`(k_D, k_H, k_W)`, or a single int `K` for :math:`(K, K, K)`.
9206
+ output_shape (Union[int, tuple]): The target output shape. `output_shape` can be a tuple of three values
9207
+ specifying a shape :math:`(D_{out}, H_{out}, W_{out})`, or a single int `S` for :math:`(S, S, S)`.
9208
+ data_format (str, optional): The optional value for data format.
9349
9209
  Currently support 'NCDHW' and 'NHDWC'. Default: 'NCDHW'.
9350
9210
 
9351
9211
  Inputs:
9352
9212
  - **x** (Tensor) - The input of FractionalMaxPool3DWithFixedKsize, which is a 4D or 5D tensor.
9353
9213
  Tensor of data type : float16, float32, double, int32, int64.
9354
9214
  Supported shape :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(N, D_{in}, H_{in}, W_{in}, C)`.
9355
-
9356
9215
  - **random_samples** (Tensor) - The random step of FractionalMaxPool3DWithFixedKsize, which is a 3D tensor.
9357
9216
  Tensor of data type : float16, float32, double, and value is between (0, 1).
9358
9217
  Supported shape :math:`(N, C, 3)`
9359
9218
 
9360
9219
  Outputs:
9361
- Outputs:
9362
9220
  - **y** (Tensor) - A tensor, the output of FractionalMaxPool3DWithFixedKsize.
9363
- Has the same data type with `x`.
9364
- Tensor of shape :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(N, D_{out}, H_{out}, W_{out}, C)`.
9365
-
9221
+ Has the same data type with `x`.
9222
+ Tensor of shape :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(N, D_{out}, H_{out}, W_{out}, C)`.
9366
9223
  - **argmax** (Tensor) - A tensor, the indices along with the outputs.
9367
- Has the same shape as the `y` and int32 or int64 data type.
9224
+ Has the same shape as the `y` and int32 or int64 data type.
9368
9225
 
9369
9226
  Raises:
9370
9227
  TypeError: If `input_x` is not a 4D or 5D tensor.
@@ -9387,7 +9244,7 @@ class FractionalMaxPool3DWithFixedKsize(Primitive):
9387
9244
  >>> x = Tensor(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
9388
9245
  ... .reshape([1, 1, 2, 2, 4]), mstype.float32)
9389
9246
  >>> random_samples = Tensor(np.array([0.7, 0.7, 0.7]).reshape([1, 1, 3]), mstype.float32)
9390
- >>> ksize = (1.0, 1.0, 1.0)
9247
+ >>> ksize = (1, 1, 1)
9391
9248
  >>> output_shape = (1, 1, 2)
9392
9249
  >>> net = ops.FractionalMaxPool3DWithFixedKsize(ksize = ksize, output_shape = output_shape)
9393
9250
  >>> output, argmax = net(x, random_samples)
@@ -9401,15 +9258,15 @@ class FractionalMaxPool3DWithFixedKsize(Primitive):
9401
9258
  def __init__(self, ksize, output_shape, data_format="NCDHW"):
9402
9259
  """Initialize FractionalMaxPool3DWithFixedKsize."""
9403
9260
  self.init_prim_io_names(inputs=["x", "random_samples"], outputs=["y", "argmax"])
9404
- validator.check_value_type("ksize", ksize, [float, tuple], self.name)
9261
+ validator.check_value_type("ksize", ksize, [int, tuple], self.name)
9405
9262
  self.ksize = ksize
9406
- if isinstance(self.ksize, float):
9263
+ if isinstance(self.ksize, int):
9407
9264
  self.ksize = (ksize, ksize, ksize)
9408
9265
  if len(self.ksize) != 3:
9409
- raise ValueError(f"For '{self.name}', attr 'ksize' must be an positive float number or a tuple of "
9410
- f"three float numbers, but got {len(self.ksize)} numbers.")
9266
+ raise ValueError(f"For '{self.name}', attr 'ksize' must be an positive int number or a tuple of "
9267
+ f"three int numbers, but got {len(self.ksize)} numbers.")
9411
9268
  for item in self.ksize:
9412
- validator.check_positive_float(item, 'ksize item', self.name)
9269
+ validator.check_positive_int(item, 'ksize item', self.name)
9413
9270
  self.output_shape = validator.check_value_type("output_shape", output_shape, [int, tuple], self.name)
9414
9271
  self.data_format = validator.check_string(data_format, ['NCDHW', 'NDHWC'], 'data_format', self.name)
9415
9272
  self.output_shape = _check_3d_int_or_tuple("output_shape", output_shape,
@@ -9422,11 +9279,10 @@ class FractionalAvgPool(Primitive):
9422
9279
  r"""
9423
9280
  Performs fractional avg pooling on the input.
9424
9281
 
9425
- Fractional avg pooling is similar to regular avg pooling, In regular avg pooling, you downsize an
9426
- input set by taking the avgrage value of smaller N x N subsections of the set (often 2x2), and try
9427
- to reduce the set by a factor of N, where N is an integer. Fractional avg pooling, means that the
9428
- overall reduction ratio N does not have to be an integer. In each pooling region, a mean operation
9429
- is performed.
9282
+ Fractional avg pooling is similar to regular avg pooling, but with the added flexibility of
9283
+ allowing the overall reduction ratio `N` to be a non-integer value. In regular avg pooling,
9284
+ an input set is reduced in size by taking the average value of `N x N` (usually 2x2)
9285
+ subsections of the set, with the goal of reducing the set by a factor of `N`, where `N` is an integer.
9430
9286
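The usage mirrors FractionalMaxPool above, except that each pooling region is averaged; a short sketch under the same assumptions:

>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> import mindspore.common.dtype as mstype
>>> x = Tensor(np.arange(1, 17).reshape([1, 4, 4, 1]), mstype.float32)
>>> pool = ops.FractionalAvgPool(pooling_ratio=[1.0, 1.5, 1.5, 1.0])
>>> y, row_pooling_sequence, col_pooling_sequence = pool(x)
>>> print(y.shape)
(1, 2, 2, 1)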
 
9431
9287
  .. warning::
9432
9288
  "pooling_ratio", currently only supports row and col dimension and should be >= 1.0, the first
@@ -9437,18 +9293,25 @@ class FractionalAvgPool(Primitive):
9437
9293
  Pooling ratio for each dimension of value should be >=0, currently only support for row and col
9438
9294
  dimension. The first and last elements must be 1.0 because we don't allow pooling on batch and
9439
9295
  channels dimensions.
9440
- pseudo_random(bool): An optional bool. Defaults to False. When set to True, generates the pooling
9441
- sequence in a pseudorandom fashion, otherwise, in a random fashion.
9442
- Check paper Benjamin Graham, Fractional Max-Pooling for difference between pseudo_random and
9443
- random.
9444
- overlapping(bool): An optional bool. Defaults to False. When set to True, it means when pooling,
9445
- the values at the boundary of adjacent pooling cells are used by both cells.
9446
- deterministic(bool): An optional bool. Defaults to False. When set to True, a fixed pooling region
9447
- will be used when iterating over a FractionalAvgPool node in the computation graph. Mainly
9448
- used in unit test to make FractionalAvgPool deterministic.
9449
- seed(int): An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero, the
9450
- random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
9451
- seed2(int): An optional int. Defaults to 0. An second seed to avoid seed collision.
9296
+ pseudo_random(bool, optional): Generate the pooling sequence either randomly or pseudo-randomly.
9297
+ If the pseudo_random parameter is set to True, the sequence will be generated in a
9298
+ pseudo-random fashion, otherwise it will be generated randomly.
9299
+ Refer to `Fractional Max-Pooling <https://arxiv.org/pdf/1412.6071>`_
9300
+ by Benjamin Graham to understand the distinction between the two.
9301
+ Default: False.
9302
+ overlapping(bool, optional): When set to True, the values at the boundary of adjacent pooling cells
9303
+ will be shared by both cells during pooling process. When set to False, the values are not reused.
9304
+ Default: False.
9305
+ deterministic(bool, optional): If deterministic is set to True, a fixed pooling region will be used
9306
+ in the computation graph, ensuring that the FractionalAvgPool is deterministic.
9307
+ This is often used in unit tests. When set to False, fixed pool regions will not be used.
9308
+ Default: False.
9309
+ seed(int, optional): If either seed or seed2 are set to a non-zero value, the random number
9310
+ generator will be seeded using the specified seed. If neither seed nor seed2 are set,
9311
+ the generator will be seeded by a random seed.
9312
+ Default: 0.
9313
+ seed2(int, optional): The second seed to avoid seed collision.
9314
+ Default: 0.
9452
9315
 
9453
9316
  Inputs:
9454
9317
  - **x** (Tensor) -The data type must be one of the following types: float32, float64, int32, int64.
@@ -9501,32 +9364,35 @@ class FractionalAvgPool(Primitive):
9501
9364
 
9502
9365
  class NthElement(Primitive):
9503
9366
  r"""
9504
- Finds values of the n-th order statistic for the last dimension.
9505
- If the input is a vector (rank-1), finds the entries which is the nth-smallest value in
9506
- the vector and outputs their values as scalar tensor.
9507
- For matrices (resp. higher rank input), computes the entries which is the nth-smallest value in
9508
- each row (resp. vector along the last dimension). Thus, values.shape = input.shape[:-1].
9367
+ Computes the n-th smallest values for the last dimension of the input Tensor.
9368
+
9369
+ - When `input` is a 1-D Tensor (i.e. Vector), it finds the nth-smallest value in the vector
9370
+ and outputs its value as a scalar Tensor.
9371
+ - When `input` is a matrix or has a higher rank, it finds the nth-smallest value
9372
+ in each row (or vector along the last dimension) and outputs
9373
+ these values in a Tensor with shape of `values.shape = input.shape[:-1]`.
9509
9374
 
9510
9375
  Args:
9511
- reverse (bool): An optional bool. Defaults to False. When set to True, find the nth-largest value
9512
- in the vector and vice versa.
9376
+ reverse (bool, optional): If set to True, it finds the :math:`n`-th largest value
9377
+ in the vector instead of the nth-smallest. Default: False.
9513
9378
 
9514
9379
  Inputs:
9515
- - **input** (Tensor) - A Tensor. 1-D or higher with last dimension at least n+1.
9516
- - **n** (int or Tensor) - If the n is a tensor, it should be a 0-D tensor, dtype is int32.
9517
- Valid range of n is [0, input.shape[-1]).
9380
+ - **input** (Tensor) - Input Tensor with 1-D or higher dimension.
9381
+ - **n** (Union[int, Tensor]) - If the `n` is a Tensor, it should be a 0-D Tensor, dtype is int32.
9382
+ Valid range of `n` is :math:`[0, input.shape[-1])` where :math:`input.shape[-1]` is
9383
+ last dimension size of `input`.
9518
9384
 
9519
9385
  Outputs:
9520
- Tensor, values.shape = input.shape[:-1]. The dtype is same to the input.
9386
+ - **values** (Tensor) - Its shape satisfies: `values`.shape = `input`.shape[:-1].
9387
+ The dtype is the same as `input`.
9521
9388
 
9522
9389
  Raises:
9523
- TypeError: If the type of input is out of the valid list.
9524
- TypeError: If the n is not int32 or not a Tensor.
9525
- ValueError: If n is out of [0, input.shape[-1]).
9526
-
9390
+ TypeError: If the type of `input` is out of the valid list.
9391
+ TypeError: If `n` is not int32 or not a Tensor.
9392
+ ValueError: If `n` is out of :math:`[0, input.shape[-1])`.
9527
9393
 
9528
9394
  Supported Platforms:
9529
- ``Ascend`` ``CPU``
9395
+ ``Ascend`` ``GPU`` ``CPU``
9530
9396
 
9531
9397
  Examples:
9532
9398
  >>> input = Tensor(np.array([[1,2,3],[4,5,6]]) , mstype.int8)
@@ -9548,13 +9414,13 @@ class NthElement(Primitive):
9548
9414
 
9549
9415
  class PSROIPooling(Primitive):
9550
9416
  r"""
9551
- Position Sensitive ROI-Pooling
9417
+ Applies Position Sensitive ROI-Pooling on input Tensor.
9552
9418
 
9553
9419
  Args:
9554
9420
  spatial_scale (float): a scaling factor that maps the box coordinates to the input coordinates.
9555
9421
  For example, if your boxes are defined on the scale of a 224x224 image and
9556
9422
  your input is a 112x112 feature map (resulting from a 0.5x scaling of the original
9557
- image), youll want to set this to 0.5.
9423
+ image), you'll want to set this to 0.5.
9558
9424
  group_size (int): the size of the output (in pixels) after the pooling is performed, as (height, width).
9559
9425
  output_dim (int): the dim of the output after the pooling is performed.
9560
9426
 
@@ -9569,7 +9435,16 @@ class PSROIPooling(Primitive):
9569
9435
  0 <= x1 < x2 and 0 <= y1 < y2.
9570
9436
 
9571
9437
  Outputs:
9572
- - out (rois.shape[0] * rois.shape[2], output_dim, group_size, group_size), the result after pooling.
9438
+ - **out** (Tensor) - The result after pooling. Its shape
9439
+ is :math:`(rois.shape[0] * rois.shape[2], output\_dim, group\_size, group\_size)`.
9440
+
9441
+ Raises:
9442
+ TypeError: If `spatial_scale` is not a float.
9443
+ TypeError: If `group_size` or `output_dim` is not an int.
9444
+ TypeError: If `features` or `rois` is not a Tensor.
9445
+ TypeError: If dtype of `rois` is not float16 or float32.
9446
+ ValueError: If shape of `features` does not satisfy :math:`(C == output\_dim * group\_size * group\_size)`.
9447
+ ValueError: If `spatial_scale` is negative.
9573
9448
 
9574
9449
  Supported Platforms:
9575
9450
  ``Ascend``
@@ -9605,9 +9480,9 @@ class PSROIPooling(Primitive):
9605
9480
  ... group_size=7)
9606
9481
  >>> out = psROIPooling(features, rois)
9607
9482
  >>> print(out.shape)
9608
- (4, 3, 7, 7)
9483
+ (4, 3, 7, 7)
9609
9484
  >>> print(out.dtype)
9610
- Float32
9485
+ Float32
9611
9486
  """
9612
9487
 
9613
9488
  @prim_attr_register
@@ -9636,9 +9511,10 @@ class TripletMarginLoss(Primitive):
9636
9511
  examples` respectively). The shapes of all input tensors should be
9637
9512
  :math:`(N, D)`.
9638
9513
 
9639
- The distance swap is described in detail in the paper `Learning shallow
9640
- convolutional feature descriptors with triplet losses` by
9641
- V. Balntas, E. Riba et al.
9514
+ The distance swap is described in detail in the paper
9515
+ `Learning local feature descriptors with triplets and shallow convolutional neural
9516
+ networks <http://158.109.8.37/files/BRP2016.pdf>`_
9517
+ by V. Balntas, E. Riba et al.
9642
9518
 
9643
9519
  The loss function for each sample in the mini-batch is:
9644
9520
 
@@ -9651,17 +9527,18 @@ class TripletMarginLoss(Primitive):
9651
9527
  d(x_i, y_i) = \left\lVert {\bf x}_i - {\bf y}_i \right\rVert_p
9652
9528
 
9653
9529
  Args:
9654
- p (int): The norm degree for pairwise distance. Default: 2.
9655
- eps (float): Default: 1e-06.
9656
- swap (bool): The distance swap is described in detail in the paper
9657
- `Learning shallow convolutional feature descriptors with triplet losses` by
9658
- V. Balntas, E. Riba et al. Default: "False".
9659
- reduction (str): Apply specific reduction method to the output: 'none', 'mean', 'sum'. Default: "mean".
9530
+ p (int, optional): The norm degree for pairwise distance. Default: 2.
9531
+ eps (float, optional): A small constant used for numerical stability. Default: 1e-06.
9532
+ swap (bool, optional): Whether to use the distance swap described in the paper above. Default: False.
9533
+ reduction (str, optional): Apply specific reduction method to the
9534
+ output: "none", "mean", "sum". Default: "mean".
9660
9535
 
9661
9536
  Inputs:
9662
9537
  - **x** (Tensor) - A sample randomly selected from the training set. Data type must be BasicType.
9663
- - **positive** (Tensor) - A sample belonging to the same category as x, with the same type and shape as `x`.
9664
- - **negative** (Tensor) - A sample belonging to the different class from x, with the same type and shape as `x`.
9538
+ - **positive** (Tensor) - A sample belonging to the same category as x,
9539
+ with the same type and shape as `x`.
9540
+ - **negative** (Tensor) - A sample belonging to the different class from x,
9541
+ with the same type and shape as `x`.
9665
9542
  - **margin** (Tensor) - Make a margin between the positive pair and the negative pair.
9666
9543
 
9667
9544
  Outputs:
@@ -9669,21 +9546,23 @@ class TripletMarginLoss(Primitive):
9669
9546
  Otherwise, a scalar value will be returned.
9670
9547
 
9671
9548
  Raises:
9672
- TypeError: If `x` or `positive` or 'negative' or 'margin' is not a Tensor.
9549
+ TypeError: If `x` or `positive` or `negative` or `margin` is not a Tensor.
9673
9550
  TypeError: If dtype of `x` or `positive` or `negative` is not BasicType.
9674
9551
  TypeError: If dtype of `x`, `positive` and `negative` is not the same.
9675
9552
  TypeError: If `margin` is not float32.
9676
9553
  TypeError: If `p` is not an int.
9677
9554
  TypeError: If `eps` is not a float.
9678
9555
  TypeError: If `swap` is not a bool.
9679
- ValueError: If dimensions of input `x`, `positive` and `negative` are less than or equal to 1 at the same time.
9680
- ValueError: If the dimension of input `x` or `positive` or `negative` is bigger than or equal to 8.
9556
+ ValueError: If dimensions of input `x`, `positive` and `negative` are
9557
+ less than or equal to 1 at the same time.
9558
+ ValueError: If the dimension of input `x` or `positive` or `negative`
9559
+ is bigger than or equal to 8.
9681
9560
  ValueError: If length of shape of `margin` is not 0.
9682
9561
  ValueError: If shape of `x`, `positive` and `negative` cannot broadcast.
9683
9562
  ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
9684
9563
 
9685
9564
  Supported Platforms:
9686
- ``Ascend`` ``CPU`` ``GPU``
9565
+ ``GPU``
9687
9566
 
9688
9567
  Examples:
9689
9568
  >>> loss = ops.TripletMarginLoss()
@@ -9710,10 +9589,10 @@ class DeformableOffsets(Primitive):
9710
9589
  r"""
9711
9590
  Computes the deformed convolution output with the expected input.
9712
9591
 
9713
- Refer to :func:`mindspore.ops.deformable_conv2d` for more detail.
9592
+ Refer to :func:`mindspore.ops.deformable_conv2d` for more details.
9714
9593
 
9715
9594
  Supported Platforms:
9716
- ``Ascend`` ``CPU`` ``GPU``
9595
+ ``Ascend`` ``GPU`` ``CPU``
9717
9596
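A hypothetical usage sketch through the functional interface it refers to (the shapes are illustrative and the `ops.deformable_conv2d(x, weight, offsets, kernel_size, strides, padding)` call order is assumed from that function's documentation):

>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> import mindspore.common.dtype as mstype
>>> x = Tensor(np.ones((4, 3, 10, 10)), mstype.float32)
>>> kh, kw = 3, 3
>>> weight = Tensor(np.ones((5, 3, kh, kw)), mstype.float32)
>>> offsets = Tensor(np.ones((4, 3 * kh * kw, 8, 8)), mstype.float32)
>>> output = ops.deformable_conv2d(x, weight, offsets, (kh, kw), (1, 1, 1, 1), (0, 0, 0, 0))
>>> print(output.shape)
(4, 5, 8, 8)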
  """
9718
9597
 
9719
9598
  @prim_attr_register
@@ -9761,26 +9640,39 @@ class DeformableOffsets(Primitive):
9761
9640
 
9762
9641
  class GridSampler2D(Primitive):
9763
9642
  """
9764
- This operation samples 2d input_x by using interpolation based on flow field grid, which is usually gennerated by
9765
- affine_grid.
9643
+ This operation samples the 2D `input_x` by using interpolation based on the flow field `grid`,
9644
+ which is usually generated by :func:`mindspore.ops.affine_grid`.
9645
+
9646
+ .. warning::
9647
+ This is an experimental API that is subject to change or deletion.
9766
9648
 
9767
9649
  Args:
9768
- interpolation_mode (str): An optional string specifying the interpolation method. The optional values are
9650
+ interpolation_mode (str, optional): An optional string specifying the interpolation method.
9651
+ The optional values are
9769
9652
  "bilinear" or "nearest". Default: "bilinear".
9770
- padding_mode (str): An optional string specifying the pad method. The optional values are "zeros", "border" or
9771
- "reflection". Default: "zeros".
9772
- align_corners (bool): An optional bool. If "true", the centers of the corner pixels of the input and output
9773
- tensors are aligned. Defaults to "false".
9653
+ padding_mode (str, optional): An optional string specifying the pad method.
9654
+ The optional values are "zeros", "border" or "reflection". Default: "zeros".
9655
+ When the sampling grid is outside input's bounds, effects of various padding modes are as follows:
9656
+
9657
+ - "zeros": Pads the input tensor with zeros.
9658
+ - "border": Pads the input tensor with the values of the pixels on the border of the tensor.
9659
+ - "reflection": Pads the input tensor by reflecting the values of the pixels at the
9660
+ boundary of the tensor.
9661
+
9662
+ align_corners (bool, optional): An optional bool. When set to True,
9663
+ the centers of the corner pixels of the input
9664
+ and output tensors are aligned. When set to False, it is not aligned. Defaults to False.
9774
9665
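Putting the options together, a minimal sketch (grid values assumed) that samples the four corners of a 2x2 input with `align_corners=True`:

>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> input_x = Tensor(np.arange(4, dtype=np.float32).reshape(1, 1, 2, 2))
>>> grid = Tensor(np.array([[[[-1.0, -1.0], [1.0, -1.0]],
...                          [[-1.0, 1.0], [1.0, 1.0]]]], dtype=np.float32))  # (N, H_out, W_out, 2)
>>> sampler = ops.GridSampler2D(interpolation_mode='bilinear', padding_mode='zeros', align_corners=True)
>>> output = sampler(input_x, grid)
>>> print(output.shape)
(1, 1, 2, 2)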
 
9775
9666
  Inputs:
9776
- - **input_x** (Tensor) - A 4-D tensor with dtype of float16 or float32 and shape of :math:`(N, C,
9777
- H_{in}, W_{in})`.
9778
- - **grid** (Tensor) - A 4-D tensor whose dtype is the same as `input_x` and whose shape is :math:`(N,
9779
- H_{out}, W_{out}, 2)`. Used to specify the sampling pixel locations normalized by the input spatial
9667
+ - **input_x** (Tensor) - A 4-D tensor with dtype of float16 or float32 and shape of
9668
+ :math:`(N, C, H_{in}, W_{in})`.
9669
+ - **grid** (Tensor) - A 4-D tensor whose dtype is the same as `input_x` and whose shape is
9670
+ :math:`(N, H_{out}, W_{out}, 2)`.
9671
+ Used to specify the sampling pixel locations normalized by the input spatial
9780
9672
  dimensions.
9781
9673
 
9782
9674
  Outputs:
9783
- A 4-D Tensor whose dtype is the same as `input_x` and whose shape is :math:`(N, C, H_{out}, W_{out})`.
9675
+ A 4-D Tensor whose dtype is the same as `input_x` and whose shape is :math:`(N, C, H_{out}, W_{out})`.
9784
9676
 
9785
9677
  Raises:
9786
9678
  TypeError: If `input_x` or `grid` is not a Tensor.
@@ -9832,37 +9724,16 @@ class Pdist(Primitive):
9832
9724
  r"""
9833
9725
  Computes the p-norm distance between each pair of row vectors in the input.
9834
9726
 
9835
- .. math::
9836
-
9837
- y[n] = \sqrt[p]{{\mid x_{i} - x_{j} \mid}^p},
9838
-
9839
- where :math:`x_{i}, x_{j}` are two different row vectors in the input.
9840
-
9841
- Args:
9842
- p (float): p value for the p norm distance to calculate between each vector pair ∈[0,∞]. Default: 2.0.
9843
-
9844
- Inputs:
9845
- - **x** (Tensor) - Input tensor with dtype of float16 or float32 and shape of :math:`(N, M)`.
9846
-
9847
- Outputs:
9848
- Tensor, has the same dtype as `x`, whose shape is :math:`(N * (N - 1) / 2)`.
9849
-
9850
- Raises:
9851
- TypeError: If `x` is not a Tensor.
9852
- TypeError: If dtype of `x` is not float16, float32 or float64.
9853
- TypeError: If `p` is not a float.
9854
- ValueError: If `p` is a negative float.
9855
- ValueError: If dimension of `x` is not 2.
9727
+ Refer to :func:`mindspore.ops.pdist` for more details.
9856
9728
 
9857
9729
  Supported Platforms:
9858
- ``Ascend`` ``CPU`` ``GPU``
9730
+ ``GPU`` ``CPU``
9859
9731
 
9860
9732
  Examples:
9861
- >>> from mindspore import Tensor
9862
- >>> from mindspore.ops.operations.nn_ops import Pdist
9733
+ >>> from mindspore import Tensor, ops
9863
9734
  >>> import numpy as np
9864
9735
  >>> x = Tensor(np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]).astype(np.float32))
9865
- >>> op = Pdist(p=2.0)
9736
+ >>> op = ops.Pdist(p=2.0)
9866
9737
  >>> y = op(x)
9867
9738
  >>> print(y)
9868
9739
  [1.4142135 2.828427 1.4142135]
@@ -9887,9 +9758,11 @@ class UpsampleNearest3D(Primitive):
9887
9758
  One of `output_size` or `scales` must be given, and cannot specify both.
9888
9759
 
9889
9760
  Args:
9890
- output_size (Union[tuple[int], list[int]]): A tuple or list of int specifying the output volumetric size.
9761
+ output_size (Union[tuple[int], list[int]], optional): A tuple or list of int
9762
+ specifying the output volumetric size.
9891
9763
  Default: None.
9892
- scales (Union[tuple[float], list[float]]): A tuple or list of float specifying the upsampling factors.
9764
+ scales (Union[tuple[float], list[float]], optional): A tuple or list of float
9765
+ specifying the upsampling factors.
9893
9766
  Default: None.
9894
9767
 
9895
9768
  Inputs:
@@ -9901,8 +9774,8 @@ class UpsampleNearest3D(Primitive):
9901
9774
  Tensor of shape :math:`(N, C, D_{out}, H_{out}, W_{out})`.
9902
9775
 
9903
9776
  Raises:
9904
- TypeError: When `output_size` is not none and `output_size` is not list[int] or tuple[int].
9905
- TypeError: When `scales` is not none and `scales` is not list[float] or tuple[float].
9777
+ TypeError: When `output_size` is not None and `output_size` is not list[int] or tuple[int].
9778
+ TypeError: When `scales` is not None and `scales` is not list[float] or tuple[float].
9906
9779
  TypeError: If dtype of `x` is not in [float16, float32, float64].
9907
9780
  ValueError: If any value of `output_size` is negative or zero when `output_size` is not empty.
9908
9781
  ValueError: If any value of `scales` is negative or zero when `scales` is not empty.
@@ -9912,7 +9785,7 @@ class UpsampleNearest3D(Primitive):
9912
9785
  ValueError: If size of `output_size` is not equal 3 when `output_size` is specified.
9913
9786
 
9914
9787
  Supported Platforms:
9915
- ``Ascend`` ``GPU`` ``CPU``
9788
+
9916
9789
 
9917
9790
  Examples:
9918
9791
  >>> x = Tensor(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
@@ -9945,10 +9818,10 @@ class UpsampleNearest3D(Primitive):
9945
9818
  scales = []
9946
9819
  validator.check_value_type('output_size', output_size, [tuple, list], self.name)
9947
9820
  for item in output_size:
9948
- validator.check_int(item, 0, Rel.GT, 'output_size_item', self.name)
9821
+ validator.check_int(item, 0, validator.GT, 'output_size_item', self.name)
9949
9822
  validator.check_value_type('scales', scales, [tuple, list], self.name)
9950
9823
  for item in scales:
9951
- validator.check_float(item, 0, Rel.GT, 'scales_item', self.name)
9824
+ validator.check_float(item, 0, validator.GT, 'scales_item', self.name)
9952
9825
  self.add_prim_attr('output_size', output_size)
9953
9826
  self.add_prim_attr('scales', scales)
9954
9827
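For intuition about how `output_size` and `scales` relate to the result, nearest-neighbour upsampling can be sketched as a pure index mapping. The floor-based source index below is the common convention and is an assumption for illustration, not the kernel's exact rounding rule:

    import numpy as np

    def upsample_nearest_3d_ref(x, output_size):
        # x has shape (N, C, D, H, W); each output coordinate copies the
        # source element at floor(out_idx * in_size / out_size).
        n, c, d, h, w = x.shape
        od, oh, ow = output_size
        di = np.arange(od) * d // od
        hi = np.arange(oh) * h // oh
        wi = np.arange(ow) * w // ow
        return x[:, :, di[:, None, None], hi[None, :, None], wi[None, None, :]]

    x = np.arange(16, dtype=np.float32).reshape(1, 1, 2, 2, 4)
    print(upsample_nearest_3d_ref(x, (4, 4, 8)).shape)  # (1, 1, 4, 4, 8)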
 
@@ -9978,7 +9851,7 @@ class SparseApplyAdagradDA(Primitive):
9978
9851
  Otherwise the behavior is undefined, but may exhibit less contention. Default: False.
9979
9852
 
9980
9853
  Inputs:
9981
- - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
9854
+ - **var** (Parameter) - Variable to be updated.
9982
9855
  The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
9983
9856
  - **grad_accum** (Parameter) - The dict of mutable tensor grad_accum. Must have the same
9984
9857
  shape and dtype as `var`.
@@ -10002,8 +9875,7 @@ class SparseApplyAdagradDA(Primitive):
10002
9875
  TypeError: If `grad` is not a Tensor.
10003
9876
  TypeError: If `lr`, `l1`, `l2` or `global_step` is neither a Number nor a Tensor.
10004
9877
  TypeError: If use_locking is not a bool.
10005
- TypeError: If dtype of `var`, `grad_accum`, `grad_square_accum`, `grad_accum`,
10006
- `lr`, `l1`, `l2` is neither float16 nor float32.
9878
+ TypeError: If dtype of `var`, `grad_accum` and `grad_square_accum` is not the same.
10007
9879
  TypeError: If dtype of `grad_accum`, `grad_square_accum`, `grad_accum`
10008
9880
  is not same as `var`.
10009
9881
  TypeError: If dtype of `indices` is neither int32 nor int64.
@@ -10014,16 +9886,12 @@ class SparseApplyAdagradDA(Primitive):
10014
9886
  conversion of Parameter is not supported.
10015
9887
 
10016
9888
  Supported Platforms:
10017
- ``Ascend`` ``CPU``
9889
+ ``GPU`` ``CPU``
10018
9890
 
10019
9891
  Examples:
10020
- >>> import numpy as np
10021
- >>> from mindspore import Tensor
10022
- >>> import mindspore.common.dtype as mstype
10023
- >>> import mindspore.ops.operations.nn_ops as nn_ops
10024
- >>> var = Tensor(np.array([[1,2], [1,2]]).astype(np.float32))
10025
- >>> grad_accum = Tensor(np.array([[2,1], [3,1]]).astype(np.float32))
10026
- >>> grad_square_accum = Tensor(np.array([[4,1], [5,1]]).astype(np.float32))
9892
+ >>> var = Parameter(Tensor(np.array([[1,2], [1,2]]).astype(np.float32)))
9893
+ >>> grad_accum = Parameter(Tensor(np.array([[2,1], [3,1]]).astype(np.float32)))
9894
+ >>> grad_square_accum = Parameter(Tensor(np.array([[4,1], [5,1]]).astype(np.float32)))
10027
9895
  >>> grad = Tensor(np.array([[5,1], [6,1]]).astype(np.float32))
10028
9896
  >>> indices = Tensor(np.array([0, 1], dtype=np.int32))
10029
9897
  >>> lr = Tensor(2, mstype.float32)
@@ -10039,9 +9907,9 @@ class SparseApplyAdagradDA(Primitive):
10039
9907
  """
10040
9908
 
10041
9909
  __mindspore_signature__ = (
10042
- sig.make_sig('var', dtype=sig.sig_dtype.T),
10043
- sig.make_sig('grad_accum', dtype=sig.sig_dtype.T),
10044
- sig.make_sig('grad_square_accum', dtype=sig.sig_dtype.T),
9910
+ sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
9911
+ sig.make_sig('grad_accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
9912
+ sig.make_sig('grad_square_accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
10045
9913
  sig.make_sig('grad', dtype=sig.sig_dtype.T),
10046
9914
  sig.make_sig('indices', dtype=sig.sig_dtype.T1),
10047
9915
  sig.make_sig('lr', dtype=sig.sig_dtype.T),
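The `RW_WRITE` flags added above mark `var`, `grad_accum` and `grad_square_accum` as inputs that the operator updates in place, which is why the example now wraps them in `Parameter` rather than a plain `Tensor`. A minimal sketch of that wrapping (values taken from the example):

    import numpy as np
    from mindspore import Tensor, Parameter

    # In-place (RW_WRITE) inputs must be Parameters, not plain Tensors.
    var = Parameter(Tensor(np.array([[1, 2], [1, 2]], np.float32)), name="var")
    grad_accum = Parameter(Tensor(np.array([[2, 1], [3, 1]], np.float32)), name="grad_accum")
    grad_square_accum = Parameter(Tensor(np.array([[4, 1], [5, 1]], np.float32)), name="grad_square_accum")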
@@ -10111,7 +9979,7 @@ class SparseApplyMomentum(Primitive):
10111
9979
  is not supported.
10112
9980
 
10113
9981
  Supported Platforms:
10114
- ``Ascend`` ``CPU``
9982
+ ``GPU`` ``CPU``
10115
9983
 
10116
9984
  Examples:
10117
9985
  >>> import mindspore.ops.operations.nn_ops as nn_ops
@@ -10195,7 +10063,7 @@ class SparseApplyProximalGradientDescent(Primitive):
10195
10063
  is not supported.
10196
10064
 
10197
10065
  Supported Platforms:
10198
- ``Ascend`` ``CPU``
10066
+ ``GPU`` ``CPU``
10199
10067
 
10200
10068
  Examples:
10201
10069
  >>> import mindspore.ops.operations.nn_ops as nn_ops
@@ -10213,7 +10081,7 @@ class SparseApplyProximalGradientDescent(Primitive):
10213
10081
  """
10214
10082
 
10215
10083
  __mindspore_signature__ = (
10216
- sig.make_sig('var', dtype=sig.sig_dtype.T),
10084
+ sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
10217
10085
  sig.make_sig('alpha', dtype=sig.sig_dtype.T),
10218
10086
  sig.make_sig('l1', dtype=sig.sig_dtype.T),
10219
10087
  sig.make_sig('l2', dtype=sig.sig_dtype.T),
@@ -10231,50 +10099,51 @@ class SparseApplyProximalGradientDescent(Primitive):
10231
10099
 
10232
10100
  class NuclearNorm(Primitive):
10233
10101
  r"""
10234
- Returns the matrix nuclear norm of a given tensor.
10102
+ Returns the matrix nuclear norm of a given Tensor.
10235
10103
 
10236
10104
  Attr `dim` specifies which two dimensions of the input `x` to calculate the nuclear norm across. If `dim` is None,
10237
10105
  the nuclear norm will be calculated across all dimensions of input. Because the nuclear norm is the sum of the
10238
10106
  singular values of the matrix, the input at this time should be 2-dimensional. That is, if the input is
10239
10107
  2-dimensional, we compute the nuclear norm of the input matrix. At this point, `dim` should be None. If you set
10240
- `dim`, it also needs to be in the proper range, although it doesn't work. If the input is 3-dimensional and above,
10108
+ `dim`, it also needs to be in the proper range, otherwise it won't work. If the input is 3-dimensional and above,
10241
10109
  the attribute `dim` is required. It specifies which two dimensions of input to calculate the nuclear norm across.
10242
10110
 
10243
- According to the `dim` list, the input tensor is reordered by `dim`. The two dimensions pointed to by the attribute
10111
+ According to the `dim` list, the input Tensor is reordered by `dim`. The two dimensions pointed to by the attribute
10244
10112
  `dim` are placed at the end, and the order of the other dimensions is relatively unchanged. Perform the SVD of each
10245
- slice of the adjusted tensor to obtain the singular value. Sum all of the singular value of each slice/matrix to
10113
+ slice of the adjusted Tensor to obtain the singular value. Sum all of the singular value of each slice/matrix to
10246
10114
  obtain the nuclear norm.
10247
10115
 
10248
10116
  Args:
10249
- dim (Union[list(int), tuple(int)]): Specifies which two dimensions of `x` to calculate the matrix nuclear norm
10117
+ dim (Union[list(int), tuple(int)], optional): Specifies which two
10118
+ dimensions of `x` to calculate the matrix nuclear norm
10250
10119
  across. If `dim` is None, the nuclear norm will be calculated across all dimensions of `x`. The length of
10251
10120
  `dim` should be 2. The value in `dim` should be in this range:[-x_rank, x_rank). x_rank is the dimension of
10252
10121
  Tensor `x`. The value of `dim[0]` or `dim[1]` can not point to the same dimension. Default: None.
10253
- keepdim (bool): whether the output tensor have `dim` retained or not. Default: False.
10122
+ keepdim (bool, optional): Whether the output Tensor has `dim` retained or not. Default: False.
10254
10123
 
10255
10124
  Inputs:
10256
10125
  - **x** (Tensor) - Input to compute the matrix nuclear norm. The dimension of `x` should be greater than or
10257
10126
  equal to 2. Data type must be float32 or float64.
10258
10127
 
10259
10128
  Outputs:
10260
- Tensor, output tensor with dimensions in `dim` reduced to 1 will be returned if `keepdim` is `True`;
10261
- otherwise a tensor with dimensions in `dim` removed is returned. The data type is same as `x`.
10262
-
10263
- Supported Platforms:
10264
- ``Ascend`` ``CPU``
10129
+ Tensor, output Tensor with dimensions in `dim` reduced to 1 will be returned if `keepdim` is `True`;
10130
+ otherwise a Tensor with dimensions in `dim` removed is returned. The data type is same as `x`.
10265
10131
 
10266
10132
  Raises:
10267
- TypeError: If `x` is not a tensor.
10133
+ TypeError: If `x` is not a Tensor.
10268
10134
  TypeError: If dtype of `x` is neither float32 nor float64.
10269
10135
  TypeError: If dtype of `dim` is neither list(int) nor tuple(int).
10270
10136
  TypeError: If dtype of `keepdim` is not bool.
10271
10137
  ValueError: If dimension of Tensor `x` is less than 2.
10272
10138
  ValueError: If the length of `dim` is not 2 when `dim` is set.
10273
- ValueError: If the dimension of tensor `x` is not 2 when `dim` is not set.
10139
+ ValueError: If the dimension of Tensor `x` is not 2 when `dim` is not set.
10274
10140
  ValueError: If `dim[0]` or `dim[1]` point to the same dimension.
10275
10141
  ValueError: If `dim[0]` or `dim[1]` is not in this range:[-x_rank, x_rank).
10276
10142
  x_rank is the dimension of Tensor `x`.
10277
10143
 
10144
+ Supported Platforms:
10145
+ ``Ascend`` ``CPU``
10146
+
10278
10147
  Examples:
10279
10148
  >>> input_x = Tensor(np.array([[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
10280
10149
  ... [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]]]), ms.float32)
@@ -10308,7 +10177,7 @@ class NuclearNorm(Primitive):
10308
10177
  """Initialize NuclearNorm."""
10309
10178
  validator.check_value_type("dim", dim, [list, tuple, type(None)], self.name)
10310
10179
  if dim is not None:
10311
- validator.check_int(len(dim), 2, Rel.EQ, 'length of dim_size', self.name)
10180
+ validator.check_int(len(dim), 2, validator.EQ, 'length of dim_size', self.name)
10312
10181
  validator.check_is_int(dim[0], "dim[0]", self.name)
10313
10182
  validator.check_is_int(dim[1], "dim[1]", self.name)
10314
10183
  else:
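The recipe in the NuclearNorm docstring (move the two `dim` axes to the end, take the SVD of every slice, sum the singular values) can be written out as a NumPy reference; this is a sketch of that description, with `keepdim` handling omitted for brevity:

    import numpy as np

    def nuclear_norm_ref(x, dim=(-2, -1)):
        # Reorder so the two `dim` axes sit last, then sum the singular
        # values of each resulting 2-D slice.
        moved = np.moveaxis(x, dim, (-2, -1))
        s = np.linalg.svd(moved, compute_uv=False)
        return s.sum(axis=-1)

    x = np.array([[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
                  [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]]], dtype=np.float32)
    print(nuclear_norm_ref(x, dim=(1, 2)))  # one nuclear norm per slice along axis 0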
@@ -10316,23 +10185,56 @@ class NuclearNorm(Primitive):
10316
10185
  validator.check_value_type("keepdim", keepdim, [bool], self.name)
10317
10186
 
10318
10187
 
10188
+ class GLU(Primitive):
10189
+ r"""
10190
+ Computes GLU (Gated Linear Unit activation function) of input tensors.
10191
+
10192
+ .. warning::
10193
+ This is an experimental API that is subject to change or deletion.
10194
+
10195
+ Refer to :func:`mindspore.ops.glu` for more details.
10196
+
10197
+ Supported Platforms:
10198
+ ``Ascend`` ``CPU``
10199
+
10200
+ Examples:
10201
+ >>> from mindspore import ops, Tensor
10202
+ >>> from mindspore import dtype as mstype
10203
+ >>> import numpy as np
10204
+ >>> axis = 0
10205
+ >>> x = Tensor(np.array([0.3220, 0.9545, 0.7879, 0.0975, 0.3698,
10206
+ ... 0.5135, 0.5740, 0.3435, 0.1895, 0.8764,
10207
+ ... 0.4980, 0.9673, 0.9879, 0.6988, 0.9022,
10208
+ ... 0.9304, 0.1558, 0.0153, 0.1559, 0.9852]).reshape([2, 2, 5]), mstype.float32)
10209
+ >>> glu = ops.GLU(axis=axis)
10210
+ >>> y = glu(x)
10211
+ >>> print(y)
10212
+ [[[0.20028052 0.6916126 0.57412136 0.06512236 0.26307625]
10213
+ [0.3682598 0.3093122 0.17306386 0.10212085 0.63814086]]]
10214
+ """
10215
+
10216
+ @prim_attr_register
10217
+ def __init__(self, axis=-1):
10218
+ """Initialize GLU"""
10219
+ validator.check_value_type("axis", axis, [int], self.name)
10220
+
10221
+
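The GLU added above gates one half of the input with the sigmoid of the other half along `axis`, i.e. GLU(a, b) = a * sigmoid(b). A NumPy sketch that reproduces the example output (an illustration, not the operator's implementation):

    import numpy as np

    def glu_ref(x, axis=-1):
        # Split x in two halves along `axis`; gate the first half with
        # the sigmoid of the second: a * sigmoid(b).
        a, b = np.split(x, 2, axis=axis)
        return a / (1.0 + np.exp(-b))

    x = np.array([0.3220, 0.9545, 0.7879, 0.0975, 0.3698,
                  0.5135, 0.5740, 0.3435, 0.1895, 0.8764,
                  0.4980, 0.9673, 0.9879, 0.6988, 0.9022,
                  0.9304, 0.1558, 0.0153, 0.1559, 0.9852], dtype=np.float32).reshape(2, 2, 5)
    print(glu_ref(x, axis=0))  # matches the ops.GLU example above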
10319
10222
  class FractionalMaxPoolWithFixedKsize(Primitive):
10320
10223
  r"""
10321
10224
  Applies a 2D fractional max pooling to an input signal composed of multiple input planes.
10322
- The max-pooling operation is applied in kH × kW regions by a stochastic step size determined by
10323
- the target output size. For any input size, the size of the specified output is H x W. The number
10324
- of output features is equal to the number of input planes.
10225
+ The max-pooling operation is applied in :math:`(kH, kW)` regions by a stochastic step size determined by
10226
+ the target output size `output_shape`.
10227
+
10228
+ The number of output features is equal to the number of input planes.
10325
10229
 
10326
10230
  Fractional MaxPooling is described in the paper `Fractional Max-Pooling <https://arxiv.org/pdf/1412.6071>`_.
10327
10231
 
10328
10232
  Args:
10329
- ksize (Union[int, tuple[int]]): The size of kernel window used to take the maximum value.
10330
- The target ksize is H x W. ksize can be a tuple, or a single K for K x K.
10331
- specifying the window size (H, W) of the input tensor.
10332
- output_shape (Union[int, tuple[int]]): The target output size is H x W.
10333
- output_shape can be a tuple, or a single H for H x H.
10334
- specifying the size (H, W) of the output tensor.
10335
- data_format (str): The optional value for data format, is 'NCHW'.
10233
+ ksize (Union[int, tuple[int]]): Size of the pooling window. `ksize` can be a tuple of two values
10234
+ specifying a shape :math:`(k_H, k_W)`, or a single int `K` for :math:`(K, K)`.
10235
+ output_shape (Union[int, tuple[int]]): The target output shape. `output_shape` can be a
10236
+ tuple of two values specifying a shape :math:`(H_{out}, W_{out})`, or a single int `S` for :math:`(S, S)`.
10237
+ data_format (str, optional): The optional value for data format, is 'NCHW'.
10336
10238
  Default: "NCHW".
10337
10239
 
10338
10240
  Inputs:
@@ -10343,21 +10245,23 @@ class FractionalMaxPoolWithFixedKsize(Primitive):
10343
10245
 
10344
10246
  Outputs:
10345
10247
  - **y** (Tensor) - Has the same type as the `input_x`.
10346
- Has the shape :math:`(N, C, output\underline{~}shape{H}, output\underline{~}shape{W})`.
10248
+ Has the shape :math:`(N, C, H_{out}, W_{out})`.
10347
10249
  - **argmax** (Tensor) -A tensor whose data type must be int64. Has the same shape as the `y`.
10250
+
10348
10251
  Raises:
10349
10252
  TypeError: If data type of `input_x` is not one of the following: float16, float32, float64, int32, int64.
10350
10253
  TypeError: If data type of `random_samples` is not one of the following: float16, float32, float64.
10351
10254
  ValueError: If `ksize` is not a number and `ksize` is not a tuple of length 2.
10352
10255
  ValueError: If `output_shape` is not a number and `output_shape` is not a tuple of length 2.
10353
- ValueError: If the sum of `ksize`,`output_shape` and -1 is larger than the corresponding dimension of `input_x`.
10256
+ ValueError: If the sum of `ksize` , `output_shape` and
10257
+ -1 is larger than the corresponding dimension of `input_x`.
10354
10258
  ValueError: If the dimension of `random_samples` is not 3.
10355
10259
  ValueError: If the first dimension size of `input_x` and `random_samples` is not equal.
10356
10260
  ValueError: If the second dimension size of `input_x` and `random_samples` is not equal.
10357
10261
  ValueError: If the third dimension size of `random_samples` is not 2.
10358
10262
 
10359
10263
  Supported Platforms:
10360
- ``Ascend`` ``CPU``
10264
+ ``CPU``
10361
10265
 
10362
10266
  Examples:
10363
10267
  >>> # the ksize is an int number and the output_shape is a tuple.
@@ -10393,3 +10297,129 @@ class FractionalMaxPoolWithFixedKsize(Primitive):
10393
10297
  self.add_prim_attr("output_shape", self.output_shape)
10394
10298
  self.data_format = validator.check_string(data_format, ['NCHW'], 'data_format', self.name)
10395
10299
  self.init_prim_io_names(inputs=['input_x', 'random_samples'], outputs=['y', 'argmax'])
10300
+
10301
+
10302
+ class ChannelShuffle(Primitive):
10303
+ r"""
10304
+ Divides the channels in a tensor of shape (*, C, H, W) into g groups and
10305
+ rearranges them as (*, C/g, g, H*W), while keeping the original tensor shape.
10306
+
10307
+ .. warning::
10308
+ This is an experimental API that is subject to change or deletion.
10309
+
10310
+ Refer to :func:`mindspore.ops.channel_shuffle` for more details.
10311
+
10312
+ Supported Platforms:
10313
+ ``Ascend`` ``CPU``
10314
+
10315
+ Examples:
10316
+ >>> group = 2
10317
+ >>> x = Tensor(np.arange(1 * 4 * 2 * 2).reshape(1, 4, 2, 2).astype(np.int16))
10318
+ >>> channel_shuffle_func = ops.ChannelShuffle(group)
10319
+ >>> y = channel_shuffle_func(x)
10320
+ >>> print(y)
10321
+ [[[[ 0 1]
10322
+ [ 2 3]]
10323
+ [[ 8 9]
10324
+ [10 11]]
10325
+ [[ 4 5]
10326
+ [ 6 7]]
10327
+ [[12 13]
10328
+ [14 15]]]]
10329
+ """
10330
+
10331
+ @prim_attr_register
10332
+ def __init__(self, group):
10333
+ """Initialize ChannelShuffle"""
10334
+ if not isinstance(group, int):
10335
+ raise ValueError(f"For '{self.name}', attr 'group' must be an positive int number")
10336
+ self.init_prim_io_names(inputs=['x'], outputs=['y'])
10337
+
10338
+
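The shuffle described in the ChannelShuffle docstring is a reshape/transpose round trip over the channel axis; a NumPy sketch that reproduces the example output above (an illustration of the docstring, not the kernel):

    import numpy as np

    def channel_shuffle_ref(x, group):
        # View the C channels as (group, C // group), swap the two factors,
        # then flatten back so channels from different groups interleave.
        n, c, h, w = x.shape
        return x.reshape(n, group, c // group, h, w).swapaxes(1, 2).reshape(n, c, h, w)

    x = np.arange(1 * 4 * 2 * 2).reshape(1, 4, 2, 2).astype(np.int16)
    print(channel_shuffle_ref(x, 2))  # same channel ordering as ops.ChannelShuffle(2)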
10339
+ class MaxPoolWithArgmaxV2(Primitive):
10340
+ r"""
10341
+ Performs max pooling on the input Tensor and returns both max values and indices.
10342
+
10343
+ Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool outputs
10344
+ regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
10345
+ :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows:
10346
+
10347
+ .. math::
10348
+ \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
10349
+ \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)
10350
+
10351
+ Args:
10352
+ kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value and argmax
10353
+ value, is an int number that represents height and width of the kernel, or a tuple of
10354
+ two int numbers that represent height and width respectively.
10355
+ strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
10356
+ not only the height of movement but also the width of movement, or a tuple of two int numbers that
10357
+ represent height and width of movement respectively. Default: None, meaning that `strides = kernel_size`.
10358
+ pads (Union[int, tuple[int]]): An int number that represents the padding added to both the height and width of
10359
+ the input, or a tuple of two int numbers that represent the height and width padding respectively. Default: 0.
10360
+ dilation (Union[int, tuple[int]]): Controls the spacing between elements within the pooling window. Default: (1, 1).
10361
+ ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: False.
10362
+ argmax_type (mindspore.dtype): The dtype for argmax. Default: mstype.int64.
10363
+
10364
+ Inputs:
10365
+ - **x** (Tensor) - Tensor of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})` with data type of int8,
10366
+ int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32 or float64.
10367
+
10368
+ Outputs:
10369
+ Tuple of 2 Tensors, representing the maxpool result and where the max values are generated.
10370
+
10371
+ - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, H_{out}, W_{out})`.
10372
+ It has the same data type as `x`.
10373
+ - **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int32 or int64.
10374
+
10375
+ Raises:
10376
+ TypeError: If `x` is not a Tensor.
10377
+ ValueError: If length of shape of `x` is not equal to 4.
10378
+ TypeError: If `kernel_size` , `strides` , `pads` or `dilation` is not int or tuple.
10379
+ ValueError: If `kernel_size`, `strides` or `dilation` is less than 1.
10380
+ ValueError: If `pads` is less than 0.
10381
+ ValueError: If `argmax_type` is not mindspore.int64 or mindspore.int32.
10382
+ TypeError: If `ceil_mode` is not bool.
10383
+
10384
+ Supported Platforms:
10385
+ ``Ascend`` ``GPU`` ``CPU``
10386
+
10387
+ Examples:
10388
+ >>> x = Tensor(np.arange(20 * 16 * 50 * 32).reshape((20, 16, 50, 32)), mindspore.float32)
10389
+ >>> maxpool_arg_v2_op = ops.MaxPoolWithArgmaxV2(kernel_size=(3, 2), strides=(2, 1))
10390
+ >>> output_tensor, argmax = maxpool_arg_v2_op(x)
10391
+ >>> print(output_tensor.shape)
10392
+ (20, 16, 24, 31)
10393
+ >>> print(argmax.shape)
10394
+ (20, 16, 24, 31)
10395
+ """
10396
+
10397
+ @prim_attr_register
10398
+ def __init__(self, kernel_size, strides=None, pads=0, dilation=(1, 1,), ceil_mode=False, argmax_type=mstype.int64):
10399
+ """Initialize MaxPoolWithArgmaxV2."""
10400
+ self.init_prim_io_names(inputs=["x"], outputs=["output", "argmax"])
10401
+ validator.check_value_type("ceil_mode", ceil_mode, bool, self.name)
10402
+ self.ceil_mode = ceil_mode
10403
+ validator.check_value_type("argmax_type", argmax_type, [mstype.Type], self.name)
10404
+ argmax_type_valid_values = (mstype.int32, mstype.int64)
10405
+ validator.check_type_name("argmax_type", argmax_type, argmax_type_valid_values, self.name)
10406
+ if argmax_type == mstype.int32:
10407
+ self.add_prim_attr("argmax_type", 3)
10408
+ elif argmax_type == mstype.int64:
10409
+ self.add_prim_attr("argmax_type", 4)
10410
+ else:
10411
+ raise ValueError(
10412
+ f"For '{self.name}', the 'argmax_type' must be mstype.int32 or mstype.int64, but got {argmax_type}.")
10413
+ self.kernel_size = _check_positive_int_or_tuple("kernel_size", kernel_size, self.name, ret_four=True,
10414
+ allow_four=True)
10415
+ if strides is None:
10416
+ strides = kernel_size
10417
+ self.strides = _check_positive_int_or_tuple("strides", strides, self.name, ret_four=True, allow_four=True)
10418
+ self.pads = _check_positive_int_or_tuple("pads", pads, self.name, ret_four=True, allow_four=True,
10419
+ strict_positive=False)
10420
+ self.dilation = _check_positive_int_or_tuple("dilation", dilation, self.name, ret_four=True, allow_four=True)
10421
+ self.add_prim_attr("kernel_size", self.kernel_size)
10422
+ self.add_prim_attr("strides", self.strides)
10423
+ self.add_prim_attr("pads", self.pads)
10424
+ self.add_prim_attr("dilation", self.dilation)
10425
+ self.add_prim_attr("ceil_mode", self.ceil_mode)