mindspore 2.0.0rc1__cp38-cp38-manylinux1_x86_64.whl → 2.2.0__cp38-cp38-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (884)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Third_Party_Open_Source_Software_Notice +2 -2
  3. mindspore/__init__.py +5 -2
  4. mindspore/_akg/akg/build_module.py +5 -6
  5. mindspore/_akg/akg/composite/build_module.py +49 -16
  6. mindspore/_akg/akg/composite/split_stitch.py +10 -11
  7. mindspore/_akg/akg/config/repository.json +195 -0
  8. mindspore/_akg/akg/global_configs.py +5 -1
  9. mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
  10. mindspore/_akg/akg/tvm/api.py +4 -3
  11. mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
  12. mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
  13. mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
  14. mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
  15. mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
  16. mindspore/_akg/akg/tvm/build_module.py +16 -1
  17. mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
  18. mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
  19. mindspore/_akg/akg/tvm/ir_builder.py +1 -1
  20. mindspore/_akg/akg/tvm/module.py +1 -2
  21. mindspore/_akg/akg/tvm/stmt.py +2 -2
  22. mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
  23. mindspore/_akg/akg/utils/kernel_exec.py +58 -260
  24. mindspore/_akg/akg/utils/op_dsl.py +17 -1
  25. mindspore/_akg/akg/utils/result_analysis.py +4 -24
  26. mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
  27. mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
  28. mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
  29. mindspore/_c_mindrecord.cpython-38-x86_64-linux-gnu.so +0 -0
  30. mindspore/_check_jit_forbidden_api.py +5 -1
  31. mindspore/_checkparam.py +79 -62
  32. mindspore/_extends/graph_kernel/__init__.py +0 -1
  33. mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
  34. mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
  35. mindspore/_extends/graph_kernel/splitter.py +1 -9
  36. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
  37. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
  38. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
  39. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
  40. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
  41. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
  42. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
  43. mindspore/_extends/parse/__init__.py +19 -17
  44. mindspore/_extends/parse/namespace.py +7 -36
  45. mindspore/_extends/parse/parser.py +375 -189
  46. mindspore/_extends/parse/resources.py +36 -41
  47. mindspore/_extends/parse/standard_method.py +350 -245
  48. mindspore/_extends/parse/trope.py +2 -12
  49. mindspore/_extends/remote/kernel_build_server.py +24 -7
  50. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  51. mindspore/_install_custom.py +43 -0
  52. mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
  53. mindspore/amp.py +85 -19
  54. mindspore/bin/cache_admin +0 -0
  55. mindspore/bin/cache_server +0 -0
  56. mindspore/boost/base.py +2 -2
  57. mindspore/boost/boost.py +27 -32
  58. mindspore/boost/boost_cell_wrapper.py +37 -13
  59. mindspore/boost/grad_accumulation.py +1 -1
  60. mindspore/boost/grad_freeze.py +34 -6
  61. mindspore/boost/group_loss_scale_manager.py +15 -14
  62. mindspore/boost/less_batch_normalization.py +28 -3
  63. mindspore/common/__init__.py +15 -11
  64. mindspore/common/_auto_dynamic.py +68 -0
  65. mindspore/common/_jit_fallback_utils.py +111 -0
  66. mindspore/common/_register_for_adapter.py +17 -5
  67. mindspore/common/_register_for_tensor.py +2 -2
  68. mindspore/common/_stub_tensor.py +18 -15
  69. mindspore/common/_utils.py +31 -7
  70. mindspore/common/api.py +269 -101
  71. mindspore/common/auto_dynamic_shape.py +498 -0
  72. mindspore/common/dtype.py +61 -21
  73. mindspore/common/dump.py +9 -7
  74. mindspore/common/initializer.py +106 -76
  75. mindspore/common/jit_config.py +35 -14
  76. mindspore/common/lazy_inline.py +187 -0
  77. mindspore/common/mindir_util.py +101 -0
  78. mindspore/common/mutable.py +10 -13
  79. mindspore/common/parameter.py +246 -55
  80. mindspore/common/seed.py +13 -7
  81. mindspore/common/sparse_tensor.py +29 -33
  82. mindspore/common/tensor.py +907 -251
  83. mindspore/communication/__init__.py +7 -4
  84. mindspore/communication/_comm_helper.py +84 -4
  85. mindspore/communication/management.py +160 -88
  86. mindspore/config/op_info.config +99 -75
  87. mindspore/config/super_bar_config.json +36 -4
  88. mindspore/context.py +526 -219
  89. mindspore/dataset/__init__.py +9 -46
  90. mindspore/dataset/audio/__init__.py +4 -19
  91. mindspore/dataset/audio/transforms.py +545 -233
  92. mindspore/dataset/audio/utils.py +21 -18
  93. mindspore/dataset/callback/ds_callback.py +42 -13
  94. mindspore/dataset/core/config.py +158 -100
  95. mindspore/dataset/core/validator_helpers.py +1 -63
  96. mindspore/dataset/debug/debug_hook.py +45 -13
  97. mindspore/dataset/debug/pre_defined_hook.py +5 -5
  98. mindspore/dataset/engine/__init__.py +0 -5
  99. mindspore/dataset/engine/cache_client.py +38 -15
  100. mindspore/dataset/engine/datasets.py +615 -278
  101. mindspore/dataset/engine/datasets_audio.py +154 -283
  102. mindspore/dataset/engine/datasets_standard_format.py +104 -116
  103. mindspore/dataset/engine/datasets_text.py +443 -326
  104. mindspore/dataset/engine/datasets_user_defined.py +251 -164
  105. mindspore/dataset/engine/datasets_vision.py +839 -1443
  106. mindspore/dataset/engine/iterators.py +11 -4
  107. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
  108. mindspore/dataset/engine/obs/util.py +3 -0
  109. mindspore/dataset/engine/offload.py +6 -6
  110. mindspore/dataset/engine/queue.py +15 -14
  111. mindspore/dataset/engine/samplers.py +39 -23
  112. mindspore/dataset/engine/serializer_deserializer.py +22 -6
  113. mindspore/dataset/engine/validators.py +21 -331
  114. mindspore/dataset/text/__init__.py +5 -33
  115. mindspore/dataset/text/transforms.py +334 -165
  116. mindspore/dataset/text/utils.py +215 -145
  117. mindspore/dataset/transforms/__init__.py +1 -1
  118. mindspore/dataset/transforms/c_transforms.py +3 -2
  119. mindspore/dataset/transforms/py_transforms_util.py +40 -12
  120. mindspore/dataset/transforms/transforms.py +174 -71
  121. mindspore/dataset/utils/browse_dataset.py +25 -17
  122. mindspore/dataset/utils/line_reader.py +24 -21
  123. mindspore/dataset/vision/__init__.py +5 -26
  124. mindspore/dataset/vision/c_transforms.py +177 -165
  125. mindspore/dataset/vision/py_transforms.py +114 -119
  126. mindspore/dataset/vision/py_transforms_util.py +54 -51
  127. mindspore/dataset/vision/transforms.py +1127 -381
  128. mindspore/dataset/vision/utils.py +54 -38
  129. mindspore/dataset/vision/validators.py +12 -2
  130. mindspore/experimental/map_parameter.py +38 -4
  131. mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
  132. mindspore/experimental/optim/adam.py +192 -0
  133. mindspore/experimental/optim/adamw.py +181 -0
  134. mindspore/experimental/optim/lr_scheduler.py +1427 -0
  135. mindspore/experimental/optim/optimizer.py +252 -0
  136. mindspore/experimental/optim/sgd.py +147 -0
  137. mindspore/gen_ops.py +273 -0
  138. mindspore/include/OWNERS +1 -2
  139. mindspore/include/api/context.h +21 -1
  140. mindspore/include/api/data_type.h +2 -1
  141. mindspore/include/api/graph.h +0 -15
  142. mindspore/include/api/kernel.h +2 -0
  143. mindspore/include/api/kernel_api.h +37 -12
  144. mindspore/include/api/model.h +29 -42
  145. mindspore/include/api/model_group.h +14 -3
  146. mindspore/include/api/model_parallel_runner.h +18 -2
  147. mindspore/include/api/serialization.h +26 -0
  148. mindspore/include/api/status.h +1 -0
  149. mindspore/include/api/types.h +38 -4
  150. mindspore/include/c_api/ms/abstract.h +67 -0
  151. mindspore/include/c_api/ms/attribute.h +197 -0
  152. mindspore/include/c_api/ms/base/handle_types.h +43 -0
  153. mindspore/include/c_api/ms/base/macros.h +32 -0
  154. mindspore/include/c_api/ms/base/status.h +33 -0
  155. mindspore/include/c_api/ms/base/types.h +282 -0
  156. mindspore/include/c_api/ms/context.h +102 -0
  157. mindspore/include/c_api/ms/graph.h +160 -0
  158. mindspore/include/c_api/ms/node.h +606 -0
  159. mindspore/include/c_api/ms/tensor.h +161 -0
  160. mindspore/include/c_api/ms/value.h +84 -0
  161. mindspore/include/c_api/status_c.h +3 -0
  162. mindspore/include/dataset/constants.h +6 -12
  163. mindspore/include/dataset/execute.h +23 -13
  164. mindspore/include/dataset/text.h +26 -26
  165. mindspore/include/dataset/transforms.h +25 -31
  166. mindspore/include/dataset/vision.h +60 -60
  167. mindspore/include/dataset/vision_ascend.h +5 -6
  168. mindspore/include/dataset/vision_lite.h +17 -17
  169. mindspore/include/mindapi/base/format.h +0 -1
  170. mindspore/include/mindapi/base/type_id.h +2 -1
  171. mindspore/include/mindapi/base/types.h +5 -1
  172. mindspore/lib/libdnnl.so.2 +0 -0
  173. mindspore/lib/libjemalloc.so.2 +0 -0
  174. mindspore/lib/libmindspore.so +0 -0
  175. mindspore/lib/libmindspore_backend.so +0 -0
  176. mindspore/lib/libmindspore_common.so +0 -0
  177. mindspore/lib/libmindspore_core.so +0 -0
  178. mindspore/lib/libmindspore_glog.so.0 +0 -0
  179. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  180. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  181. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  182. mindspore/lib/libmindspore_shared_lib.so +0 -0
  183. mindspore/lib/libmpi_adapter.so +0 -0
  184. mindspore/lib/libnnacl.so +0 -0
  185. mindspore/lib/libopencv_core.so.4.5 +0 -0
  186. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  187. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  188. mindspore/lib/libps_cache.so +0 -0
  189. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  190. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  191. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
  192. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  193. mindspore/lib/plugin/ascend/libakg.so +0 -0
  194. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  195. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  196. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  197. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  198. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  199. mindspore/lib/plugin/cpu/libakg.so +0 -0
  200. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  201. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  202. mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
  203. mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
  204. mindspore/lib/plugin/gpu10.1/libnvidia_collective.so +0 -0
  205. mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
  206. mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
  207. mindspore/lib/plugin/gpu11.1/libnvidia_collective.so +0 -0
  208. mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
  209. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  210. mindspore/lib/plugin/gpu11.6/libnvidia_collective.so +0 -0
  211. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  212. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  213. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  214. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  215. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  216. mindspore/log.py +9 -6
  217. mindspore/mindrecord/filereader.py +33 -4
  218. mindspore/mindrecord/filewriter.py +70 -35
  219. mindspore/mindrecord/mindpage.py +40 -34
  220. mindspore/mindrecord/shardreader.py +1 -1
  221. mindspore/mindrecord/shardsegment.py +1 -1
  222. mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
  223. mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
  224. mindspore/mindrecord/tools/csv_to_mr.py +29 -13
  225. mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
  226. mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
  227. mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
  228. mindspore/nn/cell.py +463 -169
  229. mindspore/nn/dynamic_lr.py +47 -43
  230. mindspore/nn/layer/activation.py +225 -82
  231. mindspore/nn/layer/basic.py +121 -79
  232. mindspore/nn/layer/channel_shuffle.py +21 -21
  233. mindspore/nn/layer/combined.py +33 -26
  234. mindspore/nn/layer/container.py +277 -22
  235. mindspore/nn/layer/conv.py +441 -304
  236. mindspore/nn/layer/dense.py +19 -13
  237. mindspore/nn/layer/embedding.py +62 -49
  238. mindspore/nn/layer/flash_attention.py +264 -0
  239. mindspore/nn/layer/image.py +50 -39
  240. mindspore/nn/layer/math.py +62 -51
  241. mindspore/nn/layer/normalization.py +219 -167
  242. mindspore/nn/layer/padding.py +58 -70
  243. mindspore/nn/layer/pooling.py +334 -287
  244. mindspore/nn/layer/rnn_cells.py +53 -38
  245. mindspore/nn/layer/rnns.py +59 -56
  246. mindspore/nn/layer/thor_layer.py +52 -44
  247. mindspore/nn/layer/timedistributed.py +6 -4
  248. mindspore/nn/layer/transformer.py +284 -164
  249. mindspore/nn/learning_rate_schedule.py +34 -25
  250. mindspore/nn/loss/__init__.py +3 -2
  251. mindspore/nn/loss/loss.py +554 -311
  252. mindspore/nn/optim/ada_grad.py +12 -9
  253. mindspore/nn/optim/adadelta.py +14 -11
  254. mindspore/nn/optim/adafactor.py +19 -16
  255. mindspore/nn/optim/adam.py +62 -47
  256. mindspore/nn/optim/adamax.py +13 -10
  257. mindspore/nn/optim/adasum.py +12 -8
  258. mindspore/nn/optim/asgd.py +10 -9
  259. mindspore/nn/optim/ftrl.py +20 -17
  260. mindspore/nn/optim/lamb.py +16 -12
  261. mindspore/nn/optim/lars.py +8 -6
  262. mindspore/nn/optim/lazyadam.py +25 -20
  263. mindspore/nn/optim/momentum.py +10 -7
  264. mindspore/nn/optim/optimizer.py +61 -9
  265. mindspore/nn/optim/proximal_ada_grad.py +14 -13
  266. mindspore/nn/optim/rmsprop.py +17 -13
  267. mindspore/nn/optim/rprop.py +30 -17
  268. mindspore/nn/optim/sgd.py +40 -23
  269. mindspore/nn/optim/thor.py +24 -26
  270. mindspore/nn/probability/bijector/bijector.py +11 -11
  271. mindspore/nn/probability/bijector/exp.py +1 -1
  272. mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
  273. mindspore/nn/probability/bijector/invert.py +1 -1
  274. mindspore/nn/probability/bijector/power_transform.py +29 -29
  275. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  276. mindspore/nn/probability/bijector/softplus.py +5 -5
  277. mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
  278. mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
  279. mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
  280. mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
  281. mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
  282. mindspore/nn/probability/distribution/_utils/utils.py +1 -1
  283. mindspore/nn/probability/distribution/bernoulli.py +9 -9
  284. mindspore/nn/probability/distribution/beta.py +8 -8
  285. mindspore/nn/probability/distribution/categorical.py +23 -15
  286. mindspore/nn/probability/distribution/cauchy.py +5 -6
  287. mindspore/nn/probability/distribution/distribution.py +3 -3
  288. mindspore/nn/probability/distribution/exponential.py +4 -4
  289. mindspore/nn/probability/distribution/gamma.py +10 -10
  290. mindspore/nn/probability/distribution/geometric.py +8 -8
  291. mindspore/nn/probability/distribution/gumbel.py +8 -9
  292. mindspore/nn/probability/distribution/half_normal.py +5 -5
  293. mindspore/nn/probability/distribution/laplace.py +5 -5
  294. mindspore/nn/probability/distribution/log_normal.py +12 -11
  295. mindspore/nn/probability/distribution/logistic.py +8 -8
  296. mindspore/nn/probability/distribution/normal.py +6 -5
  297. mindspore/nn/probability/distribution/poisson.py +10 -11
  298. mindspore/nn/probability/distribution/student_t.py +8 -9
  299. mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
  300. mindspore/nn/probability/distribution/uniform.py +11 -11
  301. mindspore/nn/reinforcement/tensor_array.py +2 -2
  302. mindspore/nn/sparse/sparse.py +9 -9
  303. mindspore/nn/wrap/cell_wrapper.py +188 -63
  304. mindspore/nn/wrap/grad_reducer.py +21 -12
  305. mindspore/nn/wrap/loss_scale.py +136 -49
  306. mindspore/numpy/__init__.py +4 -4
  307. mindspore/numpy/array_creations.py +55 -56
  308. mindspore/numpy/array_ops.py +134 -35
  309. mindspore/numpy/logic_ops.py +66 -20
  310. mindspore/numpy/math_ops.py +142 -139
  311. mindspore/numpy/utils_const.py +2 -2
  312. mindspore/offline_debug/convert_async.py +2 -2
  313. mindspore/ops/_grad_experimental/__init__.py +7 -5
  314. mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
  315. mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
  316. mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
  317. mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
  318. mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
  319. mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
  320. mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
  321. mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
  322. mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
  323. mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
  324. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
  325. mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
  326. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
  327. mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
  328. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
  329. mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
  330. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
  331. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
  332. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
  333. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
  334. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  335. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
  336. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
  337. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
  338. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  339. mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
  340. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
  341. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
  342. mindspore/ops/_op_impl/aicpu/cast.py +52 -0
  343. mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
  344. mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
  345. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  346. mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
  347. mindspore/ops/_op_impl/aicpu/eps.py +32 -0
  348. mindspore/ops/_op_impl/aicpu/eye.py +4 -4
  349. mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
  350. mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
  351. mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
  352. mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
  353. mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
  354. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
  355. mindspore/ops/_op_impl/aicpu/lu.py +39 -0
  356. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
  357. mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
  358. mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
  359. mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
  360. mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
  361. mindspore/ops/_op_impl/aicpu/median.py +1 -0
  362. mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
  363. mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
  364. mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
  365. mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
  366. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
  367. mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
  368. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
  369. mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
  370. mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
  371. mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
  372. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
  373. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
  374. mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
  375. mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
  376. mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
  377. mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
  378. mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
  379. mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
  380. mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
  381. mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
  382. mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
  383. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
  384. mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
  385. mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
  386. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
  387. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
  388. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
  389. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
  390. mindspore/ops/_op_impl/tbe/__init__.py +6 -4
  391. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  392. mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
  393. mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
  394. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
  395. mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
  396. mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
  397. mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
  398. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  399. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
  400. mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
  401. mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
  402. mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
  403. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
  404. mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
  405. mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
  406. mindspore/ops/_op_impl/tbe/im2col.py +4 -4
  407. mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
  408. mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
  409. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
  410. mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
  411. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  412. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
  413. mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
  414. mindspore/ops/_primitive_cache.py +1 -1
  415. mindspore/ops/_tracefunc.py +241 -0
  416. mindspore/ops/_utils/utils.py +10 -2
  417. mindspore/ops/_vmap/vmap_array_ops.py +5 -3
  418. mindspore/ops/_vmap/vmap_base.py +5 -4
  419. mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
  420. mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
  421. mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
  422. mindspore/ops/_vmap/vmap_math_ops.py +5 -2
  423. mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
  424. mindspore/ops/arg_dtype_cast.py +54 -0
  425. mindspore/ops/composite/__init__.py +7 -5
  426. mindspore/ops/composite/base.py +78 -34
  427. mindspore/ops/composite/math_ops.py +5 -695
  428. mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
  429. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
  430. mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
  431. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  432. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  433. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
  434. mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
  435. mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
  436. mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
  437. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
  438. mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
  439. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
  440. mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
  441. mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
  442. mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
  443. mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
  444. mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
  445. mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
  446. mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
  447. mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
  448. mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
  449. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
  450. mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
  451. mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
  452. mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
  453. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
  454. mindspore/ops/deprecated.py +304 -0
  455. mindspore/ops/function/__init__.py +41 -4
  456. mindspore/ops/function/array_func.py +1108 -467
  457. mindspore/ops/function/clip_func.py +94 -27
  458. mindspore/ops/function/debug_func.py +3 -1
  459. mindspore/ops/function/grad/grad_func.py +82 -73
  460. mindspore/ops/function/image_func.py +28 -12
  461. mindspore/ops/function/linalg_func.py +135 -39
  462. mindspore/ops/function/math_func.py +3779 -894
  463. mindspore/ops/function/nn_func.py +1584 -657
  464. mindspore/ops/function/parameter_func.py +13 -3
  465. mindspore/ops/function/random_func.py +247 -153
  466. mindspore/ops/function/sparse_func.py +14 -11
  467. mindspore/ops/function/sparse_unary_func.py +173 -47
  468. mindspore/ops/function/spectral_func.py +8 -4
  469. mindspore/ops/function/vmap_func.py +8 -7
  470. mindspore/ops/functional.py +47 -16
  471. mindspore/ops/op_info_register.py +346 -86
  472. mindspore/ops/operations/__init__.py +38 -22
  473. mindspore/ops/operations/_grad_ops.py +145 -149
  474. mindspore/ops/operations/_inner_ops.py +298 -56
  475. mindspore/ops/operations/_ms_kernel.py +3 -3
  476. mindspore/ops/operations/_quant_ops.py +24 -28
  477. mindspore/ops/operations/_rl_inner_ops.py +9 -7
  478. mindspore/ops/operations/_scalar_ops.py +115 -0
  479. mindspore/ops/operations/_sequence_ops.py +148 -10
  480. mindspore/ops/operations/_tensor_array.py +1 -1
  481. mindspore/ops/operations/_thor_ops.py +2 -2
  482. mindspore/ops/operations/array_ops.py +1239 -561
  483. mindspore/ops/operations/comm_ops.py +166 -90
  484. mindspore/ops/operations/control_ops.py +3 -3
  485. mindspore/ops/operations/custom_ops.py +124 -102
  486. mindspore/ops/operations/debug_ops.py +24 -11
  487. mindspore/ops/operations/image_ops.py +86 -71
  488. mindspore/ops/operations/inner_ops.py +18 -13
  489. mindspore/ops/operations/linalg_ops.py +30 -11
  490. mindspore/ops/operations/math_ops.py +1730 -435
  491. mindspore/ops/operations/nn_ops.py +1953 -943
  492. mindspore/ops/operations/other_ops.py +65 -43
  493. mindspore/ops/operations/random_ops.py +258 -98
  494. mindspore/ops/operations/rl_ops.py +4 -36
  495. mindspore/ops/operations/sparse_ops.py +38 -33
  496. mindspore/ops/operations/spectral_ops.py +8 -4
  497. mindspore/ops/primitive.py +66 -44
  498. mindspore/ops/signature.py +5 -5
  499. mindspore/parallel/_auto_parallel_context.py +80 -19
  500. mindspore/parallel/_cost_model_context.py +42 -0
  501. mindspore/parallel/_offload_context.py +162 -72
  502. mindspore/parallel/_parallel_serialization.py +2 -2
  503. mindspore/parallel/_ps_context.py +16 -4
  504. mindspore/parallel/_recovery_context.py +2 -1
  505. mindspore/parallel/_tensor.py +15 -13
  506. mindspore/parallel/_transformer/layers.py +8 -6
  507. mindspore/parallel/_transformer/loss.py +1 -0
  508. mindspore/parallel/_transformer/moe.py +7 -7
  509. mindspore/parallel/_transformer/op_parallel_config.py +12 -1
  510. mindspore/parallel/_transformer/transformer.py +34 -14
  511. mindspore/parallel/_utils.py +36 -14
  512. mindspore/parallel/algo_parameter_config.py +114 -20
  513. mindspore/parallel/checkpoint_transform.py +16 -18
  514. mindspore/parallel/shard.py +16 -13
  515. mindspore/profiler/__init__.py +1 -1
  516. mindspore/profiler/common/struct_type.py +3 -3
  517. mindspore/profiler/common/util.py +3 -2
  518. mindspore/profiler/envprofiling.py +11 -4
  519. mindspore/profiler/parser/aicpu_data_parser.py +5 -3
  520. mindspore/profiler/parser/ascend_flops_generator.py +94 -0
  521. mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
  522. mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
  523. mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
  524. mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
  525. mindspore/profiler/parser/ascend_op_generator.py +276 -0
  526. mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
  527. mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
  528. mindspore/profiler/parser/base_timeline_generator.py +11 -7
  529. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
  530. mindspore/profiler/parser/flops_parser.py +15 -11
  531. mindspore/profiler/parser/framework_parser.py +92 -73
  532. mindspore/profiler/parser/hccl_parser.py +16 -12
  533. mindspore/profiler/parser/integrator.py +22 -11
  534. mindspore/profiler/parser/memory_usage_parser.py +36 -11
  535. mindspore/profiler/parser/minddata_analyzer.py +12 -14
  536. mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
  537. mindspore/profiler/parser/msadvisor_parser.py +8 -4
  538. mindspore/profiler/parser/op_intermediate_parser.py +5 -2
  539. mindspore/profiler/parser/optime_parser.py +1 -1
  540. mindspore/profiler/parser/profiler_info.py +4 -5
  541. mindspore/profiler/parser/step_trace_parser.py +11 -14
  542. mindspore/profiler/profiling.py +678 -377
  543. mindspore/rewrite/api/node.py +211 -54
  544. mindspore/rewrite/api/node_type.py +5 -0
  545. mindspore/rewrite/api/pattern_engine.py +22 -23
  546. mindspore/rewrite/api/scoped_value.py +20 -17
  547. mindspore/rewrite/api/symbol_tree.py +252 -106
  548. mindspore/rewrite/api/tree_node_helper.py +3 -0
  549. mindspore/rewrite/ast_helpers/__init__.py +2 -1
  550. mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
  551. mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
  552. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
  553. mindspore/rewrite/common/rewrite_elog.py +5 -1
  554. mindspore/rewrite/namer.py +51 -51
  555. mindspore/rewrite/namespace.py +14 -5
  556. mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
  557. mindspore/rewrite/node/call_function.py +79 -0
  558. mindspore/rewrite/node/cell_container.py +135 -0
  559. mindspore/rewrite/node/control_flow.py +88 -0
  560. mindspore/rewrite/{node.py → node/node.py} +313 -247
  561. mindspore/rewrite/node/node_manager.py +254 -0
  562. mindspore/rewrite/node/node_topological_manager.py +243 -0
  563. mindspore/rewrite/parsers/arguments_parser.py +22 -21
  564. mindspore/rewrite/parsers/assign_parser.py +225 -239
  565. mindspore/rewrite/parsers/attribute_parser.py +9 -7
  566. mindspore/rewrite/parsers/class_def_parser.py +179 -218
  567. mindspore/rewrite/parsers/constant_parser.py +9 -6
  568. mindspore/rewrite/parsers/container_parser.py +9 -7
  569. mindspore/rewrite/parsers/for_parser.py +36 -15
  570. mindspore/rewrite/parsers/function_def_parser.py +23 -20
  571. mindspore/rewrite/parsers/if_parser.py +28 -24
  572. mindspore/rewrite/parsers/module_parser.py +202 -25
  573. mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
  574. mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
  575. mindspore/rewrite/parsers/return_parser.py +6 -6
  576. mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
  577. mindspore/rewrite/sparsify/sparsify.py +4 -1
  578. mindspore/rewrite/sparsify/utils.py +11 -5
  579. mindspore/rewrite/symbol_tree.py +577 -732
  580. mindspore/rewrite/symbol_tree_builder.py +9 -175
  581. mindspore/rewrite/symbol_tree_dumper.py +2 -2
  582. mindspore/run_check/_check_version.py +46 -39
  583. mindspore/run_check/run_check.py +3 -2
  584. mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
  585. mindspore/safeguard/rewrite_obfuscation.py +517 -0
  586. mindspore/scipy/__init__.py +1 -1
  587. mindspore/scipy/linalg.py +67 -61
  588. mindspore/scipy/ops.py +5 -41
  589. mindspore/scipy/ops_grad.py +3 -2
  590. mindspore/scipy/ops_wrapper.py +5 -5
  591. mindspore/scipy/optimize/line_search.py +8 -8
  592. mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
  593. mindspore/scipy/optimize/minimize.py +16 -12
  594. mindspore/scipy/utils.py +1 -52
  595. mindspore/scipy/utils_const.py +4 -4
  596. mindspore/train/__init__.py +4 -4
  597. mindspore/train/_utils.py +13 -5
  598. mindspore/train/amp.py +410 -148
  599. mindspore/train/anf_ir_pb2.py +16 -4
  600. mindspore/train/callback/_backup_and_restore.py +8 -11
  601. mindspore/train/callback/_callback.py +80 -3
  602. mindspore/train/callback/_checkpoint.py +82 -51
  603. mindspore/train/callback/_early_stop.py +12 -15
  604. mindspore/train/callback/_history.py +1 -1
  605. mindspore/train/callback/_lambda_callback.py +13 -13
  606. mindspore/train/callback/_landscape.py +21 -17
  607. mindspore/train/callback/_loss_monitor.py +9 -10
  608. mindspore/train/callback/_on_request_exit.py +16 -33
  609. mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
  610. mindspore/train/callback/_summary_collector.py +44 -30
  611. mindspore/train/callback/_time_monitor.py +62 -12
  612. mindspore/train/data_sink.py +10 -16
  613. mindspore/train/dataset_helper.py +154 -86
  614. mindspore/train/loss_scale_manager.py +14 -9
  615. mindspore/train/metrics/__init__.py +10 -2
  616. mindspore/train/metrics/accuracy.py +1 -1
  617. mindspore/train/metrics/auc.py +1 -1
  618. mindspore/train/metrics/bleu_score.py +2 -2
  619. mindspore/train/metrics/confusion_matrix.py +14 -14
  620. mindspore/train/metrics/cosine_similarity.py +3 -3
  621. mindspore/train/metrics/dice.py +1 -1
  622. mindspore/train/metrics/fbeta.py +1 -1
  623. mindspore/train/metrics/hausdorff_distance.py +8 -6
  624. mindspore/train/metrics/mean_surface_distance.py +5 -4
  625. mindspore/train/metrics/metric.py +49 -17
  626. mindspore/train/metrics/occlusion_sensitivity.py +4 -4
  627. mindspore/train/metrics/perplexity.py +1 -1
  628. mindspore/train/metrics/precision.py +2 -2
  629. mindspore/train/metrics/recall.py +2 -3
  630. mindspore/train/metrics/roc.py +7 -7
  631. mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
  632. mindspore/train/metrics/topk.py +7 -4
  633. mindspore/train/mind_ir_pb2.py +193 -48
  634. mindspore/train/model.py +377 -133
  635. mindspore/train/serialization.py +697 -245
  636. mindspore/train/summary/_summary_adapter.py +5 -2
  637. mindspore/train/summary/_writer_pool.py +4 -3
  638. mindspore/train/summary/summary_record.py +25 -23
  639. mindspore/train/train_thor/convert_utils.py +39 -23
  640. mindspore/train/train_thor/dataset_helper.py +4 -3
  641. mindspore/train/train_thor/model_thor.py +8 -8
  642. mindspore/version.py +1 -1
  643. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
  644. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +647 -818
  645. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
  646. mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
  647. mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
  648. mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
  649. mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
  650. mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
  651. mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
  652. mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
  653. mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
  654. mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
  655. mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
  656. mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
  657. mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
  658. mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
  659. mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
  660. mindspore/_akg/akg/tvm/rpc/base.py +0 -182
  661. mindspore/_akg/akg/tvm/rpc/client.py +0 -436
  662. mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
  663. mindspore/_akg/akg/tvm/rpc/server.py +0 -413
  664. mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
  665. mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
  666. mindspore/_extends/graph_kernel/expander.py +0 -80
  667. mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
  668. mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
  669. mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
  670. mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
  671. mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
  672. mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
  673. mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
  674. mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
  675. mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
  676. mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
  677. mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
  678. mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
  679. mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
  680. mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
  681. mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
  682. mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
  683. mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
  684. mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
  685. mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
  686. mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
  687. mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
  688. mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
  689. mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
  690. mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
  691. mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
  692. mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
  693. mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
  694. mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
  695. mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
  696. mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
  697. mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
  698. mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
  699. mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
  700. mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
  701. mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
  702. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
  703. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
  704. mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
  705. mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
  706. mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
  707. mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
  708. mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
  709. mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
  710. mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
  711. mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
  712. mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
  713. mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
  714. mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
  715. mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
  716. mindspore/_extends/parse/jit_fallback_modules.py +0 -51
  717. mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
  718. mindspore/dataset/engine/graphdata.py +0 -1586
  719. mindspore/include/api/net.h +0 -142
  720. mindspore/ops/_grad/grad_array_ops.py +0 -1347
  721. mindspore/ops/_grad/grad_clip_ops.py +0 -84
  722. mindspore/ops/_grad/grad_debug_ops.py +0 -68
  723. mindspore/ops/_grad/grad_inner_ops.py +0 -235
  724. mindspore/ops/_grad/grad_math_ops.py +0 -1684
  725. mindspore/ops/_grad/grad_nn_ops.py +0 -1529
  726. mindspore/ops/_grad/grad_other_ops.py +0 -89
  727. mindspore/ops/_grad/grad_sequence_ops.py +0 -296
  728. mindspore/ops/_grad/grad_sparse.py +0 -323
  729. mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
  730. mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
  731. mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
  732. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  733. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  734. mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
  735. mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
  736. mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
  737. mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
  738. mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
  739. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
  740. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
  741. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  742. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
  743. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  744. mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
  745. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  746. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
  747. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
  748. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
  749. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  750. mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
  751. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
  752. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
  753. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
  754. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
  755. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
  756. mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
  757. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
  758. mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
  759. mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
  760. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
  761. mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
  762. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  763. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  764. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
  765. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
  766. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
  767. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  768. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  769. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  770. mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
  771. mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
  772. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  773. mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
  774. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
  775. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
  776. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
  777. mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
  778. mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
  779. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
  780. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  781. mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
  782. mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
  783. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
  784. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
  785. mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
  786. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  787. mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
  788. mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
  789. mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
  790. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
  791. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
  792. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
  793. mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
  794. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  795. mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
  796. mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
  797. mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
  798. mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
  799. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
  800. mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
  801. mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
  802. mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
  803. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
  804. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
  805. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
  806. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
  807. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  808. mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
  809. mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
  810. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
  811. mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
  812. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  813. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  814. mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
  815. mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
  816. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
  817. mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
  818. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  819. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  820. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  821. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
  822. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
  823. mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
  824. mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
  825. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
  826. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  827. mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
  828. mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
  829. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
  830. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
  831. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
  832. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
  833. mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
  834. mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
  835. mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
  836. mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
  837. mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
  838. mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
  839. mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
  840. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
  841. mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
  842. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
  843. mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
  844. mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
  845. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
  846. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  847. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
  848. mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
  849. mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
  850. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
  851. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  852. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
  853. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
  854. mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
  855. mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
  856. mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
  857. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  858. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  859. mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
  860. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
  861. mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
  862. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
  863. mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
  864. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  865. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
  866. mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
  867. mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
  868. mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
  869. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  870. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  871. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
  872. mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
  873. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
  874. mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
  875. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
  876. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
  877. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
  878. mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
  879. mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
  880. mindspore/rewrite/node_visitor.py +0 -44
  881. mindspore/rewrite/topological_manager.py +0 -203
  882. mindspore/scipy/sparse/linalg.py +0 -192
  883. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
  884. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
@@ -16,8 +16,8 @@
 """Defines nn operators with functional form."""
 from __future__ import absolute_import
 from math import pi, log
-import numpy as np

+from mindspore import context
 from mindspore import log as logger
 import mindspore.ops as ops
 from mindspore.ops.primitive import constexpr, _primexpr
@@ -27,7 +27,7 @@ from mindspore.ops.operations import nn_ops as NN_OPS
 from mindspore.ops.operations import _sequence_ops as seq
 import mindspore.common.dtype as mstype
 from mindspore.ops.function.math_func import logsumexp
-from mindspore.ops.function.random_func import _get_seed
+from mindspore.ops.function.random_func import _get_seed, _set_prim_op_user_data
 from mindspore.common.tensor import Tensor
 from mindspore._c_expression import Tensor as Tensor_
 from mindspore.ops._primitive_cache import _get_cache_prim
@@ -39,6 +39,8 @@ from mindspore.ops.operations.nn_ops import PadV3
 from mindspore.ops.operations.nn_ops import ChannelShuffle
 from mindspore.ops.operations.nn_ops import TripletMarginLoss
 from mindspore.ops.operations._inner_ops import SiLU
+from mindspore.ops.operations._sequence_ops import TupleToTensor, TensorToTuple, ListToTensor
+from mindspore.common.api import _function_forbid_reuse

 slice_ = P.Slice()
 fast_gelu_ = P.FastGeLU()
@@ -47,12 +49,19 @@ hardswish_ = P.HSwish()
 mish_ = NN_OPS.Mish()
 selu_ = NN_OPS.SeLU()
 scalar_to_tensor_ = P.ScalarToTensor()
+list_to_tensor_ = ListToTensor()
+tuple_to_tensor_ = TupleToTensor()
+tensor_to_tuple_ = TensorToTuple()
+cast_ = P.Cast()
 sigmoid_ = NN_OPS.Sigmoid()
-check_positive_int_const = constexpr(validator.check_positive_int)
-check_positive_int_sequence_const = constexpr(validator.check_positive_int_sequence)
-check_positive_float_const = constexpr(validator.check_positive_float)
-check_positive_float_sequence_const = constexpr(validator.check_positive_float_sequence)
+check_positive_int_const = validator.check_positive_int
+check_positive_int_sequence_const = validator.check_positive_int_sequence
+check_positive_float_const = validator.check_positive_float
+check_positive_float_sequence_const = validator.check_positive_float_sequence
 check_bool_const = constexpr(validator.check_bool)
+check_int_const = validator.check_is_int
+check_non_negative_float_const = validator.check_non_negative_float
+check_string_const = constexpr(validator.check_string)


 def adaptive_avg_pool2d(input, output_size):
@@ -76,10 +85,13 @@ def adaptive_avg_pool2d(input, output_size):
         * (w_{end}- w_{start})}
         \end{align}

+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
     Args:
         input (Tensor): The input of adaptive_avg_pool2d, which is a 3D or 4D tensor,
             with float16, float32 or float64 data type.
-        output_size (Union[int, tuple]): The target output size. `ouput_size` can be a tuple :math:`(H, W)`,
+        output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(H, W)`,
             or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None.
             If it is None, it means the output size is the same as the input size.

@@ -105,9 +117,12 @@ def adaptive_avg_pool2d(input, output_size):
         ValueError: If the dimension of `input` is less than or equal to the dimension of `output_size`.

     Supported Platforms:
-        ``GPU``
+        ``Ascend`` ``GPU`` ``CPU``

     Examples:
+        >>> import mindspore
+        >>> import numpy as np
+        >>> from mindspore import Tensor, ops
         >>> # case 1: output_size=(None, 2)
         >>> input = Tensor(np.array([[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
         ...                           [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
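
For orientation, a minimal runnable sketch of the call documented above (not part of the diff itself; it assumes a MindSpore 2.2.0 install with a backend that supports `ops.adaptive_avg_pool2d`):

    import numpy as np
    from mindspore import Tensor, ops

    # 3-D input of shape (C, H, W) = (1, 3, 3)
    x = Tensor(np.arange(9, dtype=np.float32).reshape(1, 3, 3))
    # output_size=(None, 2): keep H as-is, adaptively average W down to 2
    y = ops.adaptive_avg_pool2d(x, (None, 2))
    print(y.shape)  # expected: (1, 3, 2)
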
@@ -168,7 +183,7 @@ def adaptive_avg_pool3d(input, output_size):

     Args:
         input (Tensor): The input of adaptive_avg_pool3d, which is a 5D or 4D Tensor.
-        output_size (Union[int, tuple]): The target output size. `ouput_size` can be a tuple :math:`(D, H, W)`,
+        output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(D, H, W)`,
             or an int D for :math:`(D, D, D)`. :math:`D`, :math:`H` and :math:`W` can be int or None
             which means the output size is the same as that of the input.

@@ -185,6 +200,9 @@ def adaptive_avg_pool3d(input, output_size):
         ``Ascend`` ``GPU`` ``CPU``

     Examples:
+        >>> import mindspore
+        >>> import numpy as np
+        >>> from mindspore import Tensor, ops
         >>> # case 1: output_size=(3, 3, 4)
         >>> output_size=(3, 3, 4)
         >>> input_val = np.random.randn(4, 3, 5, 6, 7)
@@ -215,7 +233,7 @@ def adaptive_avg_pool3d(input, output_size):
215
233
  def _check_avgpool_1d_type_and_int(kernel_size, stride, ceil_mode, count_include_pad):
216
234
  """Checks the type of avgpool1d input"""
217
235
  validator.check_value_type('kernel_size', kernel_size, [int], 'avg_pool1d')
218
- validator.check_value_type('stride', stride, [int], 'avg_pool1d')
236
+ validator.check_value_type('stride', stride, (int, tuple), 'avg_pool1d')
219
237
  validator.check_value_type('ceil_mode', ceil_mode, bool, 'avg_pool1d')
220
238
  validator.check_value_type('count_include_pad', count_include_pad, bool, 'avg_pool1d')
221
239
  validator.check_int(kernel_size, 1, validator.GE, "kernel_size", 'avg_pool1d')
@@ -245,15 +263,13 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
245
263
 
246
264
  Args:
247
265
  input_x (Tensor): Tensor of shape :math:`(N, C_{in}, L_{in})`.
248
- kernel_size (int): The size of kernel window used to take the average value. Default: 1.
249
- stride (Union(int, tuple[int])): The distance of kernel moving, an int number that represents the height and
250
- width of movement are both strides, or a tuple of two int numbers that represent height and width of
251
- movement respectively. Default: 1.
252
- padding (Union(int, tuple[int])): The pad value to be filled. If `padding` is an integer, the paddings of left
253
- and right are the same, equal to pad. If `padding` is a tuple of `2` integers, the padding of left and right
254
- equal to `padding[0]` and `padding[1]` correspondingly. Default: 0.
255
- ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: False.
256
- count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: True.
266
+ kernel_size (int): The size of kernel window used to take the average value. Default: ``1`` .
267
+ stride (Union(int, tuple[int])): The distance of kernel moving. `stride` can either be an int
268
+ number or a tuple of one int number. Default: ``1`` .
269
+ padding (Union(int, tuple[int])): The pad value to be filled. `padding` can either be an integer
270
+ or a tuple of one integer. Default: ``0`` .
271
+ ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: ``False``.
272
+ count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: ``True`` .
257
273
 
258
274
  Returns:
259
275
  Tensor of shape :math:`(N, C_{out}, L_{out})`.
@@ -271,6 +287,9 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
271
287
  ``Ascend`` ``GPU`` ``CPU``
272
288
 
273
289
  Examples:
290
+ >>> import mindspore
291
+ >>> import numpy as np
292
+ >>> from mindspore import Tensor, ops
274
293
  >>> input_x = Tensor(np.random.randint(0, 10, [1, 3, 6]), mindspore.float32)
275
294
  >>> output = ops.avg_pool1d(input_x, kernel_size=6, stride=1)
276
295
  >>> print(output.shape)
@@ -280,20 +299,25 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
280
299
  raise TypeError("For avg_pool1d, the input input_x must be tensor")
281
300
 
282
301
  if len(input_x.shape) != 3:
283
- raise ValueError("For avg_pool1d, input must have 3 dim, but got {}.".format(len(input_x.shape)))
302
+ raise ValueError(f"For avg_pool1d, input must have 3 dim, but got {len(input_x.shape)}.")
284
303
 
285
304
  _check_avgpool_1d_type_and_int(kernel_size, stride, ceil_mode, count_include_pad)
286
305
  if isinstance(padding, int):
287
306
  check_non_negative_int(padding, 'padding', 'avg_pool1d')
288
307
  padding = (0, 0, 0, 0, padding, padding)
289
308
  elif isinstance(padding, tuple):
290
- if len(padding) != 2:
291
- raise ValueError("For avg_pool1d, padding should be int or tuple of length 2.")
309
+ if len(padding) != 1:
310
+ raise ValueError("For avg_pool1d, padding should be int or tuple of length 1.")
292
311
  for item in padding:
293
312
  check_non_negative_int(item, 'padding', 'avg_pool1d')
294
- padding = (0, 0, 0, 0, padding[0], padding[1])
313
+ padding = (0, 0, 0, 0, padding[0], padding[0])
295
314
  else:
296
- raise TypeError("For avg_pool1d, padding should be int or tuple of length 2.")
315
+ raise TypeError("For avg_pool1d, padding should be int or tuple of length 1.")
316
+
317
+ if isinstance(stride, tuple):
318
+ if len(stride) != 1:
319
+ raise ValueError("For avg_pool1d, stride should be int or tuple of length 1.")
320
+ stride = stride[0]
297
321
 
298
322
  expand_op = _get_cache_prim(P.ExpandDims)()
299
323
  squeeze_op = _get_cache_prim(P.Squeeze)((2, 3))
@@ -310,7 +334,7 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
310
334
  return input_x
311
335
 
312
336
 
313
- @constexpr
337
+ @_primexpr
314
338
  def _check_avgpool_2d_kernel_size(kernel_size):
315
339
  """check and calculate the avgpool2d kernel_size"""
316
340
  if isinstance(kernel_size, int):
@@ -327,7 +351,7 @@ def _check_avgpool_2d_kernel_size(kernel_size):
327
351
  return kernel_size
328
352
 
329
353
 
330
- @constexpr
354
+ @_primexpr
331
355
  def _check_avgpool_2d_stride(stride):
332
356
  """check and calculate the avgpool2d stride"""
333
357
  if isinstance(stride, int):
@@ -344,7 +368,7 @@ def _check_avgpool_2d_stride(stride):
344
368
  return stride
345
369
 
346
370
 
347
- @constexpr
371
+ @_primexpr
348
372
  def _check_avgpool_2d_padding(padding):
349
373
  """check and calculate the avgpool2d padding"""
350
374
  if isinstance(padding, int):
@@ -361,7 +385,7 @@ def _check_avgpool_2d_padding(padding):
361
385
  return padding
362
386
 
363
387
 
364
- @constexpr
388
+ @_primexpr
365
389
  def _check_avg_pool2d_type_and_value(ceil_mode, count_include_pad, divisor_override):
366
390
  """check the type of avgpool2d input"""
367
391
  validator.check_value_type('ceil_mode', ceil_mode, bool, 'avg_pool2d')
@@ -388,18 +412,18 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
388
412
  input_x (Tensor): Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
389
413
  kernel_size (Union[int, tuple[int]]): The size of kernel used to take the average value. It is an int number
390
414
  that represents height and width of the kernel, or a tuple of two int numbers that represent height and
391
- width respectively. Default: 1.
415
+ width respectively. Default: ``1`` .
392
416
  stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents the height and
393
417
  width of movement are both strides, or a tuple of two int numbers that represent height and width of
394
- movement respectively. Default: 1.
418
+ movement respectively. Default: ``1`` .
395
419
  padding (Union(int, tuple[int])): The pad value to be filled. Default: 0. If `padding` is an integer, the
396
420
  paddings of top, bottom, left and right are the same, equal to pad. If `padding` is a tuple of `4` integers,
397
421
  the padding of top, bottom, left and right equal to `padding[0]`, `padding[1]`, `padding[2]` and
398
- `padding[3]` correspondingly. Default: 0.
399
- ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: False.
400
- count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: True.
422
+ `padding[3]` correspondingly. Default: ``0`` .
423
+ ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: ``False``.
424
+ count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: ``True`` .
401
425
  divisor_override (int): If specified, it will be used as divisor in the averaging calculation, otherwise
402
- `kernel_size` will be used. Default: 0.
426
+ `kernel_size` will be used. Default: ``0``, which means not specified.
403
427
 
404
428
  Returns:
405
429
  Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
@@ -419,6 +443,9 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
419
443
  ``Ascend`` ``GPU`` ``CPU``
420
444
 
421
445
  Examples:
446
+ >>> import mindspore
447
+ >>> import numpy as np
448
+ >>> from mindspore import Tensor, ops
422
449
  >>> x = Tensor(np.arange(1 * 3 * 3 * 4).reshape(1, 3, 3, 4), mindspore.float32)
423
450
  >>> output = ops.avg_pool2d(x, kernel_size=2, stride=1)
424
451
  >>> print(output)
@@ -433,7 +460,7 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
433
460
  raise TypeError("For avg_pool2d, the input input_x must be tensor")
434
461
 
435
462
  if len(input_x.shape) != 4:
436
- raise ValueError("For avg_pool2d, input must have 4 dim, but got {}.".format(len(input_x.shape)))
463
+ raise ValueError(f"For avg_pool2d, input must have 4 dim, but got {len(input_x.shape)}.")
437
464
 
438
465
  kernel_size = _check_avgpool_2d_kernel_size(kernel_size)
439
466
  stride = _check_avgpool_2d_stride(stride)
@@ -491,18 +518,20 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
491
518
  float32 data type.
492
519
  kernel_size (Union[int, tuple[int]], optional): The size of kernel used to take the average value, is an int
493
520
  number that represents depth, height and width are both `kernel_size`, or a tuple of three int numbers that
494
- represent depth, height and width respectively. Default: 1.
521
+ represent depth, height and width respectively. Default: ``1`` .
495
522
  stride (Union[int, tuple[int]], optional): The distance of kernel moving, an int number that represents the
496
523
  depth, height and width of movement are both stride, or a tuple of three int numbers that represent depth,
497
- height and width of movement respectively. Default: 1.
524
+ height and width of movement respectively. Default: ``1`` .
498
525
  padding (Union(int, tuple[int]), optional): The pad value to be filled. If `padding` is an integer, the paddings
499
526
  of head, tail, top, bottom, left and right are the same, equal to pad. If `padding` is a tuple of six
500
527
  integers, the padding of head, tail, top, bottom, left and right equal to padding[0], padding[1],
501
- padding[2], padding[3], padding[4] and padding[5] correspondingly. Default: 0
502
- ceil_mode (bool, optional): If True, ceil instead of floor to compute the output shape. Default: False.
503
- count_include_pad (bool, optional): If True, averaging calculation will include the zero-padding. Default: True.
528
+ padding[2], padding[3], padding[4] and padding[5] correspondingly. Default: ``0`` .
529
+ ceil_mode (bool, optional): If ``True`` , apply ceil instead of floor to
530
+ compute the output shape. Default: ``False`` .
531
+ count_include_pad (bool, optional): If ``True`` , averaging calculation
532
+ will include the zero-padding. Default: ``True`` .
504
533
  divisor_override (int, optional): If specified, it will be used as divisor in the averaging calculation,
505
- otherwise `kernel_size` will be used. Default: 0.
534
+ otherwise `kernel_size` will be used. Default: ``0`` , which means not specified.
506
535
 
507
536
  Returns:
508
537
  Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})`. Has the same data type with `input_x`.
@@ -522,6 +551,9 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
522
551
  ``Ascend`` ``GPU`` ``CPU``
523
552
 
524
553
  Examples:
554
+ >>> import mindspore
555
+ >>> import numpy as np
556
+ >>> from mindspore import Tensor, ops
525
557
  >>> input_x = Tensor(np.arange(1 * 2 * 2 * 2 * 3).reshape((1, 2, 2, 2, 3)), mindspore.float16)
526
558
  >>> output = ops.avg_pool3d(input_x, kernel_size=2, stride=1)
527
559
  >>> print(output)
@@ -532,7 +564,7 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
532
564
  raise TypeError("For avg_pool3d, the input input_x must be tensor")
533
565
 
534
566
  if len(input_x.shape) != 5:
535
- raise ValueError("For avg_pool3d, input must have 5 dim, but got {}.".format(len(input_x.shape)))
567
+ raise ValueError(f"For avg_pool3d, input must have 5 dim, but got {len(input_x.shape)}.")
536
568
 
537
569
  _check_avg_pool3d_padding(padding)
538
570
 
@@ -547,6 +579,12 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
547
579
 
548
580
 
549
581
  @constexpr
582
+ def is_ascend_backend():
583
+ """Check if the Ascend is used"""
584
+ return context.get_context('device_target') == 'Ascend'
585
+
586
+
587
+ @_primexpr
550
588
  def _check_adaptive_max_pool1d_output_size(output_size):
551
589
  """Check the output_size value in adaptive_max_pool1d op."""
552
590
  validator.check_int(output_size, 1, validator.GE, "output_size", 'adaptive_max_pool1d')
@@ -563,7 +601,8 @@ def adaptive_max_pool1d(input, output_size):
563
601
  shape :math:`(N, C, L_{out})`, where :math:`L_{out}` is defined by `output_size`.
564
602
 
565
603
  Note:
566
- :math:`L_{in}` must be divisible by `output_size`.
604
+ - :math:`L_{in}` must be divisible by `output_size`.
605
+ - Ascend platform only supports float16 type for input.
567
606
 
568
607
  Args:
569
608
  input (Tensor): Tensor of shape :math:`(N, C, L_{in})`, with float16 or float32 data type.
@@ -585,6 +624,9 @@ def adaptive_max_pool1d(input, output_size):
585
624
  ``Ascend`` ``GPU`` ``CPU``
586
625
 
587
626
  Examples:
627
+ >>> import mindspore
628
+ >>> import numpy as np
629
+ >>> from mindspore import Tensor, ops
588
630
  >>> input = Tensor(np.random.randint(0, 10, [1, 3, 6]), mindspore.float32)
589
631
  >>> output = ops.adaptive_max_pool1d(input, output_size=2)
590
632
  >>> print(output.shape)
@@ -599,16 +641,21 @@ def adaptive_max_pool1d(input, output_size):
599
641
  x_dtype = _get_cache_prim(P.DType)()(input)
600
642
 
601
643
  if len(x_in_shape) != 3:
602
- raise ValueError("For adaptive_max_pool1d input must have 3 dim, but got {}.".format(len(x_in_shape)))
644
+ raise ValueError(f"For adaptive_max_pool1d input must have 3 dim, but got {len(x_in_shape)}.")
603
645
  if x_in_shape[2] < output_size:
604
- raise ValueError("For adaptive_max_pool1d input's last dimension must be greater or equal to "
605
- "output size {}, but got {}.".format(output_size, x_in_shape[2]))
646
+ raise ValueError(f"For adaptive_max_pool1d input's last dimension must be greater or equal to "
647
+ f"output size {output_size}, but got {x_in_shape[2]}.")
606
648
  if x_in_shape[2] % output_size != 0:
607
- raise ValueError("For adaptive_max_pool1d input's last dimension must be divisible by "
608
- "output size {}, but got {}.".format(output_size, x_in_shape[2]))
609
- if x_dtype not in [mstype.float16, mstype.float32]:
610
- raise TypeError("For adaptive_max_pool1d, the input dtype must be float16 or float32, "
611
- "but got {}.".format(x_dtype))
649
+ raise ValueError(f"For adaptive_max_pool1d input's last dimension must be divisible by "
650
+ f"output size {output_size}, but got {x_in_shape[2]}.")
651
+ if is_ascend_backend():
652
+ if x_dtype not in [mstype.float16]:
653
+ raise TypeError(f"For adaptive_max_pool1d in Ascend platform, the input dtype must be float16, "
654
+ f"but got {x_dtype}.")
655
+ else:
656
+ if x_dtype not in [mstype.float16, mstype.float32]:
657
+ raise TypeError(f"For adaptive_max_pool1d, the input dtype must be float16 or float32, "
658
+ f"but got {x_dtype}.")
612
659
 
613
660
  expand_ = _get_cache_prim(P.ExpandDims)()
614
661
  squeeze_ = _get_cache_prim(P.Squeeze)(2)
@@ -616,11 +663,11 @@ def adaptive_max_pool1d(input, output_size):
616
663
  width = x_in_shape[2]
617
664
  stride = width // output_size
618
665
  kernel_size = width - (output_size - 1) * stride
619
-
620
- stride = (1, stride)
666
+ stride = (1, width // output_size)
621
667
  kernel_size = (1, kernel_size)
622
668
 
623
- max_pool_ = _get_cache_prim(P.MaxPool)(kernel_size=kernel_size, strides=stride)
669
+ max_pool_ = _get_cache_prim(NN_OPS.MaxPool)(kernel_size=kernel_size, strides=stride)
670
+
624
671
  input = expand_(input, 2)
625
672
  input = max_pool_(input)
626
673
  input = squeeze_(input)
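  # Quick arithmetic check (plain Python, hypothetical sizes) of the stride/kernel derivation
  # above: with stride = L_in // output_size and kernel = L_in - (output_size - 1) * stride,
  # a valid-padding MaxPool yields exactly `output_size` windows when L_in is divisible by output_size.
  # def _n_windows(length, kernel, stride):
  #     return (length - kernel) // stride + 1
  # for length, out in [(6, 2), (8, 4), (9, 3)]:
  #     s = length // out
  #     k = length - (out - 1) * s
  #     assert _n_windows(length, k, s) == out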
@@ -659,7 +706,7 @@ def adaptive_max_pool2d(input, output_size, return_indices=False):
659
706
  Args:
660
707
  input (Tensor): A 3D or 4D tensor,
661
708
  with float16, float32 or float64 data type.
662
- output_size (Union[int, tuple]): The target output size. `ouput_size` can be a tuple :math:`(H, W)`,
709
+ output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(H, W)`,
663
710
  or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None.
664
711
  If it is None, it means the output size is the same as the input size.
665
712
 
@@ -681,6 +728,9 @@ def adaptive_max_pool2d(input, output_size, return_indices=False):
681
728
  ``Ascend`` ``GPU`` ``CPU``
682
729
 
683
730
  Examples:
731
+ >>> import mindspore
732
+ >>> import numpy as np
733
+ >>> from mindspore import Tensor, ops
684
734
  >>> # case 1: output_size=(None, 2)
685
735
  >>> input = Tensor(np.array([[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
686
736
  ... [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
@@ -747,6 +797,8 @@ def adaptive_max_pool3d(input, output_size, return_indices=False):
747
797
  ``GPU`` ``CPU``
748
798
 
749
799
  Examples:
800
+ >>> import numpy as np
801
+ >>> from mindspore import Tensor, ops
750
802
  >>> input = Tensor(np.arange(0,36).reshape((1, 3, 3, 4)).astype(np.float32))
751
803
  >>> output_size = (1, 1, 2)
752
804
  >>> output = ops.adaptive_max_pool3d(input, output_size, True)
@@ -795,9 +847,10 @@ def max_unpool1d(x, indices, kernel_size, stride=None, padding=0, output_size=No
795
847
  Data type must be in int32 or int64.
796
848
  kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value.
797
849
  stride (Union[int, tuple[int]]): The distance of kernel moving,
798
- If stride is 0, (0) or None, then stride equal to kernel_size. Default: None.
799
- padding (Union[int, tuple[int]]): The pad value to be filled. Default: 0.
800
- output_size (tuple[int], optional): The output shape. Default: None.
850
+ If stride is 0, (0) or ``None`` , then stride is equal to kernel_size.
851
+ Default: ``None`` , which indicates the moving step is `kernel_size` .
852
+ padding (Union[int, tuple[int]]): The pad value to be filled. Default: ``0`` .
853
+ output_size (tuple[int], optional): The output shape. Default: ``None`` .
801
854
  If output_size == (), then the shape of output computed by `kernel_size`, `stride` and `padding`.
802
855
  If output_size != (), then output_size must be :math:`(N, C, H)` , :math:`(C, H)` or
803
856
  :math:`(H)` and output_size must belong to
@@ -821,6 +874,8 @@ def max_unpool1d(x, indices, kernel_size, stride=None, padding=0, output_size=No
821
874
  ``Ascend`` ``GPU`` ``CPU``
822
875
 
823
876
  Examples:
877
+ >>> import numpy as np
878
+ >>> from mindspore import Tensor, ops
824
879
  >>> x = Tensor(np.array([[2, 4, 6, 8]]).astype(np.float32))
825
880
  >>> indices = Tensor(np.array([[1, 3, 5, 7]]).astype(np.int64))
826
881
  >>> output = ops.max_unpool1d(x, indices, kernel_size =2, stride=2, padding=0)
@@ -912,11 +967,11 @@ def max_unpool2d(x, indices, kernel_size, stride=None, padding=0, output_size=No
912
967
  stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
913
968
  the height and width of movement are both stride, or a tuple of two int numbers that
914
969
  represent height and width of movement respectively.
915
- If stride is None, then stride equal to kernel_size. Default: None.
916
- padding (Union[int, tuple[int]]): The pad value to be filled. Default: 0. If `padding` is an integer,
970
+ Default: ``None`` , which indicates the moving step is `kernel_size` .
971
+ padding (Union[int, tuple[int]]): The pad value to be filled. Default: ``0`` . If `padding` is an integer,
917
972
  the paddings of height and width are the same, equal to padding. If `padding` is a tuple of two
918
973
  integers, the padding of height and width equal to padding[0] and padding[1] correspondingly.
919
- output_size (tuple[int], optional): The target output size. Default: None.
974
+ output_size (tuple[int], optional): The target output size. Default: ``None`` .
920
975
  If output_size == (), then the shape of output computed by `kernel_size`, `stride` and `padding`.
921
976
  If output_size != (), then output_size must be :math:`(N, C, H, W)` , :math:`(C, H, W)` or :math:`(H, W)`
922
977
  and output_size must belong to
@@ -942,6 +997,8 @@ def max_unpool2d(x, indices, kernel_size, stride=None, padding=0, output_size=No
942
997
  ``Ascend`` ``GPU`` ``CPU``
943
998
 
944
999
  Examples:
1000
+ >>> import numpy as np
1001
+ >>> from mindspore import Tensor, ops
945
1002
  >>> x = Tensor(np.array([[[[0, 1], [8, 9]]]]).astype(np.float32))
946
1003
  >>> indices = Tensor(np.array([[[[0, 1], [2, 3]]]]).astype(np.int64))
947
1004
  >>> output = ops.max_unpool2d(x, indices, kernel_size=1, stride=1, padding=0)
@@ -1015,12 +1072,12 @@ def max_unpool3d(x, indices, kernel_size, stride=None, padding=0, output_size=No
1015
1072
  stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
1016
1073
  the depth, height and width of movement are both stride, or a tuple of three int numbers that
1017
1074
  represent depth, height and width of movement respectively.
1018
- If stride is None, then stride equal to kernel_size. Default: None.
1019
- padding (Union[int, tuple[int]]): The pad value to be filled. Default: 0. If `padding` is an integer,
1075
+ Default: ``None`` , which indicates the moving step is `kernel_size` .
1076
+ padding (Union[int, tuple[int]]): The pad value to be filled. Default: ``0`` . If `padding` is an integer,
1020
1077
  the paddings of depth, height and width are the same, equal to padding. If `padding` is a tuple of three
1021
1078
  integers, the padding of depth, height and width equal to padding[0], padding[1] and padding[2]
1022
1079
  correspondingly.
1023
- output_size (tuple[int], optional): The output size. Default: None. If output_size == (), then the shape of
1080
+ output_size (tuple[int], optional): The output size. Default: ``None`` . If output_size == (), then the shape of
1024
1081
  output computed by `kernel_size`, `stride` and `padding`. If output_size != (), then output_size must be
1025
1082
  :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` or :math:`(D, H, W)` and output_size must belong to
1026
1083
  :math:`[(N, C, D_{out} - stride[0], H_{out} - stride[1], W_{out} - stride[2]),
@@ -1045,6 +1102,8 @@ def max_unpool3d(x, indices, kernel_size, stride=None, padding=0, output_size=No
1045
1102
  ``Ascend`` ``GPU`` ``CPU``
1046
1103
 
1047
1104
  Examples:
1105
+ >>> import numpy as np
1106
+ >>> from mindspore import Tensor, ops
1048
1107
  >>> x = Tensor(np.array([[[[[0, 1], [8, 9]]]]]).astype(np.float32))
1049
1108
  >>> indices= Tensor(np.array([[[[[0, 1], [2, 3]]]]]).astype(np.int64))
1050
1109
  >>> output = ops.max_unpool3d(x, indices, kernel_size=2, stride=1, padding=0)
@@ -1092,7 +1151,7 @@ def max_unpool3d(x, indices, kernel_size, stride=None, padding=0, output_size=No
1092
1151
  return out
1093
1152
 
1094
1153
 
1095
- def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reduction='mean'):
1154
+ def binary_cross_entropy_with_logits(logits, label, weight=None, pos_weight=None, reduction='mean'):
1096
1155
  r"""
1097
1156
  Adds sigmoid activation function to input `logits`, and uses the given logits to compute binary cross entropy
1098
1157
  between the logits and the label.
@@ -1122,7 +1181,7 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
1122
1181
 
1123
1182
  This operator will multiply the output by the corresponding weight.
1124
1183
  The tensor :math:`weight` assigns different weights to each piece of data in the batch,
1125
- and the tensor :math:`pos_weight` adds corresponding weights to the positive examples of each category.
1184
+ and the tensor :math:`pos\_weight` adds corresponding weights to the positive examples of each category.
1126
1185
 
1127
1186
  In addition, it can trade off recall and precision by adding weights to positive examples.
1128
1187
  In the case of multi-label classification the loss can be described as:
@@ -1141,15 +1200,21 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
1141
1200
  logits (Tensor): Input logits. Data type must be float16 or float32.
1142
1201
  label (Tensor): Ground truth label, has the same shape as `logits`.
1143
1202
  Data type must be float16 or float32.
1144
- weight (Tensor): A rescaling weight applied to the loss of each batch element. It can be
1203
+ weight (Tensor, optional): A rescaling weight applied to the loss of each batch element. It can be
1145
1204
  broadcast to a tensor with shape of `logits`. Data type must be float16 or float32.
1146
- pos_weight (Tensor): A weight of positive examples. Must be a vector with length equal to the
1205
+ Default: ``None``, in which case `weight` is treated as a Tensor whose values are all ``1``.
1206
+ pos_weight (Tensor, optional): A weight of positive examples. Must be a vector with length equal to the
1147
1207
  number of classes. It can be broadcast to a tensor with shape of `logits`.
1148
- Data type must be float16 or float32.
1149
- reduction (str): Type of reduction to be applied to loss. The optional values are 'mean', 'sum', and 'none',
1150
- not case sensitive. If 'none', do not perform reduction. Default: 'mean'.
1208
+ Data type must be float16 or float32. Default: ``None``, in which case `pos_weight` is treated as a Tensor whose values are all ``1``.
1209
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
1210
+ ``'sum'`` . Default: ``'mean'`` .
1211
+
1212
+ - ``'none'``: no reduction will be applied.
1213
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
1214
+ - ``'sum'``: the output elements will be summed.
1215
+
1151
1216
  Returns:
1152
- Tensor or Scalar, if `reduction` is 'none', it's a tensor with the same shape and type as input `logits`.
1217
+ Tensor or Scalar, if `reduction` is ``'none'``, it's a tensor with the same shape and type as input `logits`.
1153
1218
  Otherwise, the output is a scalar.
1154
1219
 
1155
1220
  Raises:
@@ -1157,12 +1222,15 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
1157
1222
  TypeError: If data type of input `logits`, `label`, `weight`, `pos_weight` is neither float16 nor float32.
1158
1223
  TypeError: If data type of input `reduction` is not string.
1159
1224
  ValueError: If `weight` or `pos_weight` can not be broadcast to a tensor with shape of `logits`.
1160
- ValueError: If `reduction` is not one of 'none', 'mean' or 'sum'.
1225
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
1161
1226
 
1162
1227
  Supported Platforms:
1163
1228
  ``Ascend`` ``GPU`` ``CPU``
1164
1229
 
1165
1230
  Examples:
1231
+ >>> import mindspore
1232
+ >>> import numpy as np
1233
+ >>> from mindspore import Tensor, ops
1166
1234
  >>> logits = Tensor(np.array([[-0.8, 1.2, 0.7], [-0.1, -0.4, 0.7]]), mindspore.float32)
1167
1235
  >>> label = Tensor(np.array([[0.3, 0.8, 1.2], [-0.6, 0.1, 2.2]]), mindspore.float32)
1168
1236
  >>> weight = Tensor(np.array([1.0, 1.0, 1.0]), mindspore.float32)
@@ -1172,37 +1240,44 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
1172
1240
  0.3463612
1173
1241
  """
1174
1242
 
1243
+ if weight is None:
1244
+ weight = ops.ones_like(logits)
1245
+ if pos_weight is None:
1246
+ pos_weight = ops.ones_like(logits)
1175
1247
  bce_with_logits_loss_op = _get_cache_prim(NN_OPS.BCEWithLogitsLoss)(reduction)
1176
1248
  return bce_with_logits_loss_op(logits, label, weight, pos_weight)
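  # A NumPy reference sketch of the weighted formula documented above (an illustration, not
  # the BCEWithLogitsLoss kernel); with the tensors from the Examples block and default
  # weights it reproduces the documented value of about 0.3463612.
  # import numpy as np
  # def _bce_with_logits_ref(x, y, w=None, pw=None):
  #     w = np.ones_like(x) if w is None else w
  #     pw = np.ones_like(x) if pw is None else pw
  #     sig = 1.0 / (1.0 + np.exp(-x))
  #     loss = -w * (pw * y * np.log(sig) + (1.0 - y) * np.log(1.0 - sig))
  #     return loss.mean()
  # x = np.array([[-0.8, 1.2, 0.7], [-0.1, -0.4, 0.7]])
  # y = np.array([[0.3, 0.8, 1.2], [-0.6, 0.1, 2.2]])
  # print(round(_bce_with_logits_ref(x, y), 7))  # 0.3463612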
1177
1249
 
1178
1250
 
1251
+ @_function_forbid_reuse
1179
1252
  def dropout(input, p=0.5, training=True, seed=None):
1180
- """
1253
+ r"""
1181
1254
  During training, randomly zeroes some of the elements of the input tensor
1182
- with probability `p` from a Bernoulli distribution. It plays the role of
1183
- reducing neuron correlation and avoid overfitting. The meaning of probability
1184
- here is opposite to that in `ops.Dropout` and `nn.Dropout`.
1255
+ with probability `p` from a Bernoulli distribution. It plays the role of reducing neuron correlation and
1256
+ avoiding overfitting. The output is multiplied by :math:`\frac{1}{1-p}` during training.
1257
+ During inference, this operation returns the same Tensor as `input`.
1185
1258
 
1186
1259
  Args:
1187
- input (Tensor): The input of Dropout, a Tensor of any shape with data type of float16 or float32.
1260
+ input (Tensor): The input Tensor of shape :math:`(*, N)`, with data type of float16, float32 or float64.
1188
1261
  p (float, optional): The dropping rate, between 0 and 1, e.g. p = 0.1,
1189
- means dropping out 10% of input units. Default: 0.5.
1190
- training (bool): Apply dropout if is True. Default: True.
1262
+ means dropping out 10% of input units. Default: ``0.5`` .
1263
+ training (bool): Apply dropout if is True. Default: ``True``.
1191
1264
  seed (int, optional): Seed is used as entropy source for Random number engines generating pseudo-random numbers.
1192
- Default: None, which will be treated as 0.
1265
+ Default: ``None`` , which will be treated as ``0`` .
1193
1266
 
1194
1267
  Returns:
1195
1268
  - **output** (Tensor) - Zeroed tensor, with the same shape and data type as `input`.
1196
1269
 
1197
1270
  Raises:
1198
1271
  TypeError: If `p` is not a float.
1199
- TypeError: If dtype of `input` is neither float16 nor float32.
1272
+ TypeError: If dtype of `input` is not float16, float32 or float64.
1200
1273
  TypeError: If `input` is not a Tensor.
1201
1274
 
1202
1275
  Supported Platforms:
1203
1276
  ``Ascend`` ``GPU`` ``CPU``
1204
1277
 
1205
1278
  Examples:
1279
+ >>> import mindspore
1280
+ >>> from mindspore import Tensor, ops
1206
1281
  >>> input = Tensor(((20, 16), (50, 50)), mindspore.float32)
1207
1282
  >>> output = ops.dropout(input, p=0.5)
1208
1283
  >>> print(output.shape)
@@ -1213,7 +1288,9 @@ def dropout(input, p=0.5, training=True, seed=None):
1213
1288
  return input
1214
1289
  keep_prob = 1 - p
1215
1290
  seed0, seed1 = _get_seed(seed, "dropout")
1216
- out, _ = P.Dropout(keep_prob=keep_prob, Seed0=seed0, Seed1=seed1)(input)
1291
+ dropout_op = P.Dropout(keep_prob=keep_prob, Seed0=seed0, Seed1=seed1)
1292
+ dropout_op = _set_prim_op_user_data(dropout_op, "random_cache", False)
1293
+ out, _ = dropout_op(input)
1217
1294
  return out
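  # A NumPy sketch (illustrative only) of the inverted-dropout behaviour described in the
  # docstring: elements are zeroed with probability p and the survivors are scaled by
  # 1 / (1 - p), so the expected value of the output matches the input during training.
  # import numpy as np
  # def _dropout_ref(x, p=0.5, training=True, seed=0):
  #     if not training or p == 0.0:
  #         return x
  #     mask = np.random.default_rng(seed).random(x.shape) >= p  # keep with probability 1 - p
  #     return x * mask / (1.0 - p)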
1218
1295
 
1219
1296
 
@@ -1228,6 +1305,9 @@ def celu(x, alpha=1.0):
1228
1305
 
1229
1306
  For more details, please refer to `celu <https://arxiv.org/abs/1704.07483>`_.
1230
1307
 
1308
+ .. warning::
1309
+ This is an experimental API that is subject to change or deletion.
1310
+
1231
1311
  Args:
1232
1312
  x (Tensor): The input of celu with data type of float16 or float32.
1233
1313
  alpha (float, optional): The :math:`\alpha` value for the Celu formulation. Default: 1.0
@@ -1245,6 +1325,9 @@ def celu(x, alpha=1.0):
1245
1325
  ``Ascend`` ``GPU`` ``CPU``
1246
1326
 
1247
1327
  Examples:
1328
+ >>> import mindspore
1329
+ >>> import numpy as np
1330
+ >>> from mindspore import Tensor, ops
1248
1331
  >>> x = Tensor(np.array([-2.0, -1.0, 1.0, 2.0]), mindspore.float32)
1249
1332
  >>> output = ops.celu(x, alpha=1.0)
1250
1333
  >>> print(output)
@@ -1278,8 +1361,8 @@ def dropout1d(input, p=0.5, training=True):
1278
1361
  number of channels, `L` is the feature length. The data type must be int8, int16, int32, int64, float16,
1279
1362
  float32 or float64.
1280
1363
  p (float, optional): The dropping probability of a channel, between 0 and 1, e.g. `p` = 0.8,
1281
- which means an 80% chance of clearing. Default: 0.5.
1282
- training (bool, optional): Apply dropout if is True. Default: True.
1364
+ which means an 80% chance of clearing. Default: ``0.5`` .
1365
+ training (bool, optional): Apply dropout if is True. Default: ``True`` .
1283
1366
 
1284
1367
  Returns:
1285
1368
  Tensor, output, with the same shape and data type as `input`.
@@ -1294,6 +1377,9 @@ def dropout1d(input, p=0.5, training=True):
1294
1377
  ``Ascend`` ``GPU`` ``CPU``
1295
1378
 
1296
1379
  Examples:
1380
+ >>> import mindspore
1381
+ >>> import numpy as np
1382
+ >>> from mindspore import Tensor, ops
1297
1383
  >>> input_x = Tensor(np.random.randn(4, 3), mindspore.float32)
1298
1384
  >>> output = ops.dropout1d(input_x, 0.5)
1299
1385
  >>> print(output.shape)
@@ -1349,8 +1435,8 @@ def dropout2d(input, p=0.5, training=True):
1349
1435
  of channels, `H` is the feature height, and `W` is the feature width. The data type must be int8,
1350
1436
  int16, int32, int64, float16, float32 or float64.
1351
1437
  p (float): The dropping probability of a channel, between 0 and 1, e.g. `p` = 0.8,
1352
- which means dropping out 80% of channels. Default: 0.5.
1353
- training(bool): If `training` is True, applying dropout, otherwise, not applying. Default: True.
1438
+ which means dropping out 80% of channels. Default: ``0.5`` .
1439
+ training(bool): If `training` is True, applying dropout, otherwise, not applying. Default: ``True`` .
1354
1440
 
1355
1441
  Returns:
1356
1442
  Tensor, output, with the same shape and data type as `input`.
@@ -1366,6 +1452,9 @@ def dropout2d(input, p=0.5, training=True):
1366
1452
  ``Ascend`` ``GPU`` ``CPU``
1367
1453
 
1368
1454
  Examples:
1455
+ >>> import mindspore
1456
+ >>> import numpy as np
1457
+ >>> from mindspore import Tensor, ops
1369
1458
  >>> input = Tensor(np.ones([2, 1, 2, 3]), mindspore.float32)
1370
1459
  >>> output = ops.dropout2d(input, 0.5)
1371
1460
  >>> print(output.shape)
@@ -1398,8 +1487,8 @@ def dropout3d(input, p=0.5, training=True):
1398
1487
  of channels, `D` is the feature depth, `H` is the feature height, and `W` is the feature width.
1399
1488
  The data type must be int8, int16, int32, int64, float16, float32 or float64.
1400
1489
  p (float): The dropping probability of a channel, between 0 and 1, e.g. `p` = 0.8,
1401
- which means dropping out 80% of channels. Default: 0.5.
1402
- training(bool): If `training` is True, applying dropout, otherwise, not applying. Default: True.
1490
+ which means dropping out 80% of channels. Default: ``0.5`` .
1491
+ training(bool): If `training` is True, applying dropout, otherwise, not applying. Default: ``True`` .
1403
1492
 
1404
1493
  Returns:
1405
1494
  Tensor, output, with the same shape and data type as `input`.
@@ -1415,6 +1504,9 @@ def dropout3d(input, p=0.5, training=True):
1415
1504
  ``Ascend`` ``GPU`` ``CPU``
1416
1505
 
1417
1506
  Examples:
1507
+ >>> import mindspore
1508
+ >>> import numpy as np
1509
+ >>> from mindspore import Tensor, ops
1418
1510
  >>> input = Tensor(np.ones([2, 1, 2, 1, 2]), mindspore.float32)
1419
1511
  >>> output = ops.dropout3d(input, 0.5)
1420
1512
  >>> print(output.shape)
@@ -1452,6 +1544,9 @@ def fast_gelu(x):
1452
1544
  ``Ascend`` ``GPU`` ``CPU``
1453
1545
 
1454
1546
  Examples:
1547
+ >>> import mindspore
1548
+ >>> import numpy as np
1549
+ >>> from mindspore import Tensor, ops
1455
1550
  >>> x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
1456
1551
  >>> output = ops.fast_gelu(x)
1457
1552
  >>> print(output)
@@ -1461,20 +1556,28 @@ def fast_gelu(x):
1461
1556
  return fast_gelu_(x)
1462
1557
 
1463
1558
 
1464
- @constexpr
1465
- def _check_float_range_inc_right(arg_value, lower_limit, upper_limit, arg_name=None, prim_name=None):
1559
+ @_primexpr
1560
+ def _check_float_range_inc_neither(arg_value, lower_limit, upper_limit, arg_name=None, prim_name=None):
1466
1561
  """
1467
- Method for checking whether input value is in float range inc right.
1562
+ Method for checking whether input value is in float range inc neither.
1468
1563
  """
1469
- return validator.check_float_range(arg_value, lower_limit, upper_limit, validator.INC_RIGHT, arg_name, prim_name)
1564
+ return validator.check_float_range(arg_value, lower_limit, upper_limit, validator.INC_NEITHER, arg_name, prim_name)
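+ # Plain-Python illustration (not the validator itself) of what "INC_NEITHER" enforces:
+ # the open interval (lower, upper), so for output_ratio both 0.0 and 1.0 are rejected.
+ # def _in_open_interval(value, lower, upper):
+ #     return lower < value < upper
+ # assert _in_open_interval(0.5, 0.0, 1.0)
+ # assert not _in_open_interval(1.0, 0.0, 1.0)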
1565
+
1566
+
1567
+ def _check_fractional_output_size_ratio(output_size, output_ratio, cls_name):
1568
+ """Internal function, used to check whether fractional_max_pool can specify the output shape."""
1569
+ if output_ratio is None and output_size is None:
1570
+ raise ValueError(f"For {cls_name}, 'output_size' and 'output_ratio' can not be None"
1571
+ f"at the same time, but got {output_ratio} and {output_size} .")
1470
1572
 
1471
1573
 
1472
1574
  def fractional_max_pool2d(input, kernel_size, output_size=None, output_ratio=None, return_indices=False,
1473
1575
  _random_samples=None):
1474
1576
  r"""
1475
1577
  Applies the 2D FractionalMaxPool operation over `input`. The output Tensor shape can be determined by either
1476
- `output_size` or `output_ratio`, and the step size is determined by `_random_samples`.
1477
- `output_size` or `output_ratio` cannot be used at the same time.
1578
+ `output_size` or `output_ratio`, and the step size is determined by `_random_samples`. `output_size` will take
1579
+ effect when `output_size` and `output_ratio` are set at the same time.
1580
+ `output_size` and `output_ratio` cannot both be ``None`` at the same time.
1478
1581
 
1479
1582
  Refer to the paper `Fractional MaxPooling by Ben Graham <https://arxiv.org/abs/1412.6071>`_ for more details.
1480
1583
 
@@ -1489,16 +1592,17 @@ def fractional_max_pool2d(input, kernel_size, output_size=None, output_ratio=Non
1489
1592
  is an int number that represents height and width, or a tuple
1490
1593
  of two int numbers that represent height and width respectively.
1491
1594
  The value must be a positive integer.
1492
- Default: None.
1595
+ Default: ``None``.
1493
1596
  output_ratio (Union[float, tuple[float]], optional): The ratio of target output shape to input shape.
1494
1597
  Specifying the size of the output tensor by using a ratio of the input size.
1495
1598
  Data type: float16, float32, double, and value is between (0, 1).
1496
- Default: None.
1497
- return_indices (bool, optional): Whether to return the indices of max value. Default: False.
1498
- _random_samples (Tensor, optional): The random step of FractionalMaxPool2d, which is a 3D tensor.
1499
- Tensor of data type: float16, float32, double, and value is between (0, 1).
1599
+ Default: ``None``.
1600
+ return_indices (bool, optional): Whether to return the indices of max value. Default: ``False``.
1601
+ _random_samples (Tensor, optional): The random step of fractional_max_pool2d, which is a 3D tensor.
1602
+ Tensor of data type: float16, float32, double, and value is between [0, 1).
1500
1603
  Supported shape :math:`(N, C, 2)` or :math:`(1, C, 2)`.
1501
- Default: None.
1604
+ Default: ``None``, the values of `_random_samples`
1605
+ will be drawn from a uniform distribution on the interval [0, 1).
1502
1606
 
1503
1607
  Returns:
1504
1608
  - **y** (Tensor) - Has the same type as the `input`.
@@ -1526,6 +1630,9 @@ def fractional_max_pool2d(input, kernel_size, output_size=None, output_ratio=Non
1526
1630
  ``CPU``
1527
1631
 
1528
1632
  Examples:
1633
+ >>> import numpy as np
1634
+ >>> from mindspore import Tensor, ops
1635
+ >>> from mindspore import dtype as mstype
1529
1636
  >>> input = Tensor(np.array([0.3220, 0.9545, 0.7879, 0.0975, 0.3698,
1530
1637
  ... 0.5135, 0.5740, 0.3435, 0.1895, 0.8764,
1531
1638
  ... 0.9581, 0.4760, 0.9014, 0.8522, 0.3664,
@@ -1549,21 +1656,28 @@ def fractional_max_pool2d(input, kernel_size, output_size=None, output_ratio=Non
1549
1656
  [[[[ 1 9]
1550
1657
  [16 24]]]]
1551
1658
  """
1552
- if output_ratio is not None and output_size is not None or output_ratio is None and output_size is None:
1553
- raise ValueError(f"For fractional_max_pool2d, 'output_size' and 'output_ratio' can not be specified or None"
1554
- f"at the same time, but got {output_ratio} and {output_size} .")
1555
- if len(input.shape) == 3:
1659
+ _check_fractional_output_size_ratio(output_size, output_ratio, "fractional_max_pool2d")
1660
+ _check_value_type("return_indices", return_indices, [bool], "fractional_max_pool2d")
1661
+ dim_flag = False
1662
+ if input.ndim == 3:
1556
1663
  input = input.expand_dims(axis=0)
1664
+ dim_flag = True
1557
1665
  if _random_samples is None:
1558
- _random_samples = Tensor([[[0, 0]]], mstype.float32)
1559
- if output_ratio is not None:
1560
- if isinstance(output_ratio, float):
1666
+ if input.dtype in mstype.float_type:
1667
+ _random_samples = ops.rand(input.shape[0], input.shape[1], 2, dtype=input.dtype)
1668
+ else:
1669
+ _random_samples = ops.rand(input.shape[0], input.shape[1], 2)
1670
+ if output_size is None:
1671
+ if isinstance(output_ratio, (float, int)):
1672
+ _check_value_type("output_ratio", output_ratio, [float], "fractional_max_pool2d")
1561
1673
  output_ratio = (output_ratio, output_ratio)
1562
- _check_float_range_inc_right(output_ratio[0], 0.0, 1.0)
1563
- _check_float_range_inc_right(output_ratio[1], 0.0, 1.0)
1674
+ _check_float_range_inc_neither(output_ratio[0], 0.0, 1.0, "output_ratio[0]", "fractional_max_pool2d")
1675
+ _check_float_range_inc_neither(output_ratio[1], 0.0, 1.0, "output_ratio[1]", "fractional_max_pool2d")
1564
1676
  output_size = (int(input.shape[-2] * output_ratio[0]), int(input.shape[-1] * output_ratio[1]))
1565
1677
  fractional_max_pool = FractionalMaxPoolWithFixedKsize(kernel_size, output_size)
1566
1678
  output = fractional_max_pool(input, _random_samples)
1679
+ if dim_flag:
1680
+ output = output[0].squeeze(axis=0), output[1].squeeze(axis=0)
1567
1681
  if return_indices:
1568
1682
  return output
1569
1683
  return output[0]
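  # Sketch of the output-size derivation used above (hypothetical sizes, plain Python):
  # when only `output_ratio` is given, each spatial size is floor(input_size * ratio).
  # def _ratio_to_output_size(in_hw, ratio):
  #     ratio = (ratio, ratio) if isinstance(ratio, float) else ratio
  #     return int(in_hw[0] * ratio[0]), int(in_hw[1] * ratio[1])
  # assert _ratio_to_output_size((25, 25), 0.5) == (12, 12)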
@@ -1573,17 +1687,21 @@ def fractional_max_pool3d(input, kernel_size, output_size=None, output_ratio=Non
1573
1687
  _random_samples=None):
1574
1688
  r"""
1575
1689
  Applies the 3D FractionalMaxPool operation over `input`. The output Tensor shape can be determined by either
1576
- `output_size` or `output_ratio`, and the step size is determined by `_random_samples`.
1577
- `output_size` or `output_ratio` cannot be used at the same time.
1690
+ `output_size` or `output_ratio`, and the step size is determined by `_random_samples`. `output_size` will take
1691
+ effect when `output_size` and `output_ratio` are set at the same time.
1692
+ `output_size` and `output_ratio` cannot both be ``None`` at the same time.
1578
1693
 
1579
1694
  Refer to the paper `Fractional MaxPooling by Ben Graham <https://arxiv.org/abs/1412.6071>`_ for more details.
1580
1695
 
1581
1696
  The input and output data format can be "NCDHW". N is the batch size, C is the number of channels,
1582
1697
  D the feature depth, H is the feature height, and W is the feature width.
1583
1698
 
1699
+ .. warning::
1700
+ This is an experimental API that is subject to change or deletion.
1701
+
1584
1702
  Args:
1585
1703
  input (Tensor): The input of FractionalMaxPool3d, which is a 4D or 5D tensor.
1586
- Tensor of data type: float16, float32, double, int32, int64.
1704
+ Tensor of data type: float16, float32, double.
1587
1705
  Supported shape :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
1588
1706
  kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
1589
1707
  is an int number that represents depth, height and width of the kernel, or a tuple
@@ -1593,15 +1711,16 @@ def fractional_max_pool3d(input, kernel_size, output_size=None, output_ratio=Non
1593
1711
  is an int number that represents depth, height and width, or a tuple
1594
1712
  of three int numbers that represent depth, height and width respectively.
1595
1713
  The value must be a positive integer.
1596
- Default: None.
1714
+ Default: ``None`` .
1597
1715
  output_ratio (Union[float, tuple[float]], optional): The ratio of target output shape to input shape.
1598
1716
  Specifying the size of the output tensor by using a ratio of the input size.
1599
1717
  Data type: float16, float32, double, and value is between (0, 1).
1600
- Default: None.
1601
- return_indices (bool, optional): Whether to return the indices of max value. Default: False.
1602
- _random_samples (Tensor, optional): The random step of FractionalMaxPool3d, which is a 3D tensor.
1603
- Tensor of data type: float16, float32, double, and value is between (0, 1).
1604
- Supported shape :math:`(N, C, 3)` or :math:`(1, C, 3)` .
1718
+ Default: ``None`` .
1719
+ return_indices (bool, optional): Whether to return the indices of max value. Default: ``False`` .
1720
+ _random_samples (Tensor, optional): The random step of fractional_max_pool3d, which is a 3D tensor.
1721
+ Tensor of data type: float16, float32, double, and value is between [0, 1).
1722
+ Supported shape :math:`(N, C, 3)` or :math:`(1, C, 3)` . Default: ``None``, the values of `_random_samples`
1723
+ will be drawn from a uniform distribution on the interval [0, 1).
1605
1724
 
1606
1725
  Returns:
1607
1726
  - **y** (Tensor) - A tensor, the output of FractionalMaxPool3d.
@@ -1619,6 +1738,7 @@ def fractional_max_pool3d(input, kernel_size, output_size=None, output_ratio=Non
1619
1738
  TypeError: If data type of `input` is not float16, float32, double, int32, int64.
1620
1739
  TypeError: If dtype of `_random_samples` is not float16, float32, double.
1621
1740
  TypeError: If dtype of `argmax` is not int32, int64.
1741
+ TypeError: If the dtype of `_random_samples` is different from that of `input`.
1622
1742
  ValueError: If `output_size` is a tuple and if `output_size` length is not 3.
1623
1743
  ValueError: If `kernel_size` is a tuple and if `kernel_size` length is not 3.
1624
1744
  ValueError: If numbers in `output_size` or `kernel_size` is not positive.
@@ -1631,35 +1751,47 @@ def fractional_max_pool3d(input, kernel_size, output_size=None, output_ratio=Non
1631
1751
  ``GPU`` ``CPU``
1632
1752
 
1633
1753
  Examples:
1754
+ >>> import numpy as np
1755
+ >>> from mindspore import Tensor, ops
1756
+ >>> from mindspore import dtype as mstype
1634
1757
  >>> x = Tensor(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
1635
1758
  ... .reshape([1, 1, 2, 2, 4]), mstype.float32)
1636
1759
  >>> _random_samples = Tensor(np.array([0.7, 0.7, 0.7]).reshape([1, 1, 3]), mstype.float32)
1637
- >>> output, argmax = ops.fractional_max_pool3d(x, kernel_size=(1.0, 1.0, 1.0), output_size=(1, 1, 3),
1760
+ >>> output, argmax = ops.fractional_max_pool3d(x, kernel_size=(1, 1, 1), output_size=(1, 1, 3),
1638
1761
  ... _random_samples=_random_samples, return_indices=True)
1639
1762
  >>> print(output)
1640
1763
  [[[[[13. 14. 16.]]]]]
1641
1764
  >>> print(argmax)
1642
1765
  [[[[[12 13 15]]]]]
1643
- >>> output, argmax = ops.fractional_max_pool3d(x, kernel_size=(1.0, 1.0, 1.0), output_ratio=(0.5, 0.5, 0.5),
1766
+ >>> output, argmax = ops.fractional_max_pool3d(x, kernel_size=(1, 1, 1), output_ratio=(0.5, 0.5, 0.5),
1644
1767
  ... _random_samples=_random_samples, return_indices=True)
1645
1768
  >>> print(output)
1646
1769
  [[[[[13. 16.]]]]]
1647
1770
  >>> print(argmax)
1648
1771
  [[[[[12 15]]]]]
1649
1772
  """
1650
- if output_ratio is not None and output_size is not None or output_ratio is None and output_size is None:
1651
- raise ValueError(f"For fractional_max_pool2d, 'output_size' and 'output_ratio' can not be specified or None"
1652
- f"at the same time, but got {output_ratio} and {output_size} .")
1773
+ _check_fractional_output_size_ratio(output_size, output_ratio, "fractional_max_pool3d")
1774
+ _check_value_type("return_indices", return_indices, [bool], "fractional_max_pool3d")
1653
1775
  if _random_samples is None:
1654
- _random_samples = Tensor([[[0, 0, 0]]], mstype.float32)
1655
- if output_ratio is not None:
1656
- if isinstance(output_ratio, float):
1776
+ n = 1 if input.ndim == 4 else input.shape[0]
1777
+ if input.dtype in mstype.float_type:
1778
+ _random_samples = ops.rand(n, input.shape[-4], 3, dtype=input.dtype)
1779
+ else:
1780
+ _random_samples = ops.rand(n, input.shape[-4], 3)
1781
+ if input.ndim == 4:
1782
+ _random_samples = _random_samples.transpose(1, 0, 2)
1783
+ if output_size is None:
1784
+ if isinstance(output_ratio, (float, int)):
1785
+ _check_value_type("output_ratio", output_ratio, [float], "fractional_max_pool3d")
1657
1786
  output_ratio = (output_ratio, output_ratio, output_ratio)
1658
- _check_float_range_inc_right(output_ratio[0], 0.0, 1.0)
1659
- _check_float_range_inc_right(output_ratio[1], 0.0, 1.0)
1660
- _check_float_range_inc_right(output_ratio[2], 0.0, 1.0)
1787
+ _check_float_range_inc_neither(output_ratio[0], 0.0, 1.0, "output_ratio[0]", "fractional_max_pool3d")
1788
+ _check_float_range_inc_neither(output_ratio[1], 0.0, 1.0, "output_ratio[1]", "fractional_max_pool3d")
1789
+ _check_float_range_inc_neither(output_ratio[2], 0.0, 1.0, "output_ratio[2]", "fractional_max_pool3d")
1661
1790
  output_size = (int(input.shape[-3] * output_ratio[0]), int(input.shape[-2] * output_ratio[1]),
1662
1791
  int(input.shape[-1] * output_ratio[2]))
1792
+ if input.dtype != _random_samples.dtype:
1793
+ raise TypeError(f"For 'fractional_max_pool3d', 'input' and '_random_samples' must be same dtype, "
1794
+ f"but got Tensor[{input.dtype}] and Tensor[{_random_samples.dtype}].")
1663
1795
  fractional_max_pool = FractionalMaxPool3DWithFixedKsize(kernel_size, output_size)
1664
1796
  output = fractional_max_pool(input, _random_samples)
1665
1797
  if return_indices:
@@ -1700,10 +1832,10 @@ def kl_div(logits, labels, reduction='mean'):
1700
1832
  logits (Tensor): The input Tensor. The data type must be float16, float32 or float64.
1701
1833
  labels (Tensor): The label Tensor which has the same shape and data type as `logits`.
1702
1834
  reduction (str): Specifies the reduction to be applied to the output.
1703
- Its value must be one of 'none', 'mean', 'batchmean' or 'sum'. Default: 'mean'.
1835
+ Its value must be one of ``'none'`` , ``'mean'`` , ``'batchmean'`` or ``'sum'`` . Default: ``'mean'`` .
1704
1836
 
1705
1837
  Returns:
1706
- Tensor or Scalar, if `reduction` is 'none', then output is a tensor and has the same shape as `logits`.
1838
+ Tensor or Scalar, if `reduction` is ``'none'``, then output is a tensor and has the same shape as `logits`.
1707
1839
  Otherwise, it is a scalar.
1708
1840
 
1709
1841
  Raises:
@@ -1715,6 +1847,9 @@ def kl_div(logits, labels, reduction='mean'):
1715
1847
  ``Ascend`` ``GPU`` ``CPU``
1716
1848
 
1717
1849
  Examples:
1850
+ >>> import mindspore
1851
+ >>> import numpy as np
1852
+ >>> from mindspore import Tensor, ops
1718
1853
  >>> logits = Tensor(np.array([0.2, 0.7, 0.1]), mindspore.float32)
1719
1854
  >>> labels = Tensor(np.array([0., 1., 0.]), mindspore.float32)
1720
1855
  >>> output = mindspore.ops.kl_div(logits, labels, 'mean')
@@ -1727,13 +1862,13 @@ def kl_div(logits, labels, reduction='mean'):
1727
1862
 
1728
1863
  if reduction == 'batchmean':
1729
1864
  kl_div_sum = P.KLDivLoss(reduction='sum')(logits, labels)
1730
- shape = P.TensorShape()(logits)
1865
+ shape = P.Shape()(logits)
1731
1866
  batch_size = shape[0]
1732
1867
  return kl_div_sum / batch_size
1733
1868
 
1734
1869
  if reduction == 'mean':
1735
1870
  kl_div_sum = P.KLDivLoss(reduction='sum')(logits, labels)
1736
- shape = P.TensorShape()(logits)
1871
+ shape = P.Shape()(logits)
1737
1872
  total_size = 1
1738
1873
  for dim in shape:
1739
1874
  total_size = total_size * dim
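  # A small sketch (hypothetical values) of the reduction arithmetic above: starting from
  # the 'sum'-reduced loss, 'batchmean' divides by the batch dimension only, while 'mean'
  # divides by the total number of elements.
  # def _reduce_kl(kl_sum, shape, reduction):
  #     if reduction == 'batchmean':
  #         return kl_sum / shape[0]
  #     if reduction == 'mean':
  #         total = 1
  #         for dim in shape:
  #             total *= dim
  #         return kl_sum / total
  #     return kl_sum
  # assert _reduce_kl(12.0, (2, 3), 'batchmean') == 6.0
  # assert _reduce_kl(12.0, (2, 3), 'mean') == 2.0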
@@ -1758,7 +1893,7 @@ def hardshrink(x, lambd=0.5):
1758
1893
 
1759
1894
  Args:
1760
1895
  x (Tensor): The input of Hard Shrink with data type of float16 or float32.
1761
- lambd (float): The threshold :math:`\lambda` defined by the Hard Shrink formula. Default: 0.5.
1896
+ lambd (float): The threshold :math:`\lambda` defined by the Hard Shrink formula. Default: ``0.5`` .
1762
1897
 
1763
1898
  Returns:
1764
1899
  Tensor, has the same data type and shape as the input `x`.
@@ -1772,6 +1907,9 @@ def hardshrink(x, lambd=0.5):
1772
1907
  ``Ascend`` ``GPU`` ``CPU``
1773
1908
 
1774
1909
  Examples:
1910
+ >>> import mindspore
1911
+ >>> import numpy as np
1912
+ >>> from mindspore import Tensor, ops
1775
1913
  >>> x = Tensor(np.array([[ 0.5, 1, 2.0], [0.0533,0.0776,-2.1233]]), mindspore.float32)
1776
1914
  >>> output = ops.hardshrink(x)
1777
1915
  >>> print(output)
@@ -1949,7 +2087,7 @@ def is_floating_point(input):
1949
2087
  input (Tensor): The input Tensor.
1950
2088
 
1951
2089
  Returns:
1952
- Bool. If the dtype of `input` is a floating point data type, return True. Otherwise, return False.
2090
+ Bool. If the dtype of `input` is a floating point data type, return ``True`` . Otherwise, return ``False`` .
1953
2091
 
1954
2092
  Supported Platforms:
1955
2093
  ``Ascend`` ``GPU`` ``CPU``
@@ -1990,12 +2128,15 @@ def hardswish(x):
1990
2128
 
1991
2129
  Raises:
1992
2130
  TypeError: If `x` is not a Tensor.
1993
- TypeError: If dtype of `x` is not supported.
2131
+ TypeError: If dtype of `x` is not int or float.
1994
2132
 
1995
2133
  Supported Platforms:
1996
2134
  ``Ascend`` ``GPU`` ``CPU``
1997
2135
 
1998
2136
  Examples:
2137
+ >>> import mindspore
2138
+ >>> import numpy as np
2139
+ >>> from mindspore import Tensor, ops
1999
2140
  >>> x = Tensor(np.array([-1, -2, 0, 2, 1]), mindspore.float16)
2000
2141
  >>> output = ops.hardswish(x)
2001
2142
  >>> print(output)
@@ -2004,12 +2145,79 @@ def hardswish(x):
2004
2145
  return hardswish_(x)
2005
2146
 
2006
2147
 
2148
+ def _is_dim_unknown(shape):
2149
+ return isinstance(shape, tuple) and -2 in shape
2150
+
2151
+
2152
+ @_primexpr
2153
+ def _interploate_make_tuple(rank, value):
2154
+ s = tuple_to_tensor_((rank,), mstype.int32)
2155
+ v = Tensor(value)
2156
+ t = _get_cache_prim(P.FillV2)()(s, v)
2157
+ out = tensor_to_tuple_(t)
2158
+ return out
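+ # In eager terms this helper is equivalent to building a tuple of `rank` copies of `value`;
+ # the FillV2/TensorToTuple route keeps it usable under graph mode (illustrative sketch only).
+ # def _make_tuple_ref(rank, value):
+ #     return (value,) * rank
+ # assert _make_tuple_ref(3, 2.0) == (2.0, 2.0, 2.0)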
2159
+
2160
+
2007
2161
  @_primexpr
2008
- def _scale_factor_convert_size(shape, scale_factor, dim):
2009
- return [int(float(shape[i + 2]) * scale_factor[i] // 1) for i in range(dim)]
2162
+ def _interpolate_scale_factor_convert_size(shape, scale_factor):
2163
+ x = tuple_to_tensor_(shape[2:], mstype.int64)
2164
+ y = tuple_to_tensor_(scale_factor, mstype.float32)
2165
+ t = x * y
2166
+ t = ops.TruncateDiv()(t, Tensor(1))
2167
+ t = ops.cast(t, mstype.int64)
2168
+ return tensor_to_tuple_(t)
2169
+
2170
+
2171
+ def _interpolate_size_check_with_rank(size, input_rank):
2172
+ if len(size) != input_rank - 2:
2173
+ raise ValueError(
2174
+ f"For 'interpolate', 'input' and 'size' must have the same spatial dimensions, "
2175
+ f"but got 'input' is {input_rank - 2}D, 'size' is {len(size)}D")
2176
+
2177
+
2178
+ def _interpolate_scale_factor_check_with_rank(scale_factor, input_rank):
2179
+ if len(scale_factor) != input_rank - 2:
2180
+ raise ValueError(
2181
+ f"For 'interpolate', 'input' and 'scale_factor' must have the same spatial dimensions, "
2182
+ f"but got 'input' is {input_rank - 2}D, 'scale_factor' is {len(scale_factor)}D"
2183
+ )
2010
2184
 
2011
2185
 
2012
- def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None, recompute_scale_factor=None):
2186
+ def _interpolate_mode_check(mode, supported_dict):
2187
+ if isinstance(mode, list) or mode not in supported_dict:
2188
+ raise ValueError(
2189
+ f"For 'interpolate', 'mode' must be in '{list(supported_dict)}', but got {mode}"
2190
+ )
2191
+
2192
+
2193
+ def _interpolate_rank_check(input_rank, mode, supported_dict):
2194
+ if input_rank not in supported_dict.get(mode):
2195
+ raise ValueError(
2196
+ f"For 'interpolate', {mode} only support '{list(supported_dict.get(mode, {}))}'D, but got {input_rank}D"
2197
+ )
2198
+
2199
+
2200
+ def _interpolate_scale_factor_check(scale_factor, mode, rank, supported_dict):
2201
+ if scale_factor is not None and "scale_factor" not in supported_dict.get(
2202
+ mode, {}).get(rank):
2203
+ raise ValueError(
2204
+ f"For 'interpolate', 'scale_factor' option cannot currently be set with the "
2205
+ f"mode = {mode} and dim = {rank}D.")
2206
+
2207
+
2208
+ def _interpolate_align_corners_mode_check(rank, mode, supported_dict):
2209
+ if "align_corners" not in supported_dict.get(mode, {}).get(rank):
2210
+ raise ValueError(
2211
+ f"For 'interpolate', 'align_corners' option cannot currently be set with the "
2212
+ f"mode = {mode}, and dim = {rank}D")
2213
+
2214
+
2215
+ def interpolate(input,
2216
+ size=None,
2217
+ scale_factor=None,
2218
+ mode="nearest",
2219
+ align_corners=None,
2220
+ recompute_scale_factor=None):
2013
2221
  r"""
2014
2222
  Samples the input Tensor to the given size or scale_factor by using one of the interpolate algorithms.
2015
2223
 
@@ -2018,16 +2226,21 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corne
2018
2226
  Input tensor must be a 3-D, 4-D, or 5-D tensor with shape
2019
2227
  :math:`(N, C, [optional D], [optional H], W)` , with data type of float.
2020
2228
  size (Union[int, tuple[int], list[int]], optional): The target size.
2021
- If size is a tuple or list, size must have the same dimensions as input.
2022
- One and only one of size and scale_factor can be set to None. Default: None.
2229
+ If size is a tuple or list, its length should be the same as the number of dimensions in input
2230
+ after removing the first two dimensions N, C.
2231
+ One and only one of size and scale_factor can be set to None. Default: ``None`` .
2023
2232
  scale_factor (Union[float, tuple[float], list[float]], optional): The scale factor of new size of the tensor.
2024
- If size is a tuple or list, size must have the same dimensions as input.
2025
- One and only one of size and scale_factor can be set to None. Default: None.
2233
+ If scale_factor is a tuple or list, its length should be the same as the number of dimensions in input
2234
+ after removing the first two dimensions N, C.
2235
+ One and only one of size and scale_factor can be set to None. Default: ``None`` .
2026
2236
  mode (str): The sampling algorithm.
2027
- One of 'nearest'(3D and 4D), 'linear' (3D only), 'bilinear' (4D only), 'bicubic' (4D only),
2028
- 'area', 'nearest-exact'(3D and 4D). Default: 'nearest'.
2237
+ One of 'nearest', 'linear' (3D only), 'bilinear' (4D only), 'trilinear' (5D only), 'bicubic' (4D only),
2238
+ 'area', 'nearest-exact'(matches Scikit-Image and PIL nearest neighbours interpolation algorithms and fixes
2239
+ known issues with `nearest`, 3D and 4D). Default: ``"nearest"`` .
2240
+
2029
2241
  align_corners (bool): If True, rescale input by :math:`(new\_height - 1) / (height - 1)`, which exactly
2030
2242
  aligns the corners of data and resized data. If False, rescale by :math:`new\_height / height`.
2243
+ Default: ``None`` .
2031
2244
 
2032
2245
  .. code-block::
2033
2246
 
@@ -2035,11 +2248,11 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corne
2035
2248
 
2036
2249
  old_i = new_length > 1 ? (new_x + 0.5) * old_length / new_length - 0.5 : 0 # 'align_corners' = False
2037
2250
 
2038
- This is only valid for 'linear', 'bilinear', or 'bicubic' modes. Default: False.
2251
+ This is only valid for 'linear', 'bilinear', or 'bicubic' modes. Default: ``False`` .
2039
2252
  recompute_scale_factor (bool, optional): Recalculate `scale_factor`.
2040
2253
  If True, the parameter `size` will be calculated using the value of the `scale_factor`,
2041
2254
  and finally scaled using the value of `size`.
2042
- If False, the value of `size` or `scale_factor` will be used for direct interpolation. Default: None.
2255
+ If False, the value of `size` or `scale_factor` will be used for direct interpolation. Default: ``None`` .
2043
2256
 
2044
2257
  .. note::
2045
2258
  The 'nearest-exact' mode is the same as the nearest-neighbor interpolation algorithm used in
@@ -2055,22 +2268,26 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corne
2055
2268
  +---------------+-----------+---------------+--------------+----------------+
2056
2269
  | | 4 | \- | × | Ascend,GPU,CPU |
2057
2270
  +---------------+-----------+---------------+--------------+----------------+
2058
- | linear | 3 | | × | GPU,CPU |
2271
+ | | 5 | \- | √ | Ascend,GPU,CPU |
2272
+ +---------------+-----------+---------------+--------------+----------------+
2273
+ | linear | 3 | √ | × | Ascend,GPU,CPU |
2059
2274
  +---------------+-----------+---------------+--------------+----------------+
2060
2275
  | bilinear | 4 | √ | × | Ascend,GPU,CPU |
2061
2276
  +---------------+-----------+---------------+--------------+----------------+
2062
- | bicubic | 4 | √ | × | GPU,CPU |
2277
+ | bicubic | 4 | √ | × | Ascend,GPU,CPU |
2063
2278
  +---------------+-----------+---------------+--------------+----------------+
2064
2279
  | area | 3 | \- | √ | Ascend,GPU,CPU |
2065
2280
  +---------------+-----------+---------------+--------------+----------------+
2066
- | | 4 | \- | √ | GPU |
2281
+ | | 4 | \- | √ | Ascend,GPU,CPU |
2067
2282
  +---------------+-----------+---------------+--------------+----------------+
2068
- | | 5 | \- | √ | GPU,CPU |
2283
+ | | 5 | \- | √ | Ascend,GPU,CPU |
2069
2284
  +---------------+-----------+---------------+--------------+----------------+
2070
2285
  | nearest-exact | 3 | \- | × | Ascend,CPU |
2071
2286
  +---------------+-----------+---------------+--------------+----------------+
2072
2287
  | | 4 | \- | × | Ascend,CPU |
2073
2288
  +---------------+-----------+---------------+--------------+----------------+
2289
+ | trilinear | 5 | √ | √ | Ascend,GPU,CPU |
2290
+ +---------------+-----------+---------------+--------------+----------------+
2074
2291
 
2075
2292
  - `-` indicates that there is no such parameter.
2076
2293
  - `×` indicates that this parameter is not currently supported.
@@ -2106,73 +2323,68 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corne
2106
2323
 
2107
2324
  def run_nearest(x, size, align_corners=None, scale_factor=None):
2108
2325
  # 3D 4D use ResizeNearestNeighborV2, 5D use UpsampleNearest3D
2109
- if x.ndim == 3:
2110
- size = seq.TupleToTensor()((size[0], 1), mstype.int32)
2326
+ x_rank = F.rank(x)
2327
+ if size is not None and x_rank == 3:
2328
+ t1 = seq.TupleToTensor()(size[:1], mstype.int32)
2329
+ t2 = Tensor([1], mstype.int32)
2330
+ size = F.concat([t1, t2])
2111
2331
  x = x.unsqueeze(-1)
2112
- x = _get_cache_prim(P.ResizeNearestNeighborV2)(data_format="NCHW")(x, size)
2332
+ x = _get_cache_prim(P.ResizeNearestNeighborV2)()(
2333
+ x, size)
2113
2334
  x = P.Squeeze(-1)(x)
2114
- elif x.ndim == 4:
2115
- if isinstance(size, int):
2116
- size = F.scalar_to_tensor(size, mstype.int32)
2117
- elif isinstance(size, tuple):
2118
- size = seq.TupleToTensor()(size, mstype.int32)
2119
- else:
2120
- size = seq.ListToTensor()(size, mstype.int32)
2121
- x = _get_cache_prim(P.ResizeNearestNeighborV2)(data_format="NCHW")(x, size)
2335
+ elif size is not None and x_rank == 4:
2336
+ size = seq.TupleToTensor()(size[:2], mstype.int32)
2337
+ x = _get_cache_prim(P.ResizeNearestNeighborV2)()(
2338
+ x, size)
2122
2339
  else:
2123
- x = _get_cache_prim(P.UpsampleNearest3D)(size, scales=scale_factor)(x)
2340
+ x = _get_cache_prim(P.UpsampleNearest3D)()(x, size, scale_factor)
2124
2341
  return x
2125
2342
 
2126
2343
  def run_linear(x, size, align_corners=None, scale_factor=None):
2127
2344
  coordinate_transformation_mode = "align_corners" if align_corners else "half_pixel"
2128
- resize = _get_cache_prim(P.image_ops.ResizeLinear1D)(
2129
- coordinate_transformation_mode
2130
- )
2345
+ resize = _get_cache_prim(
2346
+ P.image_ops.ResizeLinear1D)(coordinate_transformation_mode)
2131
2347
  return resize(x, size)
2132
2348
 
2133
2349
  def run_bilinear(x, size, align_corners=None, scale_factor=None):
2134
- resize = _get_cache_prim(P.ResizeBilinearV2)(align_corners, not align_corners)
2350
+ resize = _get_cache_prim(P.ResizeBilinearV2)(align_corners,
2351
+ not align_corners)
2135
2352
  return resize(x, size)
2136
2353
 
2137
2354
  def run_trilinear(x, size, align_corners=None, scale_factor=None):
2138
- resize = _get_cache_prim(P.nn_ops.UpsampleTrilinear3D)(
2139
- output_size=size, scales=scale_factor, align_corners=align_corners
2140
- )
2141
- return resize(x)
2355
+ resize = _get_cache_prim(
2356
+ P.nn_ops.UpsampleTrilinear3D)(align_corners=align_corners)
2357
+ return resize(x, size, scale_factor)
2142
2358
 
2143
2359
  def run_bicubic(x, size, align_corners=None, scale_factor=None):
2144
2360
  resize = _get_cache_prim(P.image_ops.ResizeBicubic)(
2145
- align_corners=align_corners, half_pixel_centers=not align_corners
2146
- )
2147
- if isinstance(size, int):
2148
- size = F.scalar_to_tensor(size, mstype.int32)
2149
- elif isinstance(size, tuple):
2150
- size = seq.TupleToTensor()(size, mstype.int32)
2151
- else:
2152
- size = seq.ListToTensor()(size, mstype.int32)
2361
+ align_corners=align_corners, half_pixel_centers=not align_corners)
2362
+ size = seq.TupleToTensor()(size, mstype.int32)
2153
2363
  x = resize(x, size)
2154
2364
  return x
2155
2365
 
2156
2366
  def run_area(x, size, align_corners=None, scale_factor=None):
2157
- if x.ndim == 3:
2367
+ x_rank = F.rank(x)
2368
+ if x_rank == 3:
2158
2369
  x = ops.adaptive_avg_pool1d(x, size[0])
2159
- elif x.ndim == 4:
2370
+ elif x_rank == 4:
2160
2371
  x = ops.adaptive_avg_pool2d(x, tuple(size))
2161
2372
  else:
2162
2373
  x = ops.adaptive_avg_pool3d(x, tuple(size))
2163
2374
  return x
2164
2375
 
2165
2376
  def run_nearest_exact(x, size, align_corners=None, scale_factor=None):
2166
- if x.ndim == 3:
2377
+ x_rank = F.rank(x)
2378
+ if x_rank == 3:
2167
2379
  size = seq.TupleToTensor()((size[0], 1), mstype.int32)
2168
2380
  # For impl of nearest 3D use 4D.
2169
2381
  x = x.unsqueeze(-1)
2170
2382
  resize = _get_cache_prim(P.ResizeNearestNeighborV2)(
2171
- data_format="NCHW", align_corners=False, half_pixel_centers=True
2172
- )
2383
+ align_corners=False,
2384
+ half_pixel_centers=True)
2173
2385
  x = resize(x, size)
2174
2386
  x = P.Squeeze(-1)(x)
2175
- if x.ndim == 4:
2387
+ if x_rank == 4:
2176
2388
  if isinstance(size, int):
2177
2389
  size = F.scalar_to_tensor(size, mstype.int32)
2178
2390
  elif isinstance(size, tuple):
@@ -2180,18 +2392,41 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corne
2180
2392
  else:
2181
2393
  size = seq.ListToTensor()(size, mstype.int32)
2182
2394
  resize = _get_cache_prim(P.ResizeNearestNeighborV2)(
2183
- data_format="NCHW", align_corners=False, half_pixel_centers=True
2184
- )
2395
+ align_corners=False,
2396
+ half_pixel_centers=True)
2185
2397
  x = resize(x, size)
2186
2398
  return x
2187
2399
 
2188
2400
  supported_dict = {
2189
- "nearest": {3: (), 4: ()},
2190
- "linear": {3: ("align_corners",)},
2191
- "bilinear": {4: ("align_corners",)},
2192
- "bicubic": {4: ("align_corners",)},
2193
- "area": {3: ("scale_factor",), 4: ("scale_factor",), 5: ("scale_factor",)},
2194
- "nearest-exact": {3: (), 4: ()},
2401
+ "nearest": {
2402
+ 3: (),
2403
+ 4: (),
2404
+ 5: ("scale_factor",)
2405
+ },
2406
+ "linear": {
2407
+ 3: ("align_corners",)
2408
+ },
2409
+ "bilinear": {
2410
+ 4: ("align_corners",)
2411
+ },
2412
+ "bicubic": {
2413
+ 4: ("align_corners",)
2414
+ },
2415
+ "area": {
2416
+ 3: ("scale_factor",),
2417
+ 4: ("scale_factor",),
2418
+ 5: ("scale_factor",)
2419
+ },
2420
+ "nearest-exact": {
2421
+ 3: (),
2422
+ 4: ()
2423
+ },
2424
+ "trilinear": {
2425
+ 5: (
2426
+ "align_corners",
2427
+ "scale_factor",
2428
+ )
2429
+ },
2195
2430
  }
2196
2431
  resize_func = {
2197
2432
  "nearest": run_nearest,
@@ -2202,76 +2437,87 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corne
2202
2437
  "area": run_area,
2203
2438
  "nearest-exact": run_nearest_exact,
2204
2439
  }
2440
+
2205
2441
  if not isinstance(input, Tensor):
2206
- raise TypeError(f"For 'interpolate', 'input' must be a tensor, but got {type(input)}")
2442
+ raise TypeError(
2443
+ f"For 'interpolate', 'input' must be a tensor, but got {type(input)}"
2444
+ )
2445
+
2446
+ if isinstance(size, list):
2447
+ size = tuple(size)
2448
+ if isinstance(scale_factor, list):
2449
+ scale_factor = tuple(scale_factor)
2450
+
2451
+ rank = F.rank(input)
2452
+ shape = F.shape(input)
2453
+ dim_unknown = _is_dim_unknown(shape)
2454
+
2455
+ # check for size and scale_factor
2207
2456
  if size is not None and scale_factor is not None:
2208
2457
  raise ValueError(
2209
2458
  "For 'interpolate', 'size' and 'scale_factor' cannot be set simultaneously"
2210
2459
  )
2211
2460
  if size is not None:
2212
2461
  if isinstance(size, (list, tuple)):
2213
- if len(size) != input.ndim - 2:
2214
- raise ValueError(
2215
- f"For 'interpolate', 'input' and 'size' must have the same spatial dimensions, "
2216
- f"but got 'input' is {input.ndim - 2}D, 'size' is {len(size)}D"
2217
- )
2218
2462
  check_positive_int_sequence_const(size, "size", "interpolate")
2463
+ if dim_unknown is False:
2464
+ _interpolate_size_check_with_rank(size, rank)
2219
2465
  else:
2220
2466
  check_positive_int_const(size, "size", "interpolate")
2221
- size = [size for _ in range(input.ndim - 2)]
2467
+ if dim_unknown is False:
2468
+ size = tuple([size for _ in range(rank - 2)])
2469
+ else:
2470
+ size = _interploate_make_tuple(rank - 2, size)
2222
2471
  elif scale_factor is not None:
2223
2472
  if isinstance(scale_factor, (list, tuple)):
2224
- if len(scale_factor) != input.ndim - 2:
2225
- raise ValueError(
2226
- f"For 'interpolate', 'input' and 'scale_factor' must have the same spatial dimensions, "
2227
- f"but got 'input' is {input.ndim - 2}D, 'scale_factor' is {len(scale_factor)}D"
2228
- )
2229
- check_positive_float_sequence_const(scale_factor, "scale_factor", "interpolate")
2473
+ check_positive_float_sequence_const(scale_factor, "scale_factor",
2474
+ "interpolate")
2475
+ if dim_unknown is False:
2476
+ _interpolate_scale_factor_check_with_rank(scale_factor, rank)
2230
2477
  else:
2231
- check_positive_float_const(scale_factor, "scale_factor", "interpolate")
2232
- scale_factor = [scale_factor for _ in range(input.ndim - 2)]
2478
+ check_positive_float_const(scale_factor, "scale_factor",
2479
+ "interpolate")
2480
+ if dim_unknown is False:
2481
+ scale_factor = tuple([scale_factor for _ in range(rank - 2)])
2482
+ else:
2483
+ scale_factor = _interploate_make_tuple(rank - 2, scale_factor)
2233
2484
  else:
2234
2485
  raise ValueError(
2235
2486
  "For 'interpolate', 'size' and 'scale_factor' cannot be both empty"
2236
2487
  )
2237
2488
 
2238
- if isinstance(mode, list) or mode not in supported_dict:
2239
- raise ValueError(
2240
- f"For 'interpolate', 'mode' must be in '{list(supported_dict)}', but got {mode}"
2241
- )
2242
- if input.ndim not in supported_dict.get(mode):
2243
- raise ValueError(
2244
- f"For 'interpolate', {mode} only support '{list(supported_dict.get(mode, {}))}'D, but got {input.ndim}D"
2245
- )
2489
+ # rank check
2490
+ _interpolate_mode_check(mode, supported_dict)
2491
+ if dim_unknown is False:
2492
+ _interpolate_rank_check(rank, mode, supported_dict)
2493
+
2246
2494
  # "area" mode always requires an explicit size rather than scale factor.
2247
2495
  if mode == "area" and size is None:
2248
2496
  recompute_scale_factor = True
2497
+
2498
+ # recompute_scale_factor
2249
2499
  if recompute_scale_factor is not None and recompute_scale_factor:
2250
- check_bool_const(recompute_scale_factor, "recompute_scale_factor", "interpolate")
2500
+ check_bool_const(recompute_scale_factor, "recompute_scale_factor",
2501
+ "interpolate")
2251
2502
  if size is not None:
2252
2503
  raise ValueError(
2253
2504
  "For 'interpolate', it is incorrect to set 'recompute_scale_factor' to True"
2254
- " after specifying an explicit 'size'."
2255
- )
2256
- size = _scale_factor_convert_size(input.shape, scale_factor, input.ndim - 2)
2505
+ " after specifying an explicit 'size'.")
2506
+ size = _interpolate_scale_factor_convert_size(shape, scale_factor)
2257
2507
  scale_factor = None
2258
2508
  else:
2259
- if scale_factor is not None and "scale_factor" not in supported_dict.get(mode, {}).get(input.ndim):
2260
- raise ValueError(
2261
- f"For 'interpolate', 'scale_factor' option cannot currently be set with the "
2262
- f"mode = {mode} and dim = {input.ndim}D."
2263
- )
2509
+ if dim_unknown is False:
2510
+ _interpolate_scale_factor_check(scale_factor, mode, rank,
2511
+ supported_dict)
2512
+
2513
+ # align_corners
2264
2514
  if align_corners is not None:
2265
2515
  check_bool_const(align_corners, "align_corners", "interpolate")
2266
- if "align_corners" not in supported_dict.get(mode, {}).get(input.ndim):
2267
- raise ValueError(
2268
- f"For 'interpolate', 'align_corners' option cannot currently be set with the "
2269
- f"mode = {mode}, and dim = {input.ndim}D"
2270
- )
2516
+ if dim_unknown is False:
2517
+ _interpolate_align_corners_mode_check(rank, mode, supported_dict)
2271
2518
  else:
2272
2519
  align_corners = False
2273
- if isinstance(size, list):
2274
- size = tuple(size)
2520
+
2275
2521
  return resize_func.get(mode)(input, size, align_corners, scale_factor)
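The rewritten `interpolate` above converts a `scale_factor` into an output `size` (via `_interpolate_scale_factor_convert_size`) when `recompute_scale_factor=True`, and otherwise validates the options against `supported_dict`. A minimal doctest-style sketch of the two call patterns; the input shape and values here are illustrative only:
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> x = Tensor(np.arange(12, dtype=np.float32).reshape(1, 1, 3, 4))
>>> # explicit size: one entry per spatial dimension (after N, C)
>>> out1 = ops.interpolate(x, size=(6, 8), mode="bilinear")
>>> # scale_factor is recomputed into a size before the nearest resize runs
>>> out2 = ops.interpolate(x, scale_factor=2.0, recompute_scale_factor=True)
>>> print(out1.shape, out2.shape)
(1, 1, 6, 8) (1, 1, 6, 8)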
2276
2522
 
2277
2523
 
@@ -2309,6 +2555,9 @@ def softsign(x):
2309
2555
  ``Ascend`` ``GPU`` ``CPU``
2310
2556
 
2311
2557
  Examples:
2558
+ >>> import mindspore
2559
+ >>> import numpy as np
2560
+ >>> from mindspore import Tensor, ops
2312
2561
  >>> x = Tensor(np.array([0, -1, 2, 30, -30]), mindspore.float32)
2313
2562
  >>> output = ops.softsign(x)
2314
2563
  >>> print(output)
@@ -2317,6 +2566,60 @@ def softsign(x):
2317
2566
  return softsign_(x)
2318
2567
 
2319
2568
 
2569
+ def soft_margin_loss(input, target, reduction='mean'):
2570
+ r"""
2571
+ Calculate the soft margin loss of input and target.
2572
+
2573
+ Creates a criterion that optimizes a two-class classification
2574
+ logistic loss between input tensor :math:`x` and target tensor :math:`y`
2575
+ (containing 1 or -1).
2576
+
2577
+ .. math::
2578
+ \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()}
2579
+
2580
+ where :math:`x.nelement()` is the number of elements of :math:`x`.
2581
+
2582
+ .. warning::
2583
+ This is an experimental API that is subject to change or deletion.
2584
+
2585
+ Args:
2586
+ input (Tensor): Predict data. Data type must be float16 or float32.
2587
+ target (Tensor): Ground truth data, with the same type and shape as `input`.
2588
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
2589
+ ``'sum'`` . Default: ``'mean'`` .
2590
+
2591
+ - ``'none'``: no reduction will be applied.
2592
+ - ``'mean'``: compute and return the mean of elements in the output.
2593
+ - ``'sum'``: the output elements will be summed.
2594
+
2595
+ Returns:
2596
+ Tensor or Scalar. If `reduction` is ``'none'``, its shape is the same as `input`.
2597
+ Otherwise, a scalar value will be returned.
2598
+
2599
+ Raises:
2600
+ TypeError: If `input` or `target` is not a Tensor.
2601
+ TypeError: If dtype of `input` or `target` is neither float16 nor float32.
2602
+ ValueError: If shape of `input` is not the same as that of `target`.
2603
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
2604
+
2605
+ Supported Platforms:
2606
+ ``Ascend`` ``GPU``
2607
+
2608
+ Examples:
2609
+ >>> import mindspore
2610
+ >>> import numpy as np
2611
+ >>> from mindspore import Tensor, ops
2612
+ >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
2613
+ >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32)
2614
+ >>> output = ops.soft_margin_loss(logits, labels)
2615
+ >>> print(output)
2616
+ 0.6764238
2617
+ """
2618
+ soft_margin_loss_op = _get_cache_prim(P.SoftMarginLoss)(reduction=reduction)
2619
+ output = soft_margin_loss_op(input, target)
2620
+ return output
2621
+
2622
+
2320
2623
  def softmax(x, axis=-1, *, dtype=None):
2321
2624
  r"""
2322
2625
  Applies the Softmax operation to the input tensor on the specified axis.
@@ -2324,18 +2627,18 @@ def softmax(x, axis=-1, *, dtype=None):
2324
2627
  the Softmax function is shown as follows:
2325
2628
 
2326
2629
  .. math::
2327
- \text{output}(x_i) = \frac{exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)},
2630
+ \text{output}(x_i) = \frac{\exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)},
2328
2631
 
2329
2632
  where :math:`N` is the length of the tensor.
2330
2633
 
2331
2634
  Args:
2332
- axis (Union[int, tuple[int]], optional): The axis to perform the Softmax operation. Default: -1.
2635
+ axis (Union[int, tuple[int]], optional): The axis to perform the Softmax operation. Default: ``-1`` .
2333
2636
  x (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
2334
2637
  additional dimensions, with float16 or float32 data type.
2335
2638
 
2336
2639
  Keyword Args:
2337
2640
  dtype (:class:`mindspore.dtype`, optional): When set, `x` will be converted to the specified type,
2338
- `dtype`, before execution, and dtype of returned Tensor will also be `dtype`. Default: None.
2641
+ `dtype`, before execution, and dtype of returned Tensor will also be `dtype`. Default: ``None`` .
2339
2642
 
2340
2643
  Returns:
2341
2644
  Tensor, with the same type and shape as the logits.
@@ -2350,6 +2653,9 @@ def softmax(x, axis=-1, *, dtype=None):
2350
2653
  ``Ascend`` ``GPU`` ``CPU``
2351
2654
 
2352
2655
  Examples:
2656
+ >>> import mindspore
2657
+ >>> import numpy as np
2658
+ >>> from mindspore import Tensor, ops
2353
2659
  >>> x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
2354
2660
  >>> output = ops.softmax(x)
2355
2661
  >>> print(output)
@@ -2372,18 +2678,18 @@ def softmin(x, axis=-1, *, dtype=None):
2372
2678
  the Softmin function is shown as follows:
2373
2679
 
2374
2680
  .. math::
2375
- \text{output}(x_i) = \frac{exp(-x_i)}{\sum_{j = 0}^{N-1}\exp(-x_j)},
2681
+ \text{output}(x_i) = \frac{\exp(-x_i)}{\sum_{j = 0}^{N-1}\exp(-x_j)},
2376
2682
 
2377
2683
  where :math:`N` is the length of the tensor.
2378
2684
 
2379
2685
  Args:
2380
- axis (Union[int, tuple[int]], optional): The axis to perform the Softmin operation. Default: -1.
2686
+ axis (Union[int, tuple[int]], optional): The axis to perform the Softmin operation. Default: ``-1`` .
2381
2687
  x (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
2382
2688
  additional dimensions, with float16 or float32 data type.
2383
2689
 
2384
2690
  Keyword Args:
2385
2691
  dtype (:class:`mindspore.dtype`, optional): When set, `x` will be converted to the specified type,
2386
- `dtype`, before execution, and dtype of returned Tensor will also be `dtype`. Default: None.
2692
+ `dtype`, before execution, and dtype of returned Tensor will also be `dtype`. Default: ``None`` .
2387
2693
 
2388
2694
  Returns:
2389
2695
  Tensor, with the same type and shape as the logits.
@@ -2398,6 +2704,9 @@ def softmin(x, axis=-1, *, dtype=None):
2398
2704
  ``Ascend`` ``GPU`` ``CPU``
2399
2705
 
2400
2706
  Examples:
2707
+ >>> import mindspore
2708
+ >>> import numpy as np
2709
+ >>> from mindspore import Tensor, ops
2401
2710
  >>> x = Tensor(np.array([-1, -2, 0, 2, 1]), mindspore.float16)
2402
2711
  >>> output = ops.softmin(x)
2403
2712
  >>> print(output)
@@ -2407,7 +2716,7 @@ def softmin(x, axis=-1, *, dtype=None):
2407
2716
  if dtype is not None:
2408
2717
  x = ops.cast(x, dtype)
2409
2718
  softmax_ = _get_cache_prim(P.Softmax)(axis=axis)
2410
- return softmax_(-x)
2719
+ return softmax_(-1*x)
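The return statement above relies on the identity softmin(x) = softmax(-x); a short doctest-style check of that relation (sample values are illustrative only):
>>> import mindspore
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> x = Tensor(np.array([-1, -2, 0, 2, 1]), mindspore.float16)
>>> print(np.allclose(ops.softmin(x).asnumpy(), ops.softmax(-x).asnumpy()))
True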
2411
2720
 
2412
2721
 
2413
2722
  def softshrink(x, lambd=0.5):
@@ -2424,7 +2733,7 @@ def softshrink(x, lambd=0.5):
2424
2733
 
2425
2734
  Args:
2426
2735
  x (Tensor): The input of soft shrink with data type of float16 or float32.
2427
- lambd (float): The :math:`\lambda` must be no less than zero. Default: 0.5.
2736
+ lambd (float): The :math:`\lambda` must be no less than zero. Default: ``0.5`` .
2428
2737
 
2429
2738
  Returns:
2430
2739
  Tensor, has the same shape and data type as `x`.
@@ -2461,6 +2770,55 @@ def soft_shrink(input, lambd=0.5):
2461
2770
  return soft_shrink_op(input)
2462
2771
 
2463
2772
 
2773
+ def softplus(input, beta=1, threshold=20): # pylint:disable=redefined-outer-name
2774
+ r"""
2775
+ Applies softplus function to `input` element-wise.
2776
+
2777
+ The softplus function is shown as follows, where x is an element of `input`:
2778
+
2779
+ .. math::
2780
+
2781
+ \text{output} = \frac{1}{\beta}\log(1 + \exp(\beta * x))
2782
+
2783
+ When :math:`input * beta > threshold`, the implementation converts to the linear function
2784
+ to ensure numerical stability.
2785
+
2786
+ Args:
2787
+ input (Tensor): Tensor of any dimension.
2788
+ Supported dtypes:
2789
+
2790
+ - GPU/CPU: float16, float32, float64.
2791
+ - Ascend: float16, float32.
2792
+
2793
+ beta (int, optional): The :math:`\beta` value in softplus function. Default: ``1`` .
2794
+ threshold (int, optional): When :math:`input * beta > threshold`, softplus is converted to a linear function.
2795
+ Default: ``20`` .
2796
+
2797
+ Returns:
2798
+ Tensor, with the same type and shape as the `input` .
2799
+
2800
+ Raises:
2801
+ TypeError: If `input` is not a Tensor.
2802
+ TypeError: If the dtype of `input` is not float16, float32 or float64.
2803
+
2804
+ Supported Platforms:
2805
+ ``Ascend`` ``GPU`` ``CPU``
2806
+
2807
+ Examples:
2808
+ >>> import mindspore
2809
+ >>> import numpy as np
2810
+ >>> from mindspore import Tensor, ops
2811
+ >>> input = Tensor(np.array([0.1, 0.2, 30, 25]), mindspore.float32)
2812
+ >>> output = ops.softplus(input)
2813
+ >>> print(output)
2814
+ [0.7443967 0.79813886 30. 25.]
2815
+ """
2816
+ softplus_op = _get_cache_prim(P.Softplus)()
2817
+ scaling_input = beta * input
2818
+ op_output = (1 / beta) * softplus_op(scaling_input)
2819
+ return ops.select(input * beta > threshold, input, op_output)
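A quick numerical cross-check of the `softplus` formula above against a NumPy reference; the sample values and the use of `beta=2` are illustrative only:
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> x = np.array([-1.0, 0.0, 1.0], dtype=np.float32)
>>> out = ops.softplus(Tensor(x), beta=2)
>>> ref = (1 / 2) * np.log1p(np.exp(2 * x))
>>> print(np.allclose(out.asnumpy(), ref, atol=1e-5))
True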
2820
+
2821
+
2464
2822
  def silu(x):
2465
2823
  r"""
2466
2824
  Computes Sigmoid Linear Unit of input element-wise. The SiLU function is defined as:
@@ -2526,6 +2884,9 @@ def selu(input_x):
2526
2884
  ``Ascend`` ``GPU`` ``CPU``
2527
2885
 
2528
2886
  Examples:
2887
+ >>> import mindspore
2888
+ >>> import numpy as np
2889
+ >>> from mindspore import Tensor, ops
2529
2890
  >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
2530
2891
  >>> output = ops.selu(input_x)
2531
2892
  >>> print(output)
@@ -2559,12 +2920,15 @@ def sigmoid(input):
2559
2920
  ``Ascend`` ``GPU`` ``CPU``
2560
2921
 
2561
2922
  Examples:
2923
+ >>> import mindspore
2924
+ >>> import numpy as np
2925
+ >>> from mindspore import Tensor, ops
2562
2926
  >>> input = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
2563
2927
  >>> output = ops.sigmoid(input)
2564
2928
  >>> print(output)
2565
2929
  [0.7310586 0.880797 0.95257413 0.98201376 0.9933072 ]
2566
2930
  """
2567
- return sigmoid_(input)
2931
+ return _get_cache_prim(NN_OPS.Sigmoid)()(input)
2568
2932
 
2569
2933
 
2570
2934
  def logsigmoid(x):
@@ -2574,7 +2938,7 @@ def logsigmoid(x):
2574
2938
  Logsigmoid is defined as:
2575
2939
 
2576
2940
  .. math::
2577
- \text{logsigmoid}(x_{i}) = log(\frac{1}{1 + \exp(-x_i)}),
2941
+ \text{logsigmoid}(x_{i}) = \log(\frac{1}{1 + \exp(-x_i)}),
2578
2942
 
2579
2943
  where :math:`x_{i}` is the element of the input.
2580
2944
 
@@ -2589,22 +2953,178 @@ def logsigmoid(x):
2589
2953
  TypeError: If dtype of `x` is neither float16 nor float32.
2590
2954
 
2591
2955
  Supported Platforms:
2592
- ``Ascend`` ``GPU``
2956
+ ``Ascend`` ``GPU`` ``CPU``
2593
2957
 
2594
2958
  Examples:
2959
+ >>> import mindspore
2960
+ >>> import numpy as np
2961
+ >>> from mindspore import Tensor, ops
2595
2962
  >>> x = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32)
2596
2963
  >>> output = ops.logsigmoid(x)
2597
2964
  >>> print(output)
2598
2965
  [-0.31326166 -0.12692806 -0.04858734]
2599
2966
  """
2600
- output = _get_cache_prim(P.Mul)()(x, -1)
2601
- output = _get_cache_prim(P.Exp)()(output)
2602
- output = _get_cache_prim(P.Add)()(output, 1)
2603
- output = _get_cache_prim(P.Reciprocal)()(output)
2967
+ output = _get_cache_prim(P.Sigmoid)()(x)
2604
2968
  ret = _get_cache_prim(P.Log)()(output)
2605
2969
  return ret
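The refactor above computes logsigmoid as `log(sigmoid(x))` in two primitive calls instead of four; a one-line NumPy sketch of the same identity (sample values are illustrative only):
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> x = np.array([1.0, 2.0, 3.0], dtype=np.float32)
>>> out = ops.logsigmoid(Tensor(x))
>>> print(np.allclose(out.asnumpy(), np.log(1.0 / (1.0 + np.exp(-x))), atol=1e-6))
True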
2606
2970
 
2607
2971
 
2972
+ def dense(input, weight, bias=None):
2973
+ r"""
2974
+ Applies the dense connected operation to the `input`. The dense function is defined as:
2975
+
2976
+ .. math::
2977
+ output = input * weight^{T} + bias
2978
+
2979
+ .. warning::
2980
+ This is an experimental API that is subject to change or deletion.
2981
+
2982
+ Args:
2983
+ input (Tensor): Input Tensor of shape :math:`(*, in\_channels)`,
2984
+ where :math:`*` means any number of additional dimensions.
2985
+ weight (Tensor): The weight applied to the input.
2986
+ The shape is :math:`(out\_channels, in\_channels)` or :math:`(in\_channels)`.
2987
+ bias (Tensor, optional): Additive biases to the output.
2988
+ The shape is :math:`(out\_channels)` or :math:`()`. Default: ``None`` , the `bias` is 0.
2989
+
2990
+ Returns:
2991
+ Tensor, whose shape is determined by the shapes of `input` and `weight`.
2992
+
2993
+ Raises:
2994
+ TypeError: If `input` is not Tensor.
2995
+ TypeError: If `weight` is not Tensor.
2996
+ TypeError: If `bias` is not Tensor.
2997
+
2998
+ Supported Platforms:
2999
+ ``Ascend`` ``GPU`` ``CPU``
3000
+
3001
+ Examples:
3002
+ >>> import mindspore
+ >>> import numpy as np
3003
+ >>> from mindspore import Tensor, ops
3004
+ >>> input = mindspore.Tensor([[-1., 1., 2.], [-3., -3., 1.]], mindspore.float32)
3005
+ >>> weight = mindspore.Tensor([[-2., -2., -2.], [0., -1., 0.]], mindspore.float32)
3006
+ >>> bias = mindspore.Tensor([0., 1.], mindspore.float32)
3007
+ >>> output = mindspore.ops.dense(input, weight, bias)
3008
+ >>> print(output)
3009
+ [[-4. 0.]
3010
+ [10. 4.]]
3011
+ """
3012
+ _check_is_tensor("input", input, "dense")
3013
+ _check_is_tensor("weight", weight, "dense")
3014
+ _check_is_tensor("bias", bias, "dense")
3015
+ weight = ops.t(weight)
3016
+ input = ops.matmul(input, weight)
3017
+ input_shape = input.shape
3018
+ if bias is not None:
3019
+ input = input + bias
3020
+ _check_dense_add_bias_shape(input_shape, input.shape, bias.shape)
3021
+ return input
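Since `dense` implements `input * weight^T + bias`, the result can be cross-checked against plain NumPy; the values below reuse the docstring example and are illustrative only:
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> inp = np.array([[-1., 1., 2.], [-3., -3., 1.]], dtype=np.float32)
>>> w = np.array([[-2., -2., -2.], [0., -1., 0.]], dtype=np.float32)
>>> b = np.array([0., 1.], dtype=np.float32)
>>> out = ops.dense(Tensor(inp), Tensor(w), Tensor(b))
>>> print(np.allclose(out.asnumpy(), inp @ w.T + b))
True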
3022
+
3023
+
3024
+ def _check_dense_add_bias_shape(input_shape, output_shape, bias_shape):
3025
+ """Check that the output has the correct shape after adding bias."""
3026
+ if input_shape != output_shape:
3027
+ raise ValueError(f"For dense, the bias shape {bias_shape} does not match the input shape {input_shape}.")
3028
+
3029
+
3030
+ @_primexpr
3031
+ def check_dense_inputs_same_shape(input1_shape, input2_shape, prim_name=None):
3032
+ """check bidense input Tensors' shape"""
3033
+ msg_prefix = f"For '{prim_name}', the" if prim_name else "The"
3034
+ if input1_shape[:-1] != input2_shape[:-1]:
3035
+ raise ValueError(f"{msg_prefix} dimensions except the last of 'input1' must be same as 'input2', but got "
3036
+ f"{input1_shape} of 'input1' and {input2_shape} of 'input2'")
3037
+
3038
+
3039
+ def bidense(input1, input2, weight, bias=None):
3040
+ r"""
3041
+ Applies a bilinear dense connected layer to `input1` and `input2`. The bilinear dense function is defined as:
3042
+
3043
+ .. math::
3044
+ output = x_{1}^{T}Ax_{2} + b
3045
+
3046
+ :math:`x_{1}` represents `input1` , :math:`x_{2}` represents `input2` , :math:`A` represents `weight` ,
3047
+ :math:`b` represents `bias` .
3048
+
3049
+ .. warning::
3050
+ This is an experimental API that is subject to change or deletion.
3051
+
3052
+ Args:
3053
+ input1 (Tensor): Input Tensor of shape :math:`(*, in1\_channels)`,
3054
+ where :math:`*` means any number of additional dimensions. All but the last dimension
3055
+ should be the same as `input2`.
3056
+ input2 (Tensor): Input Tensor of shape :math:`(*, in2\_channels)`,
3057
+ where :math:`*` means any number of additional dimensions. All but the last dimension
3058
+ should be the same as `input1`.
3059
+ weight (Tensor): The weight applied to the input1 and input2.
3060
+ The shape is :math:`(out\_channels, in1\_channels, in2\_channels)`.
3061
+ bias (Tensor, optional): Additive biases to the output.
3062
+ The shape is :math:`(out\_channels)` or :math:`()`. Default: ``None`` , the `bias` is 0.
3063
+
3064
+ Returns:
3065
+ Tensor, shape :math:`(*, out\_channels)`, where :math:`*` means any number of additional dimensions.
3066
+ All but the last dimension should be the same as the input Tensors.
3067
+
3068
+ Raises:
3069
+ TypeError: If `input1` is not Tensor.
3070
+ TypeError: If `input2` is not Tensor.
3071
+ TypeError: If `weight` is not Tensor.
3072
+ TypeError: If `bias` is not Tensor.
3073
+ ValueError: If dimensions except the last of 'input1' are different from 'input2' .
3074
+
3075
+
3076
+ Supported Platforms:
3077
+ ``Ascend`` ``GPU`` ``CPU``
3078
+
3079
+ Examples:
3080
+ >>> import mindspore
3081
+ >>> from mindspore import Tensor, ops
3082
+ >>> input1 = mindspore.Tensor([[-1.1283, 1.2603],
3083
+ ... [0.0214, 0.7801],
3084
+ ... [-1.2086, 1.2849]], mindspore.float32)
3085
+ >>> input2 = mindspore.Tensor([[-0.4631, 0.3238, 0.4201],
3086
+ ... [0.6215, -1.0910, -0.5757],
3087
+ ... [-0.7788, -0.0706, -0.7942]], mindspore.float32)
3088
+ >>> weight = mindspore.Tensor([[[-0.3132, 0.9271, 1.1010],
3089
+ ... [0.6555, -1.2162, -0.2987]],
3090
+ ... [[1.0458, 0.5886, 0.2523],
3091
+ ... [-1.3486, -0.8103, -0.2080]],
3092
+ ... [[1.1685, 0.5569, -0.3987],
3093
+ ... [-0.4265, -2.6295, 0.8535]],
3094
+ ... [[0.6948, -1.1288, -0.6978],
3095
+ ... [0.3511, 0.0609, -0.1122]]], mindspore.float32)
3096
+ >>> output = ops.bidense(input1, input2, weight)
3097
+ >>> print(output)
3098
+ [[-2.0612743 0.5581219 0.22383511 0.8667302]
3099
+ [1.4476739 0.12626505 1.6552988 0.21297503]
3100
+ [0.6003161 2.912046 0.5590313 -0.35449564]]
3101
+ """
3102
+ _check_is_tensor("input1", input1, "bidense")
3103
+ _check_is_tensor("input2", input2, "bidense")
3104
+ _check_is_tensor("weight", weight, "bidense")
3105
+ _check_is_tensor("bias", bias, "bidense")
3106
+ input1_shape = input1.shape
3107
+ input2_shape = input2.shape
3108
+ check_dense_inputs_same_shape(input1_shape, input2_shape, "bidense")
3109
+
3110
+ if len(input1_shape) != 2:
3111
+ input1 = input1.reshape((-1, input1_shape[-1]))
3112
+ input2 = input2.reshape((-1, input2_shape[-1]))
3113
+ batch_size = input1.shape[0]
3114
+ matmul_ = P.MatMul()
3115
+ output = matmul_(input1, weight.transpose(1, 2, 0).view(input1_shape[-1], -1))
3116
+ output = output.view(batch_size, input2_shape[-1], weight.shape[0])
3117
+ output = output.transpose(2, 0, 1) * input2
3118
+ output = output.sum(2).swapaxes(0, 1)
3119
+ if bias is not None:
3120
+ bias_add_ = P.BiasAdd()
3121
+ output = bias_add_(output, bias)
3122
+ if len(input1_shape) != 2:
3123
+ output_shape = input1_shape[:-1] + (-1,)
3124
+ output = output.reshape(output_shape)
3125
+ return output
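Because `bidense` computes :math:`x_{1}^{T}Ax_{2} + b` per output channel, its output can be checked against a NumPy einsum; the shapes and random values in this sketch are illustrative only:
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> x1 = np.random.randn(3, 2).astype(np.float32)
>>> x2 = np.random.randn(3, 4).astype(np.float32)
>>> w = np.random.randn(5, 2, 4).astype(np.float32)
>>> out = ops.bidense(Tensor(x1), Tensor(x2), Tensor(w))
>>> ref = np.einsum('bi,oij,bj->bo', x1, w, x2)
>>> print(np.allclose(out.asnumpy(), ref, atol=1e-4))
True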
3126
+
3127
+
2608
3128
  def deformable_conv2d(x, weight, offsets, kernel_size, strides, padding, bias=None, dilations=(1, 1, 1, 1), groups=1,
2609
3129
  deformable_groups=1, modulated=True):
2610
3130
  r"""
@@ -2640,16 +3160,16 @@ def deformable_conv2d(x, weight, offsets, kernel_size, strides, padding, bias=No
2640
3160
  padding (tuple[int]): A tuple of 4 integers. The number of pixels to add to each (top, bottom, left,
2641
3161
  right) side of the input.
2642
3162
  bias (Tensor, optional): An 1D tensor of additive biases to the filter outputs.
2643
- The shape is :math:`(C_{out})`. Defaults to None.
3163
+ The shape is :math:`(C_{out})`. Default: ``None`` .
2644
3164
  dilations (tuple[int], optional): A tuple of 4 integers. The dilation factor for each dimension of input. The
2645
3165
  dimension order is interpreted according to the data format of `x`. The N and C dimensions must be set
2646
- to 1. Defaults to (1, 1, 1, 1).
3166
+ to 1. Default: ``(1, 1, 1, 1)`` .
2647
3167
  groups (int, optional): An integer of type int32. The number of blocked connections from input channels
2648
- to output channels. In_channels and out_channels must both be divisible by `groups`. Defaults to 1.
3168
+ to output channels. In_channels and out_channels must both be divisible by `groups`. Default: ``1`` .
2649
3169
  deformable_groups (int, optional): An integer of type int32. The number of deformable group partitions.
2650
- In_channels must be divisible by `deformable_groups`. Defaults to 1.
3170
+ In_channels must be divisible by `deformable_groups`. Default: ``1`` .
2651
3171
  modulated (bool, optional): Specifies version of DeformableConv2D, True means v2, False means v1, currently
2652
- only supports v2. Defaults to True.
3172
+ only supports v2. Default: ``True`` .
2653
3173
 
2654
3174
  Returns:
2655
3175
  Tensor, A 4D Tensor of output feature map. With the same type as `x`. With the format "NCHW",
@@ -2677,6 +3197,9 @@ def deformable_conv2d(x, weight, offsets, kernel_size, strides, padding, bias=No
2677
3197
  ``Ascend`` ``GPU`` ``CPU``
2678
3198
 
2679
3199
  Examples:
3200
+ >>> import numpy as np
3201
+ >>> from mindspore import Tensor, ops
3202
+ >>> from mindspore import dtype as mstype
2680
3203
  >>> x = Tensor(np.ones((4, 3, 10, 10)), mstype.float32)
2681
3204
  >>> kh, kw = 3, 3
2682
3205
  >>> weight = Tensor(np.ones((5, 3, kh, kw)), mstype.float32)
@@ -2718,7 +3241,7 @@ def pdist(input, p=2.0):
2718
3241
  Args:
2719
3242
  input (Tensor): Input tensor of shape :math:`(*B, N, M)`. :math:`*B` is batch size, one-dim or multi-dim.
2720
3243
  dtype: float16, float32 or float64.
2721
- p (float): The order of norm distance, :math:`p∈[0, ∞)`. Default: 2.0.
3244
+ p (float): The order of norm distance, :math:`p∈[0, ∞)`. Default: ``2.0`` .
2722
3245
 
2723
3246
  Returns:
2724
3247
  Tensor, has the same dtype as `input`.
@@ -2734,6 +3257,8 @@ def pdist(input, p=2.0):
2734
3257
  ``GPU`` ``CPU``
2735
3258
 
2736
3259
  Examples:
3260
+ >>> import numpy as np
3261
+ >>> from mindspore import Tensor, ops
2737
3262
  >>> x = Tensor(np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]).astype(np.float32))
2738
3263
  >>> y = ops.pdist(x, p=2.0)
2739
3264
  >>> print(y)
@@ -2743,7 +3268,7 @@ def pdist(input, p=2.0):
2743
3268
  return pdist_(input)
2744
3269
 
2745
3270
 
2746
- @constexpr
3271
+ @_primexpr
2747
3272
  def _check_pad_inputs(padding):
2748
3273
  """check the input of pad"""
2749
3274
  if len(padding) % 2 != 0:
@@ -2779,7 +3304,8 @@ def pad(input_x, padding, mode='constant', value=None):
2779
3304
  :math:`\text{padding_top}, \text{padding_bottom}`,
2780
3305
  :math:`\text{padding_front}, \text{padding_back})` and so on.
2781
3306
 
2782
- mode (str, optional): Pad filling mode, "constant", "reflect" or "replicate". Default: "constant".
3307
+ mode (str, optional): Pad filling mode, ``"constant"`` , ``"reflect"`` , ``"replicate"`` or ``"circular"`` .
3308
+ Default: ``'constant'`` .
2783
3309
 
2784
3310
  For "constant" mode, please refer to :class:`mindspore.nn.ConstantPad1d` as an example to understand
2785
3311
  this filling pattern and extend the padding pattern to n dimensions.
@@ -2794,8 +3320,15 @@ def pad(input_x, padding, mode='constant', value=None):
2794
3320
  The replicate mode is used to pad the last three dimensions of 4D or 5D input, the last two dimensions of 3D
2795
3321
  or 4D input, or the last dimension of 2D or 3D input.
2796
3322
 
3323
+ For "circular" mode, the pixels from one edge of the image are wrapped around to the opposite edge,
3324
+ such that the pixel on the right edge of the image is replaced with the pixel on the left edge,
3325
+ and the pixel on the bottom edge is replaced with the pixel on the top edge.
3326
+ The circular mode is used to pad the last three dimensions of 4D or 5D input, the last two dimensions of 3D
3327
+ or 4D input, or the last dimension of 2D or 3D input.
3328
+
2797
3329
  value (Union[int, float, None], optional): Valid only in "constant" mode.
2798
3330
  Set the padding value in "constant" mode. If the value is None, 0 is used as the default padding value.
3331
+ Default: ``None`` .
2799
3332
 
2800
3333
  Returns:
2801
3334
  Tensor, the tensor after padding.
@@ -2808,7 +3341,7 @@ def pad(input_x, padding, mode='constant', value=None):
2808
3341
  ValueError: If mode is not "constant" and value not None.
2809
3342
 
2810
3343
  Supported Platforms:
2811
- ``GPU`` ``CPU``
3344
+ ``Ascend`` ``GPU`` ``CPU``
2812
3345
 
2813
3346
  Examples:
2814
3347
  >>> import mindspore as ms
@@ -2843,6 +3376,18 @@ def pad(input_x, padding, mode='constant', value=None):
2843
3376
  [4. 4. 5. 5.]
2844
3377
  [6. 6. 7. 7.]
2845
3378
  [6. 6. 7. 7.]]]]
3379
+ >>> output3 = ops.pad(x, (1, 1, 2, 1), mode='circular')
3380
+ >>> print(output3)
3381
+ [[[[1. 0. 1. 0.]
3382
+ [3. 2. 3. 2.]
3383
+ [1. 0. 1. 0.]
3384
+ [3. 2. 3. 2.]
3385
+ [1. 0. 1. 0.]]
3386
+ [[5. 4. 5. 4.]
3387
+ [7. 6. 7. 6.]
3388
+ [5. 4. 5. 4.]
3389
+ [7. 6. 7. 6.]
3390
+ [5. 4. 5. 4.]]]]
2846
3391
  """
2847
3392
  if not isinstance(input_x, Tensor):
2848
3393
  raise TypeError(f"For 'pad', the type of 'input_x' must be Tensor, but got {type(input_x)}.")
@@ -2881,19 +3426,17 @@ def relu(input):
2881
3426
 
2882
3427
  .. math::
2883
3428
 
2884
- ReLU(input) = (input)^+ = max(0, input)
3429
+ ReLU(input) = (input)^+ = \max(0, input)
2885
3430
 
2886
3431
  Note:
2887
3432
  In general, this operator is more commonly used. The difference from `ReLuV2` is that the `ReLuV2` will
2888
3433
  output one more Mask.
2889
3434
 
2890
3435
  Args:
2891
- input (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
2892
- additional dimensions, data type is
2893
- `number <https://www.mindspore.cn/docs/en/r2.0/api_python/mindspore.html#mindspore.dtype>`_.
3436
+ input (Tensor): Input Tensor of numeric types.
2894
3437
 
2895
3438
  Returns:
2896
- Tensor of shape :math:`(N, *)`, with the same dtype and shape as the `input`.
3439
+ Tensor, has the same dtype and shape as `input`.
2897
3440
 
2898
3441
  Raises:
2899
3442
  TypeError: If dtype of `input` is not a number.
@@ -2903,6 +3446,9 @@ def relu(input):
2903
3446
  ``Ascend`` ``GPU`` ``CPU``
2904
3447
 
2905
3448
  Examples:
3449
+ >>> import mindspore
3450
+ >>> import numpy as np
3451
+ >>> from mindspore import Tensor, ops
2906
3452
  >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
2907
3453
  >>> output = ops.relu(input_x)
2908
3454
  >>> print(output)
@@ -2924,7 +3470,9 @@ def relu6(x):
2924
3470
  It returns :math:`\min(\max(0,x), 6)` element-wise.
2925
3471
 
2926
3472
  Args:
2927
- x (Tensor): Tensor of shape :math:`(N, *)` with float16 or float32 data type.
3473
+ x (Tensor): Tensor of shape :math:`(N, *)`,
3474
+ where :math:`*` means any number of additional dimensions.
3475
+ Data type must be float16 or float32.
2928
3476
 
2929
3477
  Returns:
2930
3478
  Tensor, with the same dtype and shape as the `x`.
@@ -2937,6 +3485,9 @@ def relu6(x):
2937
3485
  ``Ascend`` ``GPU`` ``CPU``
2938
3486
 
2939
3487
  Examples:
3488
+ >>> import mindspore
3489
+ >>> import numpy as np
3490
+ >>> from mindspore import Tensor, ops
2940
3491
  >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
2941
3492
  >>> result = ops.relu6(input_x)
2942
3493
  >>> print(result)
@@ -2967,12 +3518,12 @@ def prelu(x, weight):
2967
3518
  The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
2968
3519
  weight (Tensor): Weight Tensor. The data type is float16 or float32.
2969
3520
  The weight can only be a Tensor, and the length is the same as the number of channels C of the `input_x`.
2970
- On GPU devices, when the input is a scalar, the shape is (1,).
3521
+ On GPU devices, when the input is a scalar, the shape is :math:`(1,)` .
2971
3522
 
2972
3523
  Returns:
2973
3524
  Tensor, with the same shape and dtype as `x`.
2974
3525
 
2975
- For detailed information, please refer to :class:`mindspore.nn.PReLU`.
3526
+ For detailed information, please refer to :class:`mindspore.nn.PReLU`.
2976
3527
 
2977
3528
  Raises:
2978
3529
  TypeError: If dtype of `x` or `weight` is neither float16 nor float32.
@@ -2984,6 +3535,9 @@ def prelu(x, weight):
2984
3535
  ``Ascend`` ``GPU`` ``CPU``
2985
3536
 
2986
3537
  Examples:
3538
+ >>> import mindspore
3539
+ >>> import numpy as np
3540
+ >>> from mindspore import Tensor, ops
2987
3541
  >>> x = Tensor(np.arange(-6, 6).reshape((2, 3, 2)), mindspore.float32)
2988
3542
  >>> weight = Tensor(np.array([0.1, 0.6, -0.3]), mindspore.float32)
2989
3543
  >>> output = ops.prelu(x, weight)
@@ -3017,8 +3571,8 @@ def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
3017
3571
 
3018
3572
  Args:
3019
3573
  input (Tensor): The input of rrelu is a Tensor of any dimension.
3020
- lower (Union[int, float]): Slope of the activation function at x < 0. Default: 1.0/8.
3021
- upper (Union[int, float]): Slope of the activation function at x < 0. Default: 1.0/3.
3574
+ lower (Union[int, float]): Lower bound of the uniformly distributed slope applied where x < 0. Default: ``1.0 / 8`` .
3575
+ upper (Union[int, float]): Upper bound of the uniformly distributed slope applied where x < 0. Default: ``1.0 / 3`` .
3022
3576
 
3023
3577
  Returns:
3024
3578
  Tensor, after rrelu, has the same type and shape as the `input`.
@@ -3034,6 +3588,9 @@ def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
3034
3588
  ``Ascend`` ``GPU`` ``CPU``
3035
3589
 
3036
3590
  Examples:
3591
+ >>> import mindspore
3592
+ >>> import numpy as np
3593
+ >>> from mindspore import Tensor, ops
3037
3594
  >>> x = Tensor(np.array([[-1.0, 4.0], [2.0, 0]]), mindspore.float32)
3038
3595
  >>> output = ops.rrelu(x)
3039
3596
  >>> print(output)
@@ -3041,18 +3598,25 @@ def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
3041
3598
  [ 2. 0. ]]
3042
3599
  """
3043
3600
  if not isinstance(upper, (float, int)):
3044
- raise TypeError(f"For 'ops.rrelu', `upper` must be an int or a float, but got {type(upper)}")
3601
+ raise TypeError(f"For 'rrelu', 'upper' must be an int or a float, but got {type(upper)}")
3045
3602
  if not isinstance(lower, (float, int)):
3046
- raise TypeError(f"For 'ops.rrelu', `lower` must be an int or a float, but got {type(lower)}")
3603
+ raise TypeError(f"For 'rrelu', 'lower' must be an int or a float, but got {type(lower)}")
3047
3604
  if lower > upper:
3048
- raise ValueError(f"For 'ops.rrelu', the value of `upper` must be greater than `lower`, "
3605
+ raise ValueError(f"For 'rrelu', the value of 'upper' must be greater than or equal to 'lower', "
3049
3606
  f"but got upper: {upper}, lower: {lower}. ")
3050
- size = input.shape
3607
+ if not isinstance(input, Tensor):
3608
+ raise TypeError(f"For 'rrelu', the 'input' must be a Tensor but got {type(input)}.")
3609
+ _lower = Tensor(lower, mstype.float32)
3610
+ _upper = Tensor(upper, mstype.float32)
3611
+ _size = input.shape
3612
+ if ops.is_sequence_value_unknown(_size):
3613
+ dyn_shape = _get_cache_prim(P.TensorShape)()
3614
+ _size = dyn_shape(input)
3051
3615
  sign_matrix = _get_cache_prim(P.Sign)()(input)
3052
3616
  negative_filter = sign_matrix.clip(None, 0)
3053
3617
  positive_filter = sign_matrix.clip(0, None)
3054
- dtype = _get_cache_prim(P.DType)()(input)
3055
- mask = _get_cache_prim(P.Cast)()(Tensor(np.random.uniform(lower, upper, size=size)), dtype)
3618
+ _dtype = _get_cache_prim(P.DType)()(input)
3619
+ mask = ops.uniform(_size, _lower, _upper).astype(_dtype)
3056
3620
  negative_mask = negative_filter * mask * -1
3057
3621
  total_mask = negative_mask + positive_filter
3058
3622
  out = total_mask * input
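Because the rewritten `rrelu` draws its negative-side slope with `ops.uniform`, every output for a negative input lies between `lower * x` and `upper * x`; a small sanity-check sketch (input values and bounds are illustrative only):
>>> import mindspore
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> x = Tensor(np.full((4, 4), -2.0), mindspore.float32)
>>> y = ops.rrelu(x, lower=0.1, upper=0.3).asnumpy()
>>> print(bool(np.all((y >= -0.6) & (y <= -0.2))))
True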
@@ -3098,6 +3662,7 @@ def mirror_pad(input_x, paddings, mode):
3098
3662
  ``Ascend`` ``GPU`` ``CPU``
3099
3663
 
3100
3664
  Examples:
3665
+ >>> from mindspore import Tensor, ops
3101
3666
  >>> input_x = Tensor([[1,2,3], [4,5,6], [7,8,9]])
3102
3667
  >>> mode = "REFLECT"
3103
3668
  >>> paddings = Tensor([[1, 1], [2, 2]])
@@ -3134,10 +3699,10 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3134
3699
  l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
3135
3700
  \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
3136
3701
 
3137
- where :math:`x` is the inputs, :math:`t` is the target, :math:`w` is the weight,
3138
- N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
3702
+ where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, N is the batch size,
3703
+ :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.
3139
3704
 
3140
- If reduction is not 'none' (default 'mean'), then
3705
+ If `reduction` is not ``'none'`` (default ``'mean'`` ), then
3141
3706
 
3142
3707
  .. math::
3143
3708
 
@@ -3156,10 +3721,10 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3156
3721
  \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
3157
3722
  l_n = - \sum_{c=1}^C w_c \log \frac{\exp(x_{n,c})}{\sum_{i=1}^C \exp(x_{n,i})} y_{n,c}
3158
3723
 
3159
- where :math:`x` is the inputs, :math:`t` is the target, :math:`w` is the weight,
3160
- N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
3724
+ where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, N is the batch size,
3725
+ :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.
3161
3726
 
3162
- If reduction is not 'none' (default 'mean'), then
3727
+ If `reduction` is not ``'none'`` (default ``'mean'`` ), then
3163
3728
 
3164
3729
  .. math::
3165
3730
 
@@ -3171,20 +3736,25 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3171
3736
  \end{cases}
3172
3737
 
3173
3738
  Args:
3174
- input (Tensor): :math:`(N, C)` where `C = number of classes` or :math:`(N, C, H, W)`
3739
+ input (Tensor): :math:`(N)` or :math:`(N, C)` where `C = number of classes` or :math:`(N, C, H, W)`
3175
3740
  in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)`.
3176
3741
  `input` is expected to be log-probabilities, data type must be float16 or float32.
3177
- target (Tensor): :math:`(N)` or :math:`(N, d_1, d_2, ..., d_K)` for
3178
- high-dimensional loss.
3742
+ target (Tensor): For class indices, tensor of shape :math:`()`, :math:`(N)` or
3743
+ :math:`(N, d_1, d_2, ..., d_K)` , data type must be int32. For probabilities, tensor of shape :math:`(C,)` ,
3744
+ :math:`(N, C)` or :math:`(N, C, d_1, d_2, ..., d_K)` , data type must be float16 or float32.
3179
3745
  weight (Tensor): A rescaling weight applied to the loss of each batch element.
3180
- If not None, the shape is :math:`(C,)`,
3181
- data type must be float16 or float32. Default: None.
3746
+ If not None, the shape is :math:`(C,)`, data type must be float16 or float32. Default: ``None`` .
3182
3747
  ignore_index (int): Specifies a target value that is ignored
3183
- and does not contribute to the input gradient. Default: -100
3184
- reduction (str): Apply specific reduction method to the output: 'none', 'mean', or 'sum'.
3185
- Default: 'mean'.
3748
+ and does not contribute to the input gradient. Default: ``-100`` .
3749
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
3750
+ ``'sum'`` . Default: ``'mean'`` .
3751
+
3752
+ - ``'none'``: no reduction will be applied.
3753
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
3754
+ - ``'sum'``: the output elements will be summed.
3755
+
3186
3756
  label_smoothing (float): Label smoothing values, a regularization tool used to prevent the model
3187
- from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: 0.0.
3757
+ from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: ``0.0`` .
3188
3758
 
3189
3759
  Returns:
3190
3760
  Tensor, the computed loss value.
@@ -3193,17 +3763,25 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3193
3763
  ``Ascend`` ``GPU`` ``CPU``
3194
3764
 
3195
3765
  Examples:
3766
+ >>> import mindspore as ms
3767
+ >>> import numpy as np
3196
3768
  >>> # Case 1: Indices labels
3197
- >>> inputs = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
3198
- >>> target = mindspore.Tensor(np.array([1, 0, 4]), mindspore.int32)
3199
- >>> output = ops.cross_entropy(inputs, target)
3769
+ >>> inputs = ms.Tensor(np.random.randn(3, 5), ms.float32)
3770
+ >>> target = ms.Tensor(np.array([1, 0, 4]), ms.int32)
3771
+ >>> output = ms.ops.cross_entropy(inputs, target)
3200
3772
  >>> # Case 2: Probability labels
3201
- >>> inputs = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
3202
- >>> target = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
3203
- >>> output = ops.cross_entropy(inputs, target)
3773
+ >>> inputs = ms.Tensor(np.random.randn(3, 5), ms.float32)
3774
+ >>> target = ms.Tensor(np.random.randn(3, 5), ms.float32)
3775
+ >>> output = ms.ops.cross_entropy(inputs, target)
3204
3776
  """
3777
+ _check_is_tensor('input', input, "cross_entropy_loss")
3778
+ _check_is_tensor('target', target, "cross_entropy_loss")
3779
+ _check_is_tensor('weight', weight, "cross_entropy_loss")
3780
+ check_int_const(ignore_index, 'ignore_index', "cross_entropy_loss")
3781
+ check_non_negative_float_const(label_smoothing, 'label_smoothing', "cross_entropy_loss")
3782
+ check_string_const(reduction, ['none', 'mean', 'sum'], 'reduction', "cross_entropy_loss")
3205
3783
  class_dim = 0 if input.ndim == 1 else 1
3206
- if input.size == target.size:
3784
+ if target.dtype in [mstype.float32, mstype.float16]:
3207
3785
  return _cross_entropy(input, target, class_dim, weight, reduction, label_smoothing)
3208
3786
  return nll_loss(_innner_log_softmax(input, class_dim), target, weight, ignore_index, reduction, label_smoothing)
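The dispatch added above selects the soft-label branch whenever `target` is a float tensor and the class-index branch otherwise; a sketch of both documented call patterns (shapes and values are illustrative only):
>>> import mindspore as ms
>>> import numpy as np
>>> logits = ms.Tensor(np.random.randn(3, 5), ms.float32)
>>> hard = ms.Tensor(np.array([1, 0, 4]), ms.int32)              # class indices
>>> soft = ms.Tensor(np.abs(np.random.randn(3, 5)), ms.float32)  # probability-style labels
>>> print(ms.ops.cross_entropy(logits, hard).shape, ms.ops.cross_entropy(logits, soft).shape)
() ()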
3209
3787
 
@@ -3246,9 +3824,10 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
3246
3824
  \{c \not= \text{ignore_index}\},
3247
3825
 
3248
3826
  where :math:`x` is the inputs, :math:`t` is the target, :math:`w` is the weight,
3249
- N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
3827
+ N is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of
3828
+ classes.
3250
3829
 
3251
- If reduction is not 'none' (default 'mean'), then
3830
+ If `reduction` is not ``'none'`` (default ``'mean'`` ), then
3252
3831
 
3253
3832
  .. math::
3254
3833
 
@@ -3265,13 +3844,18 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
3265
3844
  high-dimensional loss, data type must be int32.
3266
3845
  weight (Tensor): A rescaling weight applied to the loss of each batch element.
3267
3846
  If not None, the shape is :math:`(C,)`.
3268
- The data type must be float16 or float32. Default: None.
3847
+ The data type must be float16 or float32. Default: ``None`` .
3269
3848
  ignore_index (int): Specifies a target value that is ignored
3270
- and does not contribute to the input gradient. Default: -100
3271
- reduction (str): Apply specific reduction method to the output: 'none', 'mean', or 'sum'.
3272
- Default: 'mean'.
3849
+ and does not contribute to the input gradient. Default: ``-100`` .
3850
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
3851
+ ``'sum'`` . Default: ``'mean'`` .
3852
+
3853
+ - ``'none'``: no reduction will be applied.
3854
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
3855
+ - ``'sum'``: the output elements will be summed.
3856
+
3273
3857
  label_smoothing (float): Label smoothing values, a regularization tool used to prevent the model
3274
- from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: 0.0.
3858
+ from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: ``0.0`` .
3275
3859
 
3276
3860
  Returns:
3277
3861
  Tensor, the computed loss value.
@@ -3280,7 +3864,9 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
3280
3864
  ``Ascend`` ``GPU`` ``CPU``
3281
3865
 
3282
3866
  Examples:
3283
-
3867
+ >>> import mindspore
3868
+ >>> import numpy as np
3869
+ >>> from mindspore import Tensor, ops
3284
3870
  >>> inputs = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
3285
3871
  >>> target = mindspore.Tensor(np.array([1, 0, 4]), mindspore.int32)
3286
3872
  >>> output = ops.nll_loss(inputs, target)
@@ -3361,7 +3947,7 @@ def l1_loss(input, target, reduction='mean'):
3361
3947
  r"""
3362
3948
  Calculate the mean absolute error between the `input` value and the `target` value.
3363
3949
 
3364
- Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, `reduction` is set to "none" ,
3950
+ Assuming that :math:`x` and :math:`y` are 1-D Tensors of length :math:`N`, and `reduction` is set to ``"none"`` ,
3365
3951
  then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.
3366
3952
 
3367
3953
  The formula is as follows:
@@ -3371,7 +3957,7 @@ def l1_loss(input, target, reduction='mean'):
3371
3957
 
3372
3958
  where :math:`N` is the batch size.
3373
3959
 
3374
- If `reduction` is mean or sum, then:
3960
+ If `reduction` is ``"mean"`` or ``"sum"`` , then:
3375
3961
 
3376
3962
  .. math::
3377
3963
  \ell(x, y) =
@@ -3384,22 +3970,28 @@ def l1_loss(input, target, reduction='mean'):
3384
3970
  input (Tensor): Predicted value, Tensor of any dimension.
3385
3971
  target (Tensor): Target value, usually has the same shape as the `input`.
3386
3972
  If `input` and `target` have different shape, make sure they can broadcast to each other.
3387
- reduction (str, optional): Type of reduction to be applied to loss. The optional value is "mean", "sum" or
3388
- "none". Default: "mean".
3973
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
3974
+ ``'sum'`` . Default: ``'mean'`` .
3975
+
3976
+ - ``'none'``: no reduction will be applied.
3977
+ - ``'mean'``: compute and return the mean of elements in the output.
3978
+ - ``'sum'``: the output elements will be summed.
3389
3979
 
3390
3980
  Returns:
3391
- Tensor or Scalar, if `reduction` is "none", return a Tensor with same shape and dtype as `input`.
3981
+ Tensor or Scalar, if `reduction` is ``"none"``, return a Tensor with same shape and dtype as `input`.
3392
3982
  Otherwise, a scalar value will be returned.
3393
3983
 
3394
3984
  Raises:
3395
3985
  TypeError: If `input` is not a Tensor.
3396
3986
  TypeError: If `target` is not a Tensor.
3397
- ValueError: If `reduction` is not one of "none", "mean" or "sum".
3987
+ ValueError: If `reduction` is not one of ``"none"``, ``"mean"`` or ``"sum"``.
3398
3988
 
3399
3989
  Supported Platforms:
3400
3990
  ``Ascend`` ``GPU`` ``CPU``
3401
3991
 
3402
3992
  Examples:
3993
+ >>> import mindspore as ms
+ >>> from mindspore import ops
3994
+ >>> from mindspore import dtype as mstype
3403
3995
  >>> x = ms.Tensor([[1, 2, 3], [4, 5, 6]], mstype.float32)
3404
3996
  >>> target = ms.Tensor([[6, 5, 4], [3, 2, 1]], mstype.float32)
3405
3997
  >>> output = ops.l1_loss(x, target, reduction="mean")
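As a quick check of the formula, the docstring example above works out to a mean absolute error of 3.0; a hedged NumPy equivalent:

import numpy as np

x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
target = np.array([[6, 5, 4], [3, 2, 1]], dtype=np.float32)
print(np.abs(x - target).mean())  # 3.0, matching reduction="mean"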
@@ -3441,22 +4033,27 @@ def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
3441
4033
  \end{cases}
3442
4034
 
3443
4035
  Here :math:`\text{beta}` controls the point where the loss function changes from quadratic to linear.
3444
- :math:`\text{beta}>0` , its default value is 1.0. :math:`N` is the batch size.
4036
+ :math:`\text{beta}>0` , its default value is ``1.0`` . :math:`N` is the batch size.
3445
4037
 
3446
4038
  Args:
3447
4039
  input (Tensor): Tensor of shape :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
3448
4040
  target (Tensor): Ground truth data, tensor of shape :math:`(N, *)`, same shape and dtype as the `input`.
3449
4041
  beta (float): A parameter used to control the point where the function will change between
3450
- L1 to L2 loss. The value should be greater than zero. Default: 1.0.
3451
- reduction (str): Apply specific reduction method to the output: 'none', 'mean' or 'sum'. Default: 'none'.
4042
+ L1 to L2 loss. The value should be greater than zero. Default: ``1.0`` .
4043
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4044
+ ``'sum'`` . Default: ``'none'`` .
4045
+
4046
+ - ``'none'``: no reduction will be applied.
4047
+ - ``'mean'``: compute and return the mean of elements in the output.
4048
+ - ``'sum'``: the output elements will be summed.
3452
4049
 
3453
4050
  Returns:
3454
- Tensor, if `reduction` is 'none', then output is a tensor with the same shape as `input`.
3455
- Otherwise, the shape of output tensor is `(1,)`.
4051
+ Tensor, if `reduction` is ``'none'``, then output is a tensor with the same shape as `input`.
4052
+ Otherwise, the shape of output tensor is :math:`(1,)`.
3456
4053
 
3457
4054
  Raises:
3458
4055
  TypeError: If `beta` is not a float.
3459
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
4056
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
3460
4057
  TypeError: If dtype of `input` or `target` is not one of float16, float32, float64.
3461
4058
  ValueError: If `beta` is less than or equal to 0.
3462
4059
  ValueError: If shape of `input` is not the same as `target`.
@@ -3465,6 +4062,9 @@ def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
3465
4062
  ``Ascend`` ``GPU`` ``CPU``
3466
4063
 
3467
4064
  Examples:
4065
+ >>> import mindspore
4066
+ >>> import numpy as np
4067
+ >>> from mindspore import Tensor, ops
3468
4068
  >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
3469
4069
  >>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32)
3470
4070
  >>> output = ops.smooth_l1_loss(logits, labels)
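The piecewise rule above can be checked directly for the docstring example: with the default beta=1.0 and reduction='none', the element-wise losses are [0, 0, 0.5]. A hedged NumPy sketch:

import numpy as np

logits = np.array([1.0, 2.0, 3.0])
labels = np.array([1.0, 2.0, 2.0])
beta = 1.0
d = np.abs(logits - labels)
print(np.where(d < beta, 0.5 * d ** 2 / beta, d - 0.5 * beta))  # [0.  0.  0.5]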
@@ -3505,6 +4105,8 @@ def threshold(input, thr, value):
3505
4105
  ``Ascend`` ``GPU`` ``CPU``
3506
4106
 
3507
4107
  Examples:
4108
+ >>> import mindspore
4109
+ >>> from mindspore import Tensor, ops
3508
4110
  >>> inputs = mindspore.Tensor([0.0, 2, 3], mindspore.float32)
3509
4111
  >>> outputs = ops.threshold(inputs, 1, 100)
3510
4112
  >>> print(outputs)
@@ -3514,7 +4116,11 @@ def threshold(input, thr, value):
3514
4116
  _check_value_type("thr", thr, [float, int], "threshold")
3515
4117
  _check_value_type("value", value, [float, int], "threshold")
3516
4118
  cond = _get_cache_prim(P.Greater)()(input, thr)
3517
- value = _get_cache_prim(P.Fill)()(input.dtype, input.shape, value)
4119
+ input_type = input.dtype
4120
+ value = Tensor(value, input_type)
4121
+ input_shape = input.shape
4122
+ shape_tensor = _get_cache_prim(TupleToTensor)()(input_shape, mstype.int64)
4123
+ value = _get_cache_prim(P.FillV2)()(shape_tensor, value)
3518
4124
  return _get_cache_prim(P.Select)()(cond, input, value)
3519
4125
 
3520
4126
 
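The rewritten body above (Greater, then FillV2, then Select) is an element-wise keep-or-replace; a hedged NumPy equivalent of the same semantics:

import numpy as np

x = np.array([0.0, 2.0, 3.0], dtype=np.float32)
thr, value = 1, 100
print(np.where(x > thr, x, np.float32(value)))  # [100.   2.   3.]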
@@ -3536,7 +4142,7 @@ def leaky_relu(input, alpha=0.2):
3536
4142
  Args:
3537
4143
  input (Tensor): The input of leaky_relu is a Tensor of any dimension.
3538
4144
  alpha (Union[int, float]): Slope of the activation function when the element of `input` is less than 0.
3539
- Default: 0.2.
4145
+ Default: ``0.2`` .
3540
4146
 
3541
4147
  Returns:
3542
4148
  Tensor, has the same type and shape as the `input`.
@@ -3549,6 +4155,9 @@ def leaky_relu(input, alpha=0.2):
3549
4155
  ``Ascend`` ``GPU`` ``CPU``
3550
4156
 
3551
4157
  Examples:
4158
+ >>> import mindspore
4159
+ >>> import numpy as np
4160
+ >>> from mindspore import Tensor, ops
3552
4161
  >>> x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
3553
4162
  >>> print(ops.leaky_relu(x, alpha=0.2))
3554
4163
  [[-0.2 4. -1.6]
@@ -3559,6 +4168,7 @@ def leaky_relu(input, alpha=0.2):
3559
4168
  select_op = _get_cache_prim(P.Maximum)()
3560
4169
  if alpha > 1:
3561
4170
  select_op = _get_cache_prim(P.Minimum)()
4171
+ alpha = _get_cache_prim(P.Cast)()(F.scalar_to_tensor(alpha), input.dtype)
3562
4172
  return select_op(alpha * input, input)
3563
4173
 
3564
4174
 
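The Maximum/Minimum selection above works because, for alpha <= 1, alpha*x >= x exactly when x < 0, so max(alpha*x, x) reproduces leaky ReLU; for alpha > 1 the inequality flips, hence the switch to Minimum. A hedged NumPy check:

import numpy as np

x = np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]])
alpha = 0.2
reference = np.where(x >= 0, x, alpha * x)               # textbook leaky ReLU
print(np.allclose(np.maximum(alpha * x, x), reference))  # True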
@@ -3587,6 +4197,9 @@ def intopk(x1, x2, k):
3587
4197
  ``Ascend`` ``GPU`` ``CPU``
3588
4198
 
3589
4199
  Examples:
4200
+ >>> import mindspore
4201
+ >>> import numpy as np
4202
+ >>> from mindspore import Tensor, ops
3590
4203
  >>> x1 = Tensor(np.array([[1, 8, 5, 2, 7], [4, 9, 1, 3, 5]]), mindspore.float32)
3591
4204
  >>> x2 = Tensor(np.array([1, 3]), mindspore.int32)
3592
4205
  >>> output = ops.intopk(x1, x2, 3)
@@ -3611,7 +4224,7 @@ def log_softmax(logits, axis=-1):
3611
4224
  Args:
3612
4225
  logits (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
3613
4226
  additional dimensions, with float16 or float32 data type.
3614
- axis (int): The axis to perform the Log softmax operation. Default: -1.
4227
+ axis (int): The axis to perform the Log softmax operation. Default: ``-1`` .
3615
4228
 
3616
4229
  Returns:
3617
4230
  Tensor, with the same type and shape as the logits.
@@ -3620,11 +4233,15 @@ def log_softmax(logits, axis=-1):
3620
4233
  TypeError: If `axis` is not an int.
3621
4234
  TypeError: If dtype of `logits` is neither float16 nor float32.
3622
4235
  ValueError: If `axis` is not in range [-len(logits.shape), len(logits.shape)).
4236
+ ValueError: If dimension of `logits` is less than 1.
3623
4237
 
3624
4238
  Supported Platforms:
3625
4239
  ``Ascend`` ``GPU`` ``CPU``
3626
4240
 
3627
4241
  Examples:
4242
+ >>> import mindspore
4243
+ >>> import numpy as np
4244
+ >>> from mindspore import Tensor, ops
3628
4245
  >>> logits = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
3629
4246
  >>> output = ops.log_softmax(logits)
3630
4247
  >>> print(output)
@@ -3638,6 +4255,10 @@ def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CH
3638
4255
  r"""
3639
4256
  Local Response Normalization.
3640
4257
 
4258
+ .. warning::
4259
+ lrn is deprecated on Ascend due to potential accuracy problems. It's recommended to use other
4260
+ normalization methods, e.g. :class:`mindspore.ops.batch_norm`.
4261
+
3641
4262
  .. math::
3642
4263
 
3643
4264
  b_{c} = a_{c}\left(k + \frac{\alpha}{n}
@@ -3648,11 +4269,12 @@ def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CH
3648
4269
  where the :math:`\alpha` indicates the `alpha`; where the :math:`\beta` indicates the `beta`.
3649
4270
 
3650
4271
  Args:
3651
- depth_radius (int): Half-width of the 1-D normalization window with the shape of 0-D. Default: 5.
3652
- bias (float): An offset (usually positive to avoid dividing by 0). Default: 1.0.
3653
- alpha (float): A scale factor, usually positive. Default: 1.0.
3654
- beta (float): An exponent. Default: 0.5.
3655
- norm_region (str): Specifies normalization region. Options: "ACROSS_CHANNELS". Default: "ACROSS_CHANNELS".
4272
+ depth_radius (int): Half-width of the 1-D normalization window with the shape of 0-D. Default: ``5`` .
4273
+ bias (float): An offset (usually positive to avoid dividing by 0). Default: ``1.0`` .
4274
+ alpha (float): A scale factor, usually positive. Default: ``1.0`` .
4275
+ beta (float): An exponent. Default: ``0.5`` .
4276
+ norm_region (str): Specifies normalization region. Options: ``"ACROSS_CHANNELS"`` .
4277
+ Default: ``"ACROSS_CHANNELS"`` .
3656
4278
  x (Tensor): A 4-D Tensor with float16 or float32 data type.
3657
4279
 
3658
4280
  Returns:
@@ -3665,9 +4287,12 @@ def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CH
3665
4287
  TypeError: If `x` is not a Tensor.
3666
4288
 
3667
4289
  Supported Platforms:
3668
- ``Ascend`` ``GPU`` ``CPU``
4290
+ ``GPU`` ``CPU``
3669
4291
 
3670
4292
  Examples:
4293
+ >>> import mindspore
4294
+ >>> import numpy as np
4295
+ >>> from mindspore import Tensor, ops
3671
4296
  >>> input_x = Tensor(np.array([[[[0.1], [0.2]],
3672
4297
  ... [[0.3], [0.4]]]]), mindspore.float32)
3673
4298
  >>> output = ops.lrn(input_x)
@@ -3695,29 +4320,39 @@ def mish(x):
3695
4320
  <https://arxiv.org/abs/1908.08681>`_.
3696
4321
 
3697
4322
  Args:
3698
- x (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
3699
- additional dimensions, with float16 or float32 data type.
4323
+ x (Tensor): The input Tensor.
4324
+ Supported dtypes:
4325
+
4326
+ - GPU/CPU: float16, float32, float64.
4327
+ - Ascend: float16, float32.
3700
4328
 
3701
4329
  Returns:
3702
4330
  Tensor, with the same type and shape as the `x`.
3703
4331
 
3704
4332
  Raises:
3705
- TypeError: If dtype of `x` is neither float16 nor float32.
4333
+ TypeError: If dtype of `x` is not float16, float32 or float64.
3706
4334
 
3707
4335
  Supported Platforms:
3708
4336
  ``Ascend`` ``GPU`` ``CPU``
3709
4337
 
3710
4338
  Examples:
4339
+ >>> import mindspore
4340
+ >>> import numpy as np
4341
+ >>> from mindspore import Tensor, ops
3711
4342
  >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
3712
4343
  >>> output = ops.mish(input_x)
3713
4344
  >>> print(output)
3714
4345
  [[-3.0340147e-01 3.9974129e+00 -2.68311895e-03]
3715
4346
  [ 1.9439590e+00 -3.3576239e-02 8.99999990e+00]]
4347
+ >>> input_x = Tensor(2.1, mindspore.float32)
4348
+ >>> output = ops.mish(input_x)
4349
+ >>> print(output)
4350
+ 2.050599
3716
4351
  """
3717
4352
  return mish_(x)
3718
4353
 
3719
4354
 
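The scalar example added above can be reproduced from the Mish definition mish(x) = x * tanh(softplus(x)) given in the cited paper; a hedged NumPy check:

import numpy as np

x = 2.1
print(x * np.tanh(np.log1p(np.exp(x))))  # ~2.0506, matching the example output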
3720
- @constexpr
4355
+ @_primexpr
3721
4356
  def _check_value_type(arg_name, arg_value, valid_types, prim_name=None):
3722
4357
  """Checks whether a value is instance of some types."""
3723
4358
  return validator.check_value_type(arg_name, arg_value, valid_types, prim_name)
@@ -3726,8 +4361,8 @@ def _check_value_type(arg_name, arg_value, valid_types, prim_name=None):
3726
4361
  @constexpr(check=False)
3727
4362
  def _check_is_tensor(param_name, input_data, cls_name):
3728
4363
  """Internal function, used to check whether the input data is Tensor."""
3729
- if input_data is not None and not isinstance(ops.typeof(input_data), mstype.tensor_type):
3730
- raise TypeError(f"For '{cls_name}', the '{param_name}' must be '{mstype.tensor_type}', "
4364
+ if input_data is not None and not isinstance(ops.typeof(input_data), mstype.TensorType):
4365
+ raise TypeError(f"For '{cls_name}', the '{param_name}' must be a Tensor, "
3731
4366
  f"but got '{ops.typeof(input_data)}'")
3732
4367
 
3733
4368
 
@@ -3790,10 +4425,54 @@ def _check_type_and_shape_same(param_name1, input_data1, param_name2, input_data
3790
4425
 
3791
4426
 
3792
4427
  def margin_ranking_loss(input1, input2, target, margin=0.0, reduction='mean'):
3793
- """
4428
+ r"""
3794
4429
  MarginRankingLoss creates a criterion that measures the loss.
3795
4430
 
3796
- For details, please refer to :class:`mindspore.nn.MarginRankingLoss`.
4431
+ Given two tensors :math:`input1`, :math:`input2` and a Tensor label :math:`target` with values 1 or -1,
4432
+ the operation is as follows:
4433
+
4434
+ .. math::
4435
+ \text{loss}(input1, input2, target) = \max(0, -target * (input1 - input2) + \text{margin})
4436
+
4437
+ Args:
4438
+ input1 (Tensor): Tensor of shape :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
4439
+ input2 (Tensor): Tensor of shape :math:`(N, *)`, same shape and dtype as `input1`.
4440
+ target (Tensor): Contains value 1 or -1. Suppose the shape of `input1` is
4441
+ :math:`(x_1, x_2, x_3, ..., x_R)`, then the shape of `target` must be :math:`(x_1, x_2, x_3, ..., x_R)`.
4442
+ margin (float, optional): Specify the adjustment factor of the operation. Default: ``0.0`` .
4443
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4444
+ ``'sum'`` . Default: ``'mean'`` .
4445
+
4446
+ - ``'none'``: no reduction will be applied.
4447
+ - ``'mean'``: compute and return the mean of elements in the output.
4448
+ - ``'sum'``: the output elements will be summed.
4449
+
4450
+ Returns:
4451
+ Tensor or Scalar. If `reduction` is ``"none"``, its shape is the same as `target`.
4452
+ Otherwise, a scalar value will be returned.
4453
+
4454
+ Raises:
4455
+ TypeError: If `margin` is not a float.
4456
+ TypeError: If `input1`, `input2` or `target` is not a Tensor.
4457
+ TypeError: If the types of `input1` and `input2` are inconsistent.
4458
+ TypeError: If the types of `input1` and `target` are inconsistent.
4459
+ ValueError: If the shape of `input1` and `input2` are inconsistent.
4460
+ ValueError: If the shape of `input1` and `target` are inconsistent.
4461
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` , ``'sum'``.
4462
+
4463
+ Supported Platforms:
4464
+ ``Ascend`` ``GPU`` ``CPU``
4465
+
4466
+ Examples:
4467
+ >>> import mindspore as ms
4468
+ >>> from mindspore import Tensor, ops
4469
+ >>> import numpy as np
4470
+ >>> input1 = Tensor(np.array([0.3864, -2.4093, -1.4076]), ms.float32)
4471
+ >>> input2 = Tensor(np.array([-0.6012, -1.6681, 1.2928]), ms.float32)
4472
+ >>> target = ops.Sign()(Tensor(np.array([-2, -2, 3]), ms.float32))
4473
+ >>> output = ops.margin_ranking_loss(input1, input2, target)
4474
+ >>> print(output)
4475
+ 1.2293333
3797
4476
  """
3798
4477
  margin = _check_value_type("margin", margin, [float], "margin_ranking_loss")
3799
4478
  _check_is_tensor('input1', input1, "margin_ranking_loss")
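The docstring example above can be reproduced directly from the formula; a hedged NumPy check (margin left at its default 0.0):

import numpy as np

input1 = np.array([0.3864, -2.4093, -1.4076])
input2 = np.array([-0.6012, -1.6681, 1.2928])
target = np.sign(np.array([-2.0, -2.0, 3.0]))
loss = np.maximum(0.0, -target * (input1 - input2) + 0.0)
print(loss.mean())  # ~1.2293, matching the default 'mean' reduction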
@@ -3830,23 +4509,30 @@ def cosine_embedding_loss(input1, input2, target, margin=0.0, reduction="mean"):
3830
4509
  input2 (Tensor): Tensor of shape :math:`(N, *)`, same shape and dtype as `input1`.
3831
4510
  target (Tensor): Contains value 1 or -1. Suppose the shape of `input1` is
3832
4511
  :math:`(x_1, x_2, x_3, ..., x_R)`, then the shape of `target` must be :math:`(x_1, x_3, x_4, ..., x_R)`.
3833
- margin (float, optional): Should be in [-1.0, 1.0]. Default 0.0.
3834
- reduction (str, optional): Specifies which reduction to be applied to the output. It must be one of
3835
- "none", "mean", and "sum", meaning no reduction, reduce mean and sum on output, respectively. Default "mean".
4512
+ margin (float, optional): Should be in [-1.0, 1.0]. Default: ``0.0`` .
4513
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4514
+ ``'sum'`` . Default: ``'mean'`` .
4515
+
4516
+ - ``'none'``: no reduction will be applied.
4517
+ - ``'mean'``: compute and return the mean of elements in the output.
4518
+ - ``'sum'``: the output elements will be summed.
3836
4519
 
3837
4520
  Returns:
3838
- Tensor or Scalar, if `reduction` is "none", its shape is the same as `target`.
4521
+ Tensor or Scalar, if `reduction` is ``"none"``, its shape is the same as `target`.
3839
4522
  Otherwise, a scalar value will be returned.
3840
4523
 
3841
4524
  Raises:
3842
4525
  TypeError: If `margin` is not a float.
3843
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
4526
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
3844
4527
  ValueError: If `margin` is not in range [-1, 1].
3845
4528
 
3846
4529
  Supported Platforms:
3847
4530
  ``Ascend`` ``GPU`` ``CPU``
3848
4531
 
3849
4532
  Examples:
4533
+ >>> import mindspore
4534
+ >>> import numpy as np
4535
+ >>> from mindspore import Tensor, ops
3850
4536
  >>> intput1 = Tensor(np.array([[0.3, 0.8], [0.4, 0.3]]), mindspore.float32)
3851
4537
  >>> intput2 = Tensor(np.array([[0.4, 1.2], [-0.4, -0.9]]), mindspore.float32)
3852
4538
  >>> target = Tensor(np.array([1, -1]), mindspore.int32)
@@ -3909,13 +4595,14 @@ def max_pool3d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal
3909
4595
  three int numbers that represent depth, height and width respectively.
3910
4596
  stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
3911
4597
  the depth, height and width of movement are both stride, or a tuple of three int numbers that
3912
- represent depth, height and width of movement respectively. Default: `kernel_size`.
4598
+ represent depth, height and width of movement respectively.
4599
+ Default: ``None`` , which indicates the moving step is `kernel_size` .
3913
4600
  padding (Union[int, tuple[int]]): An int number that represents the depth, height and width of movement are both
3914
4601
  strides, or a tuple of three int numbers that represent depth, height and width of movement respectively.
3915
- Default: 0.
3916
- dilation (Union[int, tuple[int]]): Control the stride of elements in the kernel. Default: 1.
3917
- ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: False.
3918
- return_indices (bool): Whether to output the indices of max value. Default: False.
4602
+ Default: ``0`` .
4603
+ dilation (Union[int, tuple[int]]): Control the stride of elements in the kernel. Default: ``1`` .
4604
+ ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: ``False`` .
4605
+ return_indices (bool): Whether to output the indices of max value. Default: ``False`` .
3919
4606
 
3920
4607
  Returns:
3921
4608
  If `return_indices` is False, return a Tensor `output`, else return a tuple (`output`, `argmax`).
@@ -3923,7 +4610,7 @@ def max_pool3d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal
3923
4610
  - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, D_{out}, H_{out}, W_{out})`.
3924
4611
  It has the same data type as `x`.
3925
4612
  - **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int64. It will be returned
3926
- only when `return_indices` is True.
4613
+ only when `return_indices` is ``True`` .
3927
4614
 
3928
4615
  Raises:
3929
4616
  TypeError: If `x` is not a Tensor.
@@ -3936,6 +4623,9 @@ def max_pool3d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal
3936
4623
  ``Ascend`` ``GPU`` ``CPU``
3937
4624
 
3938
4625
  Examples:
4626
+ >>> import mindspore
4627
+ >>> import numpy as np
4628
+ >>> from mindspore import Tensor, ops
3939
4629
  >>> x = Tensor(np.arange(2 * 1 * 2 * 2 * 2).reshape((2, 1, 2, 2, 2)), mindspore.float32)
3940
4630
  >>> output_tensor, argmax = ops.max_pool3d(x, kernel_size=2, stride=1, padding=1, return_indices=True)
3941
4631
  >>> print(output_tensor.shape)
@@ -3963,8 +4653,8 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corner
3963
4653
  For each output location `output[n, :, h, w]`, the size-2 vector `grid[n, h, w]` specifies `input` pixel
3964
4654
  locations `x` and `y`, which are used to interpolate the output value `output[n, :, h, w]`. In the case of 5D
3965
4655
  inputs, `grid[n, d, h, w]`, specifies the `x`, `y`, `z` pixel locations for interpolating
3966
- `output[n, :, d, h, w]`. And `mode` argument specifies "nearest" or "bilinear" or "bicubic"
3967
- (supported in 4D case only) interpolation method to sample the input pixels.
4656
+ `output[n, :, d, h, w]`. The `mode` argument specifies the "nearest" or "bilinear" ("bicubic" is not supported yet)
4657
+ interpolation method to sample the input pixels.
3968
4658
 
3969
4659
  `grid` specifies the sampling pixel locations normalized by the `input` spatial dimensions. Therefore, it should
3970
4660
  have most values in the range of :math:`[-1, 1]`.
@@ -3981,15 +4671,26 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corner
3981
4671
  grid (Tensor): flow-field with shape of :math:`(N, H_{out}, W_{out}, 2)` (4-D case) or :math:`(N, D_{out},
3982
4672
  H_{out}, W_{out}, 3)` (5-D case) and same dtype as `input`.
3983
4673
  mode (str): An optional string specifying the interpolation method. The optional values are
3984
- "bilinear", "nearest" or "bicubic". Default: "bilinear". Note: `bicubic` supports only 4-D input. When
4674
+ ``'bilinear'`` , ``'nearest'`` . Note: `bicubic` is not supported yet. When
3985
4675
  `mode="bilinear"` and the input is 5-D, the interpolation mode used internally will actually
3986
4676
  be trilinear. However, when the input is 4-D, the interpolation mode will legitimately be bilinear.
4677
+ Default: ``'bilinear'`` .
4678
+
4679
+ - ``'nearest'``: Nearest neighbor interpolation. Each output pixel is assigned the value of the
4680
+ nearest input pixel. This method is simple and fast but can result in blocky or pixelated outputs.
4681
+ - ``'bilinear'``: Bilinear interpolation. Each output pixel is a weighted average of the four nearest input
4682
+ pixels, computed using bilinear interpolation. This method produces smoother results compared
4683
+ to nearest neighbor interpolation.
4684
+ - ``'trilinear'``: Trilinear interpolation. This is an extension of bilinear interpolation to 3D data.
4685
+ It performs bilinear interpolation in the two spatial dimensions and linear interpolation along
4686
+ the third dimension. It is commonly used for volume or 3D image interpolation.
4687
+
3987
4688
  padding_mode (str): An optional string specifying the pad method. The optional values are "zeros", "border" or
3988
- "reflection". Default: "zeros".
4689
+ "reflection". Default: ``'zeros'`` .
3989
4690
  align_corners (bool): An optional bool. If set to `True`, the extrema (-1 and 1) are considered as referring to
3990
4691
  the center points of the input’s corner pixels. If set to `False`, they are instead considered as referring
3991
4692
  to the corner points of the input’s corner pixels, making the sampling more resolution agnostic. Default:
3992
- `False`.
4693
+ ``False`` .
3993
4694
 
3994
4695
  Returns:
3995
4696
  Tensor, dtype is the same as `input` and whose shape is :math:`(N, C, H_{out}, W_{out})` (4-D) and
@@ -4003,13 +4704,15 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corner
4003
4704
  ValueError: If the rank of `input` or `grid` is not equal to 4(4-D case) or 5(5-D case).
4004
4705
  ValueError: If the first dimension of `input` is not equal to that of `grid`.
4005
4706
  ValueError: If the last dimension of `grid` is not equal to 2(4-D case) or 3(5-D case).
4006
- ValueError: If `mode` is not "bilinear", "nearest", "bicubic" or a string value.
4707
+ ValueError: If `mode` is not "bilinear", "nearest" or a string value.
4007
4708
  ValueError: If `padding_mode` is not "zeros", "border", "reflection" or a string value.
4008
4709
 
4009
4710
  Supported Platforms:
4010
4711
  ``Ascend`` ``GPU`` ``CPU``
4011
4712
 
4012
4713
  Examples:
4714
+ >>> import numpy as np
4715
+ >>> from mindspore import Tensor, ops
4013
4716
  >>> input_x = Tensor(np.arange(16).reshape((2, 2, 2, 2)).astype(np.float32))
4014
4717
  >>> grid = Tensor(np.arange(0.2, 1, 0.1).reshape((2, 2, 1, 2)).astype(np.float32))
4015
4718
  >>> output = ops.grid_sample(input_x, grid, mode='bilinear', padding_mode='zeros',
@@ -4061,11 +4764,15 @@ def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reducti
4061
4764
  targets (Tensor): Target sequences. A tensor of shape :math:`(N, S)`, where S is max target length.
4062
4765
  input_lengths (Union(tuple, Tensor)): Lengths of the input. A tuple or Tensor of shape(N).
4063
4766
  target_lengths (Union(tuple, Tensor)): Lengths of the target. A tuple or Tensor of shape(N).
4064
- blank (int, optional): The blank label. Default: 0.
4065
- reduction (str, optional): Implements the reduction method to the output with 'none', 'mean', or 'sum',
4066
- respectively indicate that no calculation is specified, that the mean is used, and that is calculated
4067
- using summation. Default: 'mean'.
4068
- zero_infinity (bool, optional): Whether to set infinite loss and correlation gradient to 0. Default: False.
4767
+ blank (int, optional): The blank label. Default: ``0`` .
4768
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4769
+ ``'sum'`` . Default: ``'mean'`` .
4770
+
4771
+ - ``'none'``: no reduction will be applied.
4772
+ - ``'mean'``: compute and return the mean of elements in the output.
4773
+ - ``'sum'``: the output elements will be summed.
4774
+
4775
+ zero_infinity (bool, optional): Whether to set infinite loss and correlation gradient to 0. Default: ``False`` .
4069
4776
 
4070
4777
  Returns:
4071
4778
  neg_log_likelihood (Tensor), A loss value with shape :math:`(N)` , which is differentiable with respect to
@@ -4081,7 +4788,6 @@ def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reducti
4081
4788
  ValueError: If the rank of `targets` is not 2.
4082
4789
  ValueError: If the shape of `input_lengths` does not match N. N is batch size of `log_probs` .
4083
4790
  ValueError: If the shape of `target_lengths` does not match N. N is batch size of `log_probs` .
4084
- TypeError: If the types of `targets`, `input_lengths` or `target_lengths` are different.
4085
4791
  ValueError: If the value of `blank` is not in range [0, num_labels|C). C is number of classes of `log_probs` .
4086
4792
  RuntimeError: If any value of `input_lengths` is larger than T. T is the length of `log_probs`.
4087
4793
  RuntimeError: If any target_lengths[i] is not in range [0, input_length[i]].
@@ -4090,6 +4796,9 @@ def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reducti
4090
4796
  ``Ascend`` ``GPU`` ``CPU``
4091
4797
 
4092
4798
  Examples:
4799
+ >>> import numpy as np
4800
+ >>> from mindspore import Tensor, ops
4801
+ >>> from mindspore import dtype as mstype
4093
4802
  >>> log_probs = Tensor(np.array([[[0.3, 0.6, 0.6]],
4094
4803
  ... [[0.9, 0.4, 0.2]]]).astype(np.float32))
4095
4804
  >>> targets = Tensor(np.array([[0, 1]]), mstype.int32)
@@ -4131,11 +4840,9 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
4131
4840
  \ \text{eps}\right)\right) + \frac{\left(\text{x} - \text{target}\right)^2}
4132
4841
  {\text{max}\left(\text{var}, \ \text{eps}\right)}\right) + \text{const.}
4133
4842
 
4134
- where :math:`eps` is used for stability of :math:`log`. When :math:`full=True`,
4135
- a constant will be added to the loss. If
4136
- the shape of :math:`var` and :math:`logits` are not the same (due to a homoscedastic assumption),
4137
- their shapes must allow
4138
- correct broadcasting.
4843
+ where :math:`eps` is used for stability of :math:`log`. When :math:`full=True`, a constant will be added to the
4844
+ loss. If the shapes of :math:`var` and :math:`x` are not the same (due to a homoscedastic assumption),
4845
+ their shapes must allow correct broadcasting.
4139
4846
 
4140
4847
  Args:
4141
4848
  x (Tensor): Tensor of shape :math:`(N, *)` or :math:`(*)` where :math:`*` means any number of
@@ -4146,18 +4853,20 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
4146
4853
  but with one dimension equal to 1, or same shape as the x but with one fewer dimension
4147
4854
  (to allow for broadcasting).
4148
4855
  full (bool, optional): Include the constant term in the loss calculation. When :math:`full=True`,
4149
- the constant term will be :math:`const = 0.5*log(2\pi)`. Default: False.
4150
- eps (float, optional): Used to improve the stability of log function must be greater than 0. Default: 1e-6.
4151
- reduction (str, optional): Apply specific reduction method to the
4152
- output: "none", "mean", or "sum". Default: "mean".
4856
+ the constant term will be :math:`const = 0.5*log(2\pi)`. Default: ``False``.
4857
+ eps (float, optional): Used to improve the stability of the log function, must be greater than 0. Default: ``1e-6`` .
4858
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4859
+ ``'sum'`` . Default: ``'mean'`` .
4860
+
4861
+ - ``'none'``: no reduction will be applied.
4862
+ - ``'mean'``: compute and return the mean of elements in the output.
4863
+ - ``'sum'``: the output elements will be summed.
4153
4864
 
4154
4865
  Returns:
4155
4866
  Tensor or Tensor scalar, the computed loss depending on :math:`reduction`.
4156
4867
 
4157
4868
  Raises:
4158
- TypeError: If `x` is not a Tensor.
4159
- TypeError: If `target` is not a Tensor.
4160
- TypeError: If `var` is not a Tensor.
4869
+ TypeError: If `x`, `target` or `var` is not a Tensor.
4161
4870
  TypeError: If `full` is not a bool.
4162
4871
  TypeError: If `eps` is not a float.
4163
4872
  ValueError: If `eps` is not a float within (0, inf).
@@ -4168,8 +4877,7 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
4168
4877
 
4169
4878
  Examples:
4170
4879
  >>> import numpy as np
4171
- >>> from mindspore import Tensor
4172
- >>> import mindspore.ops as ops
4880
+ >>> from mindspore import Tensor, ops
4173
4881
  >>> import mindspore.common.dtype as mstype
4174
4882
  >>> arr1 = np.arange(8).reshape((4, 2))
4175
4883
  >>> arr2 = np.array([2, 3, 1, 4, 6, 4, 4, 9]).reshape((4, 2))
@@ -4199,6 +4907,14 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
4199
4907
  if reduction not in ('none', 'mean', 'sum'):
4200
4908
  raise ValueError(f"For 'gaussian_nll_loss', 'reduction' must be one of 'none', 'mean', or 'sum',\
4201
4909
  but got {reduction}.")
4910
+ if not x.shape == var.shape:
4911
+ if x.shape[:-1] == var.shape:
4912
+ var = var.unsqueeze(dim=-1)
4913
+ # Heteroscedastic case
4914
+ elif x.shape[:-1] == var.shape[:-1] and var.shape[-1] == 1:
4915
+ pass
4916
+ else:
4917
+ raise ValueError(f"For 'gaussian_nll_loss', 'var' must be able to correctly broadcast to 'x' and 'target'.")
4202
4918
  max_op = P.Maximum()
4203
4919
  log_op = P.Log()
4204
4920
  square_op = P.Square()
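The shape handling added above admits exactly three `var` layouts relative to `x`: the same shape, `x.shape[:-1]` (which is then unsqueezed back), or the same shape with a trailing dimension of 1. A hedged NumPy sketch of those cases for an `x` of shape (4, 2):

import numpy as np

x = np.zeros((4, 2))
for shape in [(4, 2), (4,), (4, 1)]:
    var = np.ones(shape)
    if var.shape == x.shape[:-1]:
        var = var[..., None]                                # mirrors var.unsqueeze(dim=-1) above
    print(shape, np.broadcast_shapes(x.shape, var.shape))   # all broadcast to (4, 2)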
@@ -4215,9 +4931,30 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
4215
4931
 
4216
4932
 
4217
4933
  @_primexpr
4218
- def _check_hinge_embedding_loss(shape, shape2, prim_name):
4934
+ def _check_hinge_embedding_loss(shape, shape2):
4219
4935
  if shape2 != shape:
4220
- raise ValueError(f"For '{prim_name}' the input tensor and the labels must have the same shape.")
4936
+ raise ValueError(f"For 'HingeEmbeddingLoss' the input tensor and the labels must have the same shape.")
4937
+
4938
+
4939
+ @_primexpr
4940
+ def _check_hinge_embedding_loss_type(inputs_dtype, targets_dtype, inputs, targets, margin, reduction):
4941
+ """Check hinge embedding loss type."""
4942
+ if not isinstance(margin, (float, int)):
4943
+ raise TypeError(f"For 'HingeEmbeddingLoss', 'margin' must be a float or int, but got {type(margin)}.")
4944
+ if reduction not in ['none', 'mean', 'sum']:
4945
+ raise ValueError(f"For 'HingeEmbeddingLoss', 'reduction' must be one of 'none', 'mean', 'sum',"
4946
+ f"but got {reduction}.")
4947
+ if not isinstance(inputs, Tensor):
4948
+ raise TypeError(f"For 'HingeEmbeddingLoss', the first input must be a Tensor, but got {type(inputs)}.")
4949
+ if not isinstance(targets, Tensor):
4950
+ raise TypeError(f"For 'HingeEmbeddingLoss', the second input must be a Tensor, but got {type(targets)}.")
4951
+
4952
+ if inputs_dtype not in mstype.float_type:
4953
+ raise TypeError(f"For 'HingeEmbeddingLoss', the dtype of the first input must be float, but got "
4954
+ f"{inputs_dtype}.")
4955
+ if targets_dtype not in mstype.float_type:
4956
+ raise TypeError(f"For 'HingeEmbeddingLoss', the dtype of the second input must be float, but got "
4957
+ f"{targets_dtype}.")
4221
4958
 
4222
4959
 
4223
4960
  def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
@@ -4247,9 +4984,13 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
4247
4984
  targets (Tensor): Label values, represented as :math:`y` in the formula.
4248
4985
  Has the same shape as `inputs`, contains -1 or 1.
4249
4986
  margin (float, int): Threshold defined by Hinge Embedding Loss :math:`margin`.
4250
- Represented as :math:`\Delta` in the formula. Default: 1.0.
4251
- reduction (str): Specify the computing method to be applied to the outputs: 'none', 'mean', or 'sum'.
4252
- Default: 'mean'.
4987
+ Represented as :math:`\Delta` in the formula. Default: ``1.0`` .
4988
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4989
+ ``'sum'`` . Default: ``'mean'`` .
4990
+
4991
+ - ``'none'``: no reduction will be applied.
4992
+ - ``'mean'``: compute and return the mean of elements in the output.
4993
+ - ``'sum'``: the output elements will be summed.
4253
4994
 
4254
4995
  Returns:
4255
4996
  Tensor or Tensor scalar, the computed loss depending on :math:`reduction`.
@@ -4259,7 +5000,7 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
4259
5000
  TypeError: If `targets` is not a Tensor.
4260
5001
  TypeError: If `margin` is not a float or int.
4261
5002
  ValueError: If `targets` does not have the same shape as `inputs` or they could not broadcast to each other.
4262
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
5003
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
4263
5004
 
4264
5005
  Supported Platforms:
4265
5006
  ``Ascend`` ``GPU`` ``CPU``
@@ -4277,30 +5018,12 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
4277
5018
  >>> print(loss)
4278
5019
  0.16666666
4279
5020
  """
4280
- def _check(inputs_dtype):
4281
- targets_dtype = targets.dtype
4282
- if not isinstance(margin, (float, int)):
4283
- raise TypeError(f"For 'HingeEmbeddingLoss', 'margin' must be a float or int, but got {type(margin)}.")
4284
- if reduction not in ['none', 'mean', 'sum']:
4285
- raise ValueError(f"For 'HingeEmbeddingLoss', 'reduction' must be one of 'none', 'mean', 'sum',"
4286
- f"but got {reduction}.")
4287
- if not isinstance(inputs, Tensor):
4288
- raise TypeError(f"For 'HingeEmbeddingLoss', the first input must be a Tensor, but got {type(inputs)}.")
4289
- if not isinstance(targets, Tensor):
4290
- raise TypeError(f"For 'HingeEmbeddingLoss', the second input must be a Tensor, but got {type(targets)}.")
4291
-
4292
- if inputs_dtype not in mstype.float_type:
4293
- raise TypeError(f"For 'HingeEmbeddingLoss', the dtype of the first input must be float, but got "
4294
- f"{inputs_dtype}.")
4295
- if targets_dtype not in mstype.float_type:
4296
- raise TypeError(f"For 'HingeEmbeddingLoss', the dtype of the second input must be float, but got "
4297
- f"{targets_dtype}.")
4298
-
4299
5021
  inputs_dtype = inputs.dtype
4300
- _check(inputs_dtype)
5022
+ targets_dtype = targets.dtype
5023
+ _check_hinge_embedding_loss_type(inputs_dtype, targets_dtype, inputs, targets, margin, reduction)
4301
5024
  _shape = inputs.shape
4302
5025
  _t_shape = targets.shape
4303
- _check_hinge_embedding_loss(_shape, _t_shape, 'HingeEmbeddingLoss')
5026
+ _check_hinge_embedding_loss(_shape, _t_shape)
4304
5027
 
4305
5028
  min_val = Tensor(0, inputs_dtype)
4306
5029
  pos_index = targets > 0
@@ -4323,6 +5046,9 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True):
4323
5046
  r"""
4324
5047
  Performs greedy decoding on the logits given in inputs.
4325
5048
 
5049
+ Note:
5050
+ On Ascend, 'merge_repeated' cannot be set to False.
5051
+
4326
5052
  Args:
4327
5053
  inputs (Tensor): The input Tensor must be a 3-D tensor whose shape is
4328
5054
  :math:`(max\_time, batch\_size, num\_classes)`. `num_classes` must be `num_labels + 1` classes,
@@ -4330,7 +5056,7 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True):
4330
5056
  Default blank label is `num_classes - 1`. Data type must be float32 or float64.
4331
5057
  sequence_length (Tensor): A tensor containing sequence lengths with the shape of :math:`(batch\_size, )`.
4332
5058
  The type must be int32. Each value in the tensor must be equal to or less than `max_time`.
4333
- merge_repeated (bool): If true, merge repeated classes in output. Default: True.
5059
+ merge_repeated (bool): If ``True`` , merge repeated classes in output. Default: ``True`` .
4334
5060
 
4335
5061
  Returns:
4336
5062
  decoded_indices (Tensor), A tensor with shape of :math:`(total\_decoded\_outputs, 2)`.
@@ -4355,6 +5081,9 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True):
4355
5081
  ``Ascend`` ``GPU`` ``CPU``
4356
5082
 
4357
5083
  Examples:
5084
+ >>> import mindspore
5085
+ >>> import numpy as np
5086
+ >>> from mindspore import Tensor, ops
4358
5087
  >>> inputs = Tensor(np.array([[[0.6, 0.4, 0.2], [0.8, 0.6, 0.3]],
4359
5088
  ... [[0.0, 0.6, 0.0], [0.5, 0.4, 0.5]]]), mindspore.float32)
4360
5089
  >>> sequence_length = Tensor(np.array([2, 2]), mindspore.int32)
@@ -4442,6 +5171,9 @@ def conv3d_transpose(inputs, weight, pad_mode='valid', padding=0, stride=1, dila
4442
5171
  TypeError: If data type of dout and weight is not float16.
4443
5172
 
4444
5173
  Examples:
5174
+ >>> import mindspore
5175
+ >>> import numpy as np
5176
+ >>> from mindspore import Tensor
4445
5177
  >>> dout = Tensor(np.ones([32, 16, 10, 32, 32]), mindspore.float16)
4446
5178
  >>> weight = Tensor(np.ones([16, 3, 4, 6, 2]), mindspore.float16)
4447
5179
  >>> output = conv3d_transpose(dout, weight)
@@ -4472,87 +5204,111 @@ def _manipulate_padding(padding, dim):
4472
5204
  return ms_padding
4473
5205
 
4474
5206
 
4475
- def _manipulate_dilation(dilation, dim=1):
4476
- """convert 1d dilation to 2d"""
4477
- if isinstance(dilation, int):
4478
- return 1, dilation
4479
- if isinstance(dilation, (tuple, list)):
4480
- if len(dilation) != 1:
4481
- raise ValueError(f"For 'conv{dim}d', dilation must be a tuple/list with 1 element or int, \
4482
- but got {dilation}.")
4483
- return 1, dilation[0]
4484
- return dilation
5207
+ def _dim_manipulation(x, name):
5208
+ """convert 1d dilation, stride, etc. to 2d"""
5209
+ if isinstance(x, int):
5210
+ if x <= 0:
5211
+ raise ValueError(f"For 'conv1d', {name} must be a positive int, but got {x}.")
5212
+ return 1, x
5213
+ if isinstance(x, (tuple, list)):
5214
+ if len(x) != 1:
5215
+ raise ValueError(f"For 'conv1d', {name} must be a tuple/list with 1 element or int, but got {x}.")
5216
+ if x[0] <= 0:
5217
+ raise ValueError(f"For 'conv1d', elements in {name} must be positive int, but got {x}.")
5218
+ return 1, x[0]
5219
+ raise ValueError(f"For 'conv1d', {name} must be an int or a tuple/list with 1 element, but got {x}.")
5220
+
5221
+
5222
+ def _check_conv_iterable_lengths(iterable, dim, iter_name):
5223
+ """check iterables lengths used in conv functions"""
5224
+ if len(iterable) != dim:
5225
+ raise ValueError(f"For 'conv{dim}d', the {iter_name} must be a int or a tuple/list with length {dim}, "
5226
+ f"but got {iterable}.")
4485
5227
 
4486
5228
 
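The helpers above lift 1-D convolution parameters into the 2-D (height, width) form expected by the underlying Conv2D primitive; a standalone sketch of that lifting rule (hypothetical `lift_to_2d` name, same behaviour as `_dim_manipulation` minus the positivity checks):

def lift_to_2d(v):
    # an int or a 1-element tuple/list becomes the pair (1, v)
    if isinstance(v, int):
        return 1, v
    if isinstance(v, (tuple, list)) and len(v) == 1:
        return 1, v[0]
    raise ValueError(f"expected an int or a 1-element tuple/list, got {v!r}")

print(lift_to_2d(2), lift_to_2d((3,)))  # (1, 2) (1, 3)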
4487
5229
  def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
4488
5230
  r"""
4489
- Applies a 1D convolution over an input tensor.
4490
- The input tensor is typically of shape :math:`(N, C_{in}, W_{in})`,
4491
- where :math:`N` is batch size, :math:`C_{in}` is channel number, :math:`W` is width, :math:`X_i` is
4492
- the :math:`i^{th}` input value and :math:`b_i` indicates the deviation value of the :math:`i^{th}` input value.
4493
- For each batch of shape :math:`(C_{in}, W_{in})`, the formula is defined as:
5231
+ Applies a 1D convolution over an input tensor. The input Tensor is typically
5232
+ of shape :math:`(N, C_{in}, L_{in})`,
5233
+ where :math:`N` is batch size, :math:`C` is channel number, :math:`L` is input sequence width.
5234
+
5235
+ The output is calculated based on formula:
4494
5236
 
4495
5237
  .. math::
4496
5238
 
4497
- out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{j}, X_i) + b_j,
5239
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
5240
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
5241
+
5242
+ where :math:`bias` is the output channel bias, :math:`ccor` is
5243
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
5244
+ :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5245
+
5246
+ Here are the indices' meanings:
5247
+ - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
5248
+
5249
+ - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
5250
+ output channels, which is also equal to the number of kernels.
4498
5251
 
4499
- where :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_ operator,
4500
- :math:`C_{in}` is the input channel number, :math:`j` ranges
4501
- from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
4502
- filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{j}` is a slice
4503
- of kernel, and it has shape :math:`(\text{kernal_size})`, where :math:`\text{kernel_size}` is the width of
4504
- the convolution kernel. The full kernel has shape :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size})`,
4505
- where `groups` is the group number to split the input in the channel dimension.
5252
+ - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
5253
+ input channels, which is also equal to the number of channels in the convolutional kernels.
4506
5254
 
4507
- If the `pad_mode` is set to be "valid", the output width will be :math:`\left \lfloor{
4508
- 1 + \frac{W_{in} + \text{padding[0]} - \text{kernel_size} - (\text{kernel_size} - 1) \times(\text{dilation} - 1)}
4509
- {\text { stride }}} \right \rfloor`.
5255
+ Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
5256
+ output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
5257
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
5258
+ channel in the :math:`i`-th batch of the input feature map.
4510
5259
 
4511
- where :math:`dilation` is spacing between kernel elements, :math:`stride` is The step length of each step,
4512
- :math:`padding` is zero-padding added to both sides of the input.
4513
- For output width on other `pad_mode`, please refer to formula on `mindspore.nn.Conv1d
4514
- <https://www.mindspore.cn/docs/en/r2.0/api_python/nn/mindspore.nn.Conv2d.html>`_.
5260
+ The shape of the convolutional kernel is given by :math:`(kernel\_size)`,
5261
+ where :math:`kernel\_size` is the width of the kernel.
5262
+ If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5263
+ will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`,
5264
+ where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
4515
5265
 
4516
- The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
4517
- <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. More detailed introduction can be found here:
4518
- `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .
5266
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
5267
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_
5268
+ and `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .
4519
5269
 
4520
5270
  Note:
4521
5271
  On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
4522
5272
  That is, when `groups>1`, condition `C_{in}` = `C_{out}` = `groups` must be satisfied.
4523
5273
 
4524
5274
  Args:
4525
- input (Tensor): Tensor of shape :math:`(N, C_{in}, W_{in})`.
4526
- weight (Tensor): Tensor of shape
4527
- :math:`(N, C_{in} / \text{groups}, \text{kernel_size})`, then the size of kernel is
4528
- :math:`(\text{kernel_size})`.
4529
- bias (Tensor): Bias Tensor with shape :math:`(C_{out})`. When bias is None, zeros will be used. Default: None.
5275
+ input (Tensor): Input Tensor of shape :math:`(N, C_{in}, L_{in})`.
5276
+ weight (Tensor): The convolutional kernel value, it should have shape
5277
+ :math:`(N, C_{in} / \text{groups}, \text{kernel_size})`.
5278
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
5279
+ When bias is None, zeros will be used. Default: ``None`` .
4530
5280
  stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number or a tuple of one int
4531
- that represents width of movement. Default: 1.
5281
+ that represents width of movement. Default: ``1``.
4532
5282
  pad_mode (str, optional): Specifies padding mode. The optional values are
4533
- "same", "valid" and "pad". Default: "valid".
5283
+ ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .
4534
5284
 
4535
- - same: Adopts the way of completion. The height and width of the output will be equal to
5285
+ - ``"same"``: Adopts the way of completion. The height and width of the output will be equal to
4536
5286
  the input `x` divided by stride. The padding will be evenly calculated in left and right possiblily.
4537
5287
  Otherwise, the last extra padding will be calculated from the right side.
4538
5288
  If this mode is set, `padding` must be 0.
4539
5289
 
4540
- - valid: Adopts the way of discarding. The possible largest width of output will be returned
5290
+ - ``"valid"``: Adopts the way of discarding. The possible largest width of output will be returned
4541
5291
  without padding. Extra pixels will be discarded. If this mode is set, `padding` must be 0.
4542
5292
 
4543
- - pad: Implicit paddings on both sides of the input `x`. The number of `padding` will be padded to the input
5293
+ - ``"pad"``: Implicit paddings on both sides of the input `x`.
5294
+ The number of `padding` will be padded to the input
4544
5295
  Tensor borders. `padding` must be greater than or equal to 0.
4545
- padding (Union(int, tuple[int]), optional): Implicit paddings on both sides of `input`, meaning the paddings of
4546
- left and right are the same, equal to padding or padding[0] when padding is a tuple of 1 integer.
4547
- Default: 0.
4548
- dilation (Union(int, tuple[int]), optional): Gaps between kernel elements. The data type is int or a tuple of
4549
- 1 integer. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
4550
- there will be :math:`k - 1` pixels skipped for each sampling location. Its value must be greater than or
4551
- equal to 1 and bounded by the width of `input`. Default: 1.
4552
- groups (int, optional): Splits `input` into groups. Default: 1.
5296
+ padding (Union(int, tuple[int], list[int]), optional): Specifies the amount of padding to apply on
5297
+ both sides of `input` when `pad_mode` is set to ``"pad"``. The
5298
+ paddings of left and right are the same, equal to padding or padding[0] when padding is a tuple of
5299
+ 1 integer. Default: ``0`` .
5300
+ dilation (Union(int, tuple[int]), optional): Specifies the dilation rate to use for dilated convolution.
5301
+ It can be a single int or a tuple of 1 integer.
5302
+ Assuming :math:`dilation=(d0,)`, the convolutional kernel samples the input with a
5303
+ spacing of :math:`d0-1` elements in the width direction.
5304
+ The value should be in the range [1, L].
5305
+ Default: ``1`` .
5306
+ groups (int, optional): Splits `input` into groups. Default: ``1`` .
4553
5307
 
4554
5308
  Returns:
4555
- Tensor, the value that applied 1D convolution. The shape is :math:`(N, C_{out}, W_{out})`.
5309
+ Tensor, the value that applied 1D convolution. The shape is :math:`(N, C_{out}, L_{out})`.
5310
+ To see how different pad modes affect the output shape, please refer to
5311
+ :class:`mindspore.nn.Conv1d` for more details.
4556
5312
 
4557
5313
  Raises:
4558
5314
  TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
@@ -4565,16 +5321,23 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
4565
5321
  ValueError: If `pad_mode` is not equal to 'pad' and `padding` is greater than 0.
4566
5322
 
4567
5323
  Supported Platforms:
4568
- ``Ascend`` ``GPU`` ``CPU``
5324
+ ``Ascend`` ``GPU``
4569
5325
 
4570
5326
  Examples:
5327
+ >>> import mindspore
5328
+ >>> import numpy as np
5329
+ >>> from mindspore import Tensor, ops
4571
5330
  >>> x = Tensor(np.arange(64).reshape((4, 4, 4)), mindspore.float32)
4572
- >>> weight = Tensor(np.arange(8).rehspe((2, 2, 2)), mindspore.float32)
5331
+ >>> weight = Tensor(np.arange(8).reshape((2, 2, 2)), mindspore.float32)
4573
5332
  >>> bias = Tensor([-0.12345, 2.7683], mindspore.float32)
4574
5333
  >>> output = ops.conv1d(x, weight, pad_mode='pad', padding=(1,), bias=bias, groups=2)
4575
5334
  >>> print(output.shape)
4576
5335
  (4, 2, 5)
4577
5336
  """
5337
+ if input.ndim != 3:
5338
+ raise ValueError(f"For 'conv1d', the input must be a 3D Tensor, but got input of {input.ndim}D.")
5339
+ if weight.ndim != 3:
5340
+ raise ValueError(f"For 'conv1d', the weight must be a 3D Tensor, but got input of {weight.ndim}D.")
4578
5341
  _expand = _get_cache_prim(P.ExpandDims)()
4579
5342
  expanded_input = _expand(input, 2)
4580
5343
  sqz = _get_cache_prim(P.Squeeze)(2)
@@ -4589,8 +5352,15 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
4589
5352
  raise ValueError(f"For 'conv1d', padding must be a tuple or list with 1 element or int, but got {padding}.")
4590
5353
  padding = (0, 0, padding[0], padding[0])
4591
5354
  else:
4592
- raise ValueError(f"For 'conv1d', padding must be a tuple, list or int, but got {type(padding)}.")
4593
- dilation = _manipulate_dilation(dilation)
5355
+ raise TypeError(f"For 'conv1d', padding must be a tuple, list or int, but got {type(padding)}.")
5356
+ input_shape = input.shape
5357
+ in_channel = input_shape[1]
5358
+ if not (in_channel % groups == 0 and out_channel % groups == 0):
5359
+ raise ValueError(f"The argument 'groups' should be divisible by 'in_channel' " \
5360
+ f"and 'out_channel', but got group:{groups}, in_channel:{in_channel}, " \
5361
+ f"out_channel:{out_channel}.")
5362
+ dilation = _dim_manipulation(dilation, name='dilation')
5363
+ stride = _dim_manipulation(stride, name='stride')
4594
5364
  conv = _get_cache_prim(P.Conv2D)(out_channel, kernel_size, 1, pad_mode, padding, stride, dilation, groups, "NCHW")
4595
5365
  conv_res = conv(expanded_input, expanded_weight)
4596
5366
  squeezed_conv_res = sqz(conv_res)
@@ -4598,46 +5368,53 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
4598
5368
  return squeezed_conv_res
4599
5369
  if not isinstance(bias, Tensor):
4600
5370
  raise TypeError(f"For 'conv1d', the 'bias' must be a Tensor, but got {type(bias)}.")
5371
+ if bias.shape[0] != out_channel:
5372
+ raise ValueError(f"For 'conv1d', given weight of size {weight_shape}, expected bias to be 1-dimensional with " \
5373
+ f"{out_channel} elements, but got bias of size {bias.shape[0]} instead.")
4601
5374
  output = bias_add(squeezed_conv_res, bias)
4602
5375
  return output
4603
5376
 
4604
5377
 
4605
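The implementation above realizes conv1d by adding a dummy height axis (ExpandDims at dim 2), running the 2-D Conv2D primitive, and squeezing the axis back out. A hedged shape walk-through using the shapes from the docstring example:

import numpy as np
import mindspore
from mindspore import Tensor, ops

x = Tensor(np.arange(64).reshape((4, 4, 4)), mindspore.float32)  # (N=4, C_in=4, L=4)
w = Tensor(np.arange(8).reshape((2, 2, 2)), mindspore.float32)   # (C_out=2, C_in/groups=2, k=2)
out = ops.conv1d(x, w, pad_mode='pad', padding=(1,), groups=2)
print(out.shape)  # (4, 2, 5): L_out = L + 2*padding - k + 1 = 4 + 2 - 2 + 1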
5378
  def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
4606
5379
  r"""
4607
- Applies a 2D convolution over an input tensor.
4608
- The input tensor is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
4609
- where :math:`N` is batch size, :math:`C` is channel number, :math:`H` is height, :math:`W` is width, :math:`X_i` is
4610
- the :math:`i^{th}` input value and :math:`b_i` indicates the deviation value of the :math:`i^{th}` input value.
4611
- For each batch of shape :math:`(C_{in}, H_{in}, W_{in})`, the formula is defined as:
5380
+ Applies a 2D convolution over an input tensor. The input tensor is typically of
5381
+ shape :math:`(N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C` is
5382
+ channel number, :math:`H` is feature height, :math:`W` is feature width.
5383
+
5384
+ The output is calculated based on formula:
4612
5385
 
4613
5386
  .. math::
4614
5387
 
4615
- out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,
5388
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
5389
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
5390
+
5391
+ where :math:`bias` is the output channel bias, :math:`ccor` is
5392
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
5393
+ :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5394
+
5395
+ Here are the indices' meanings:
5396
+ - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
4616
5397
 
4617
- where :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_ operator,
4618
- :math:`C_{in}` is the input channel number, :math:`j` ranges
4619
- from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
4620
- filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
4621
- of kernel, and it has shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`, where :math:`\text{
4622
- kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution kernel.
4623
- The full kernel has shape :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
4624
- where `groups` is the group number to split the input in the channel dimension.
5398
+ - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
5399
+ output channels, which is also equal to the number of kernels.
4625
5400
 
4626
- If the `pad_mode` is set to be "valid", the output height and width will be :math:`\left \lfloor{
4627
- 1 + \frac{H_{in} + \text{padding[0]} + \text{padding[1]} - \text{kernel_size[0]} -
4628
- (\text{kernel_size[0]} - 1) \times(\text{dilation[0]} - 1)} {\text { stride[0] }}} \right \rfloor` and
5401
+ - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
5402
+ input channels, which is also equal to the number of channels in the convolutional kernels.
4629
5403
 
4630
- :math:`\left \lfloor{1 + \frac{W_{in} + \text{padding[2]} + \text{padding[3]} - \text{kernel_size[1]} -
4631
- (\text{kernel_size[1]} - 1) \times(\text{dilation[1]} - 1)} {\text { stride[1] }}} \right \rfloor` respectively.
5404
+ Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
5405
+ output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
5406
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
5407
+ channel in the :math:`i`-th batch of the input feature map.
4632
5408
 
4633
- where :math:`dilation` is spacing between kernel elements, :math:`stride` is The step length of each step,
4634
- :math:`padding` is zero-padding added to both sides of the input.
4635
- For output height and width on other `pad_mode`, please refer to formula on `mindspore.nn.Conv2d
4636
- <https://www.mindspore.cn/docs/en/r2.0/api_python/nn/mindspore.nn.Conv2d.html>`_.
5409
+ The shape of the convolutional kernel is given by :math:`(kernel\_size[0], kernel\_size[1])`,
5410
+ where :math:`kernel\_size[0]` and :math:`kernel\_size[1]` are the height and width of the kernel, respectively.
5411
+ If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5412
+ will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
5413
+ where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
4637
5414
 
4638
- The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
4639
- <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. More detailed introduction can be found here:
4640
- `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .
5415
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
5416
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ and
5417
+ `ConvNets <http://cs231n.github.io/convolutional-networks/>`_.
4641
5418
 
4642
5419
  Note:
4643
5420
  On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
@@ -4648,12 +5425,13 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
4648
5425
  weight (Tensor): Tensor of shape
4649
5426
  :math:`(N, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`, then the size of kernel
4650
5427
  is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`.
4651
- bias (Tensor): Bias Tensor with shape :math:`(C_{out})`. When bias is None, zeros will be used. Default: None.
5428
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
5429
+ When bias is ``None`` , zeros will be used. Default: ``None`` .
4652
5430
  stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents
4653
5431
  the height and width of movement are both strides, or a tuple of two int numbers that
4654
- represent height and width of movement respectively. Default: 1.
5432
+ represent height and width of movement respectively. Default: ``1`` .
4655
5433
  pad_mode (str, optional): Specifies padding mode. The optional values are
4656
- "same", "valid" and "pad". Default: "valid".
5434
+ ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .
4657
5435
 
4658
5436
  - same: Adopts the way of completion. The height and width of the output will be equal to
4659
5437
  the input `x` divided by stride. The padding will be evenly calculated in top and bottom,
@@ -4665,18 +5443,21 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
4665
5443
 
4666
5444
  - pad: Implicit paddings on both sides of the input `x`. The number of `padding` will be padded to the input
4667
5445
  Tensor borders. `padding` must be greater than or equal to 0.
4668
- padding (Union(int, tuple[int]), optional): Implicit paddings on both sides of the input `x`.
5446
+ padding (Union(int, tuple[int], list[int]), optional): Implicit paddings on both sides of the input `x`.
4669
5447
  If `padding` is one integer, the paddings of top, bottom, left and right are the same, equal to padding.
4670
- If `padding` is a tuple with two integers, the padding of top adn bottom is padding[0], and the padding of
4671
- left and right is padding[1]. Default: 0.
5448
+ If `padding` is a tuple/list with 2 integers, the padding of top and bottom is padding[0],
5449
+ and the padding of left and right is padding[1]. Default: ``0`` .
4672
5450
  dilation (Union(int, tuple[int]), optional): Gaps between kernel elements.The data type is int or a tuple of
4673
5451
  2 integers. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
4674
5452
  there will be :math:`k - 1` pixels skipped for each sampling location. Its value must
4675
- be greater than or equal to 1 and bounded by the height and width of the input `x`. Default: 1.
4676
- groups (int, optional): Splits `input` into groups. Default: 1.
5453
+ be greater than or equal to 1 and bounded by the height and width of the input `x`. Default: ``1`` .
5454
+ groups (int, optional): Splits `input` into groups. Default: ``1`` .
4677
5455
 
4678
5456
  Returns:
4679
5457
  Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`.
5458
+ To see how different pad modes affect the output shape, please refer to
5459
+ :class:`mindspore.nn.Conv2d` for more details.
5460
+
4680
5461
 
4681
5462
  Raises:
4682
5463
  TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
@@ -4685,29 +5466,45 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
4685
5466
  ValueError: If the shape of `bias` is not :math:`C_{out}` .
4686
5467
  ValueError: If `stride` or `dilation` is less than 1.
4687
5468
  ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
4688
- ValueError: If `padding` is a tuple whose length is not equal to 2.
5469
+ ValueError: If `padding` is a tuple/list whose length is not equal to 2.
4689
5470
  ValueError: If `pad_mode` is not equal to 'pad' and `padding` is greater than 0.
4690
5471
 
4691
5472
  Supported Platforms:
4692
- ``Ascend`` ``GPU`` ``CPU``
5473
+ ``Ascend`` ``GPU``
4693
5474
 
4694
5475
  Examples:
5476
+ >>> import mindspore
5477
+ >>> import numpy as np
5478
+ >>> from mindspore import Tensor, ops
4695
5479
  >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
4696
5480
  >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
4697
5481
  >>> output = ops.conv2d(x, weight)
4698
5482
  >>> print(output.shape)
4699
5483
  (10, 32, 30, 30)
4700
5484
  """
5485
+ if isinstance(stride, (tuple, list)):
5486
+ _check_conv_iterable_lengths(stride, dim=2, iter_name='stride')
5487
+ if isinstance(dilation, (tuple, list)):
5488
+ _check_conv_iterable_lengths(dilation, dim=2, iter_name='dilation')
4701
5489
  if isinstance(padding, (tuple, list)):
4702
5490
  padding = _manipulate_padding(padding, dim=2)
4703
5491
  weight_shape = weight.shape
4704
5492
  out_channel = weight_shape[0]
4705
5493
  kernel_size = weight_shape[2:4]
5494
+ input_shape = input.shape
5495
+ in_channel = input_shape[1]
5496
+ if not (in_channel % groups == 0 and out_channel % groups == 0):
5497
+ raise ValueError(f"The argument 'groups' should be divisible by 'in_channel' " \
5498
+ f"and 'out_channel', but got group:{groups}, in_channel:{in_channel}, " \
5499
+ f"out_channel:{out_channel}.")
4706
5500
  conv = _get_cache_prim(P.Conv2D)(out_channel, kernel_size, 1, pad_mode, padding, stride, dilation, groups, "NCHW")
4707
5501
  if bias is None:
4708
5502
  return conv(input, weight)
4709
5503
  if not isinstance(bias, Tensor):
4710
5504
  raise TypeError(f"For 'conv2d', the 'bias' must be a Tensor, but got {type(bias)}.")
5505
+ if bias.shape[0] != out_channel:
5506
+ raise ValueError(f"For 'conv2d', Given weight of size {weight_shape}, expected bias to be 1-dimensional with " \
5507
+ f"{out_channel} elements, but got bias of size {bias.shape[0]} instead.")
4711
5508
  conv_result = conv(input, weight)
4712
5509
  output = bias_add(conv_result, bias)
4713
5510
  return output
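
To connect the code path above with the cross-correlation formula in the docstring, here is a minimal NumPy sketch of a stride-1, groups=1, "valid"-padding 2D convolution. It is illustrative only; `conv2d_valid` is not a mindspore API, just a naive reference of the same arithmetic.

import numpy as np

def conv2d_valid(x, weight, bias=None):
    """Naive NCHW cross-correlation: stride 1, no padding, groups=1."""
    n, c_in, h_in, w_in = x.shape
    c_out, _, kh, kw = weight.shape
    h_out, w_out = h_in - kh + 1, w_in - kw + 1
    out = np.zeros((n, c_out, h_out, w_out), dtype=x.dtype)
    for i in range(n):              # batch index i in the formula
        for j in range(c_out):      # output channel index j
            for y in range(h_out):
                for z in range(w_out):
                    # sum over input channels k and the kernel window (cross-correlation)
                    out[i, j, y, z] = np.sum(x[i, :, y:y + kh, z:z + kw] * weight[j])
            if bias is not None:
                out[i, j] += bias[j]
    return out

x = np.ones((1, 2, 5, 5), dtype=np.float32)
weight = np.ones((3, 2, 3, 3), dtype=np.float32)
print(conv2d_valid(x, weight).shape)        # (1, 3, 3, 3)
print(conv2d_valid(x, weight)[0, 0, 0, 0])  # 18.0 = 2 input channels * 3 * 3 ones
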
@@ -4723,24 +5520,27 @@ def hardsigmoid(input):
4723
5520
 
4724
5521
  .. math::
4725
5522
 
4726
- \text{hsigmoid}(x_{i}) = max(0, min(1, \frac{x_{i} + 3}{6}))
5523
+ \text{hsigmoid}(x_{i}) = \max(0, \min(1, \frac{x_{i} + 3}{6}))
4727
5524
 
4728
5525
  where :math:`x_i` is an element of the input Tensor.
4729
5526
 
4730
5527
  Args:
4731
- input (Tensor): Hard Sigmoid input, with float16, float32 or float64 data type.
5528
+ input (Tensor): The input Tensor.
4732
5529
 
4733
5530
  Returns:
4734
5531
  A Tensor whose dtype and shape are the same as `input`.
4735
5532
 
4736
5533
  Raises:
4737
5534
  TypeError: If `input` is not a Tensor.
4738
- TypeError: If dtype of `input` is not float16, float32 or float64.
5535
+ TypeError: If dtype of `input` is not int or float.
4739
5536
 
4740
5537
  Supported Platforms:
4741
5538
  ``Ascend`` ``GPU`` ``CPU``
4742
5539
 
4743
5540
  Examples:
5541
+ >>> import mindspore
5542
+ >>> import numpy as np
5543
+ >>> from mindspore import Tensor, ops
4744
5544
  >>> x = Tensor(np.array([ -3.5, 0, 4.3]), mindspore.float32)
4745
5545
  >>> output = ops.hardsigmoid(x)
4746
5546
  >>> print(output)
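
The hard sigmoid formula above can be checked with plain NumPy; this is an illustrative sketch only, and `hardsigmoid_ref` is not a mindspore API.

import numpy as np

def hardsigmoid_ref(x):
    # max(0, min(1, (x + 3) / 6)), applied element-wise
    return np.clip((x + 3.0) / 6.0, 0.0, 1.0)

print(hardsigmoid_ref(np.array([-3.5, 0.0, 4.3], dtype=np.float32)))
# [0.  0.5 1. ]
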
@@ -4765,8 +5565,8 @@ def hardtanh(input, min_val=-1.0, max_val=1.0):
4765
5565
 
4766
5566
  Args:
4767
5567
  input (Tensor): Input Tensor.
4768
- min_val (Union[int, float]): Minimum value of the linear region range. Default: -1.0.
4769
- max_val (Union[int, float]): Maximum value of the linear region range. Default: 1.0.
5568
+ min_val (Union[int, float]): Minimum value of the linear region range. Default: ``-1.0`` .
5569
+ max_val (Union[int, float]): Maximum value of the linear region range. Default: ``1.0`` .
4770
5570
 
4771
5571
  Returns:
4772
5572
  Tensor, with the same dtype and shape as `input`.
@@ -4780,6 +5580,8 @@ def hardtanh(input, min_val=-1.0, max_val=1.0):
4780
5580
  ``Ascend`` ``GPU`` ``CPU``
4781
5581
 
4782
5582
  Examples:
5583
+ >>> import mindspore
5584
+ >>> from mindspore import Tensor, ops
4783
5585
  >>> x = Tensor([-1, -2, 0, 2, 1], mindspore.float16)
4784
5586
  >>> output = ops.hardtanh(x, min_val=-1.0, max_val=1.0)
4785
5587
  >>> print(output)
@@ -4788,9 +5590,10 @@ def hardtanh(input, min_val=-1.0, max_val=1.0):
4788
5590
  _check_is_tensor('input', input, "hardtanh")
4789
5591
  _check_value_type("min_val", min_val, [int, float], "hardtanh")
4790
5592
  _check_value_type("max_val", max_val, [int, float], "hardtanh")
5593
+ input_dtype = input.dtype
4791
5594
  input = _get_cache_prim(P.Maximum)()(input, min_val)
4792
5595
  input = _get_cache_prim(P.Minimum)()(input, max_val)
4793
- return input
5596
+ return input.astype(input_dtype)
4794
5597
 
4795
5598
 
4796
5599
  def huber_loss(input, target, reduction='mean', delta=1.0):
@@ -4798,8 +5601,9 @@ def huber_loss(input, target, reduction='mean', delta=1.0):
4798
5601
  Calculates the error between the predicted value and the target value,
4799
5602
  which has the best of both the loss of l1 and the loss of mse.
4800
5603
 
4801
- Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, the reduction parameter is set to "none"
4802
- then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction. The formula is as follows:
5604
+ Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, the `reduction` parameter
5605
+ is set to ``"none"`` then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.
5606
+ The formula is as follows:
4803
5607
 
4804
5608
  .. math::
4805
5609
  \ell(x, y) = L = \{l_1,\dots,l_N\}^\top
@@ -4828,26 +5632,33 @@ def huber_loss(input, target, reduction='mean', delta=1.0):
4828
5632
  target (Tensor): Target value, has same dtype and shape as the `input` in common cases.
4829
5633
  However, when the shape of `target` is different from the shape of `input`,
4830
5634
  they should be broadcastable to each other.
4831
- reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum" and "none".
4832
- Default: "mean".
5635
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
5636
+ ``'sum'`` . Default: ``'mean'`` .
5637
+
5638
+ - ``'none'``: no reduction will be applied.
5639
+ - ``'mean'``: compute and return the mean of elements in the output.
5640
+ - ``'sum'``: the output elements will be summed.
5641
+
4833
5642
  delta (Union[int, float]): The threshold to change between two types of loss.
4834
- The value must be greater than zero. Default: 1.0.
5643
+ The value must be greater than zero. Default: ``1.0`` .
4835
5644
 
4836
5645
  Returns:
4837
- Tensor or Scalar, if `reduction` is "none", return a Tensor with same shape and dtype as `input`.
5646
+ Tensor or Scalar, if `reduction` is ``"none"``, return a Tensor with same shape and dtype as `input`.
4838
5647
  Otherwise, a scalar value will be returned.
4839
5648
 
4840
5649
  Raises:
4841
5650
  TypeError: If `input` or `target` is not a Tensor.
4842
5651
  TypeError: If dtype of `delta` is neither float nor int.
4843
5652
  ValueError: If `delta` is less than or equal to 0.
4844
- ValueError: If `reduction` is not one of "none", "mean", "sum".
5653
+ ValueError: If `reduction` is not one of ``"none"``, ``"mean"``, ``"sum"``.
4845
5654
  ValueError: If `input` and `target` have different shapes and cannot be broadcasted to each other.
4846
5655
 
4847
5656
  Supported Platforms:
4848
5657
  ``Ascend`` ``GPU`` ``CPU``
4849
5658
 
4850
5659
  Examples:
5660
+ >>> import mindspore
5661
+ >>> from mindspore import Tensor, ops
4851
5662
  >>> x = Tensor([1, 2, 10, 2], mindspore.float32)
4852
5663
  >>> target = Tensor([1, 5, 1, 20], mindspore.float32)
4853
5664
  >>> output = ops.huber_loss(x, target, reduction="mean", delta=2)
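
For reference, the per-element Huber loss and the three reduction modes can be reproduced in a few NumPy lines. This is a sketch of the standard Huber definition with threshold `delta` (quadratic below the threshold, linear above it), not mindspore code.

import numpy as np

def huber_loss_ref(x, y, delta=1.0, reduction="mean"):
    err = np.abs(x - y)
    # quadratic branch below delta, linear branch above it
    loss = np.where(err < delta, 0.5 * err ** 2, delta * (err - 0.5 * delta))
    if reduction == "mean":
        return loss.mean()
    if reduction == "sum":
        return loss.sum()
    return loss  # 'none'

x = np.array([1.0, 2.0, 10.0, 2.0], dtype=np.float32)
target = np.array([1.0, 5.0, 1.0, 20.0], dtype=np.float32)
print(huber_loss_ref(x, target, delta=2.0, reduction="mean"))  # 13.5
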
@@ -4907,6 +5718,9 @@ def adaptive_avg_pool1d(input, output_size):
4907
5718
  ``Ascend`` ``GPU`` ``CPU``
4908
5719
 
4909
5720
  Examples:
5721
+ >>> import mindspore
5722
+ >>> import numpy as np
5723
+ >>> from mindspore import Tensor, ops
4910
5724
  >>> input = Tensor(np.random.randint(0, 10, [1, 3, 6]), mindspore.float32)
4911
5725
  >>> output = ops.adaptive_avg_pool1d(input, output_size=2)
4912
5726
  >>> print(output.shape)
@@ -4968,7 +5782,7 @@ def batch_norm(input_x, running_mean, running_var, weight, bias, training=False,
4968
5782
  y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta
4969
5783
 
4970
5784
  where :math:`\gamma` is `weight`, :math:`\beta` is `bias`, :math:`\epsilon` is `eps`, :math:`mean` is the
4971
- mean of `x`, :math:`variance` is the variance of `x`.
5785
+ mean of :math:`x`, :math:`variance` is the variance of :math:`x`.
4972
5786
 
4973
5787
  .. warning::
4974
5788
  - For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
@@ -4984,11 +5798,11 @@ def batch_norm(input_x, running_mean, running_var, weight, bias, training=False,
4984
5798
  weight (Union[Tensor, Parameter]): The shape :math:`(C,)`, with float16 or float32 data type.
4985
5799
  bias (Union[Tensor, Parameter]): The shape :math:`(C,)`, has the same data type with `weight`.
4986
5800
  training (bool, optional): If `training` is `True`, `mean` and `variance` are computed during training.
4987
- If `training` is `False`, they're loaded from checkpoint during inference. Default: False.
5801
+ If `training` is `False`, they're loaded from checkpoint during inference. Default: ``False`` .
4988
5802
  momentum (float, optional): The hyper parameter to compute moving average for `running_mean` and `running_var`
4989
5803
  (e.g. :math:`new\_running\_mean = (1 - momentum) * running\_mean + momentum * current\_mean`).
4990
- Momentum value must be `[0, 1]`. Default: 0.1.
4991
- eps (float, optional): A small value added for numerical stability. Default: 1e-5.
5804
+ Momentum value must be `[0, 1]`. Default: ``0.1`` .
5805
+ eps (float, optional): A small value added for numerical stability. The value must be within `(0, 1]` . Default: ``1e-5`` .
4992
5806
 
4993
5807
  Returns:
4994
5808
  output_x (Tensor) - The same type and shape as the `input_x`. The shape is :math:`(N, C)`.
@@ -5003,11 +5817,13 @@ def batch_norm(input_x, running_mean, running_var, weight, bias, training=False,
5003
5817
  ``Ascend`` ``GPU`` ``CPU``
5004
5818
 
5005
5819
  Examples:
5006
- >>> input_x = Tensor([[1.0, 2.0], [3.0, 4.0]], dtype.float32)
5007
- >>> running_mean = Tensor([0.5, 1.5], dtype.float32)
5008
- >>> running_var = Tensor([0.1, 0.2], dtype.float32)
5009
- >>> weight = Tensor([2.0, 2.0], dtype.float32)
5010
- >>> bias = Tensor([-1.0, -1.0], dtype.float32)
5820
+ >>> import mindspore
5821
+ >>> from mindspore import Tensor, ops
5822
+ >>> input_x = Tensor([[1.0, 2.0], [3.0, 4.0]], mindspore.float32)
5823
+ >>> running_mean = Tensor([0.5, 1.5], mindspore.float32)
5824
+ >>> running_var = Tensor([0.1, 0.2], mindspore.float32)
5825
+ >>> weight = Tensor([2.0, 2.0], mindspore.float32)
5826
+ >>> bias = Tensor([-1.0, -1.0], mindspore.float32)
5011
5827
  >>> output = ops.batch_norm(input_x, running_mean, running_var, weight, bias)
5012
5828
  >>> print(output)
5013
5829
  [[ 2.1621194 1.2360122]
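
In inference mode (training=False, so the running statistics are used) the normalization above reduces to simple broadcasting. A NumPy sketch for the 2-D case follows; `batch_norm_infer` is only an illustration, not part of mindspore.

import numpy as np

def batch_norm_infer(x, running_mean, running_var, weight, bias, eps=1e-5):
    # y = (x - mean) / sqrt(var + eps) * gamma + beta, applied per channel
    return (x - running_mean) / np.sqrt(running_var + eps) * weight + bias

x = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
out = batch_norm_infer(x,
                       np.array([0.5, 1.5], dtype=np.float32),
                       np.array([0.1, 0.2], dtype=np.float32),
                       np.array([2.0, 2.0], dtype=np.float32),
                       np.array([-1.0, -1.0], dtype=np.float32))
print(out[0])  # [2.1621... 1.2360...], matching the first row printed above
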
@@ -5024,21 +5840,29 @@ def bias_add(input_x, bias):
5024
5840
  consistent with the shape of the `input_x` Tensor.
5025
5841
 
5026
5842
  Args:
5027
- input_x (Tensor): The input tensor. The shape can be 2-5 dimensions.
5028
- bias (Tensor): The bias tensor, with shape :math:`(C)`. C must be the same as channel dimension C of `input_x`.
5843
+ input_x (Tensor): The input tensor. The shape can be 2-5 dimensions. Supported dtypes:
5844
+
5845
+ - Ascend/CPU: all Number type.
5846
+ - GPU: float16, float32, int8.
5847
+
5848
+ bias (Tensor): The bias tensor, with shape :math:`(C)`. C must be the same as channel dimension C of
5849
+ `input_x`. It has the same type as `input_x`.
5029
5850
 
5030
5851
  Returns:
5031
5852
  Tensor, with the same shape and data type as `input_x`.
5032
5853
 
5033
5854
  Raises:
5034
5855
  TypeError: If `input_x` or `bias` is not a Tensor.
5035
- TypeError: If dtype of `input_x` or `bias` is inconsistent.
5856
+ TypeError: If dtype of `input_x` and `bias` is inconsistent.
5036
5857
  TypeError: If dimension of `input_x` is not in the range [2, 5].
5037
5858
 
5038
5859
  Supported Platforms:
5039
5860
  ``Ascend`` ``GPU`` ``CPU``
5040
5861
 
5041
5862
  Examples:
5863
+ >>> import mindspore
5864
+ >>> import numpy as np
5865
+ >>> from mindspore import Tensor, ops
5042
5866
  >>> input_x = Tensor(np.arange(6).reshape((2, 3)), mindspore.float32)
5043
5867
  >>> bias = Tensor(np.random.random(3).reshape((3)), mindspore.float32)
5044
5868
  >>> output = ops.bias_add(input_x, bias)
@@ -5080,13 +5904,16 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
5080
5904
  labels (Tensor): The target value which has the same shape and data type as `logits`.
5081
5905
  weight (Tensor, optional): A rescaling weight applied to the loss of each batch element.
5082
5906
  Its shape must be able to broadcast to that of `logits` and `labels`.
5083
- And it must have the same shape and data type as `logits`. Default: None. If set to None, the loss function
5907
+ And it must have the same shape and data type as `logits`. Default: ``None`` . If set to ``None`` ,
5908
+ the loss function
5084
5909
  will not consider any sample weights, and each sample will be treated as having equal importance
5085
5910
  when calculating the loss.
5086
- reduction (str, optional): Specify the protocol calculation method used to output the results.
5087
- Its value must be one of 'none', 'mean' or 'sum', respectively indicate that no calculation method is
5088
- specified, using the average value for calculation, and using summation for calculation, not case-sensitive.
5089
- Default: 'mean'.
5911
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
5912
+ ``'sum'`` . Default: ``'mean'`` .
5913
+
5914
+ - ``'none'``: no reduction will be applied.
5915
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
5916
+ - ``'sum'``: the output elements will be summed.
5090
5917
 
5091
5918
  Returns:
5092
5919
  Tensor or Scalar. Returns Tensor that has the same dtype and shape as `logits` if `reduction` is 'none'.
@@ -5095,13 +5922,16 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
5095
5922
  Raises:
5096
5923
  TypeError: If `logits`, `labels` or `weight` is not a Tensor.
5097
5924
  TypeError: If dtype of `logits`, `labels` or `weight` (if given) is neither float16 nor float32.
5098
- ValueError: If `reduction` is not one of 'none', 'mean' or 'sum'.
5925
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
5099
5926
  ValueError: If shape of `labels` is not the same as `logits` or `weight` (if given).
5100
5927
 
5101
5928
  Supported Platforms:
5102
5929
  ``Ascend`` ``GPU`` ``CPU``
5103
5930
 
5104
5931
  Examples:
5932
+ >>> import mindspore
5933
+ >>> import numpy as np
5934
+ >>> from mindspore import Tensor, ops
5105
5935
  >>> logits = Tensor(np.array([0.2, 0.7, 0.1]), mindspore.float32)
5106
5936
  >>> labels = Tensor(np.array([0., 1., 0.]), mindspore.float32)
5107
5937
  >>> weight = Tensor(np.array([1, 2, 2]), mindspore.float32)
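
Treating the `logits` values in this example as probabilities (as the function does), the weighted binary cross-entropy can be written directly in NumPy. This is a reference sketch of the standard definition; `bce_ref` is not a mindspore API.

import numpy as np

def bce_ref(p, y, weight=None, reduction="mean"):
    # -w * (y * log(p) + (1 - y) * log(1 - p)), then the chosen reduction
    loss = -(y * np.log(p) + (1.0 - y) * np.log(1.0 - p))
    if weight is not None:
        loss = loss * weight
    if reduction == "mean":
        return loss.mean()
    if reduction == "sum":
        return loss.sum()
    return loss

p = np.array([0.2, 0.7, 0.1], dtype=np.float32)
y = np.array([0.0, 1.0, 0.0], dtype=np.float32)
w = np.array([1.0, 2.0, 2.0], dtype=np.float32)
print(bce_ref(p, y, w))  # about 0.38, the weighted mean over the three elements
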
@@ -5115,35 +5945,49 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
5115
5945
 
5116
5946
  def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
5117
5947
  r"""
5118
- Applies a 3D convolution over an input tensor. The input tensor is typically of shape
5119
- :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` and output shape
5120
- :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`, where :math:`N` is batch size, :math:`C` is channel number,
5121
- :math:`D` is depth, :math:`H, W` is feature height and width respectively.
5122
- the output value of a layer is calculated as:
5948
+ Applies a 3D convolution over an input tensor. The input tensor is typically of
5949
+ shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C`
5950
+ is channel number, :math:`D` is feature depth, :math:`H` is feature height, :math:`W` is feature width.
5951
+
5952
+ The output is calculated based on the following formula:
5123
5953
 
5124
5954
  .. math::
5125
- \operatorname{out}\left(N_{i}, C_{\text {out}_j}\right)=\operatorname{bias}\left(C_{\text {out}_j}\right)+
5126
- \sum_{k=0}^{C_{in}-1} ccor(\text {weight}\left(C_{\text {out}_j}, k\right),
5127
- \operatorname{input}\left(N_{i}, k\right))
5128
-
5129
- where :math:`k` is kernel,
5130
- :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_ ,
5131
- :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of
5132
- the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)`
5133
- is a convolution kernel slice with shape
5134
- :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
5135
- where :math:`\text{kernel_size[0]}`, :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are
5136
- the depth, height and width of the convolution kernel respectively. :math:`\text{bias}` is the bias parameter
5137
- and :math:`\text{X}` is the input tensor.
5138
- The shape of full convolution kernel is
5139
- :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
5140
- where `groups` is the number of groups to split `input` in the channel dimension.
5141
-
5142
- For more details, please refer to the paper `Gradient Based Learning Applied to Document
5143
- Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ .
5955
+
5956
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
5957
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
5958
+
5959
+ where :math:`bias` is the output channel bias, :math:`ccor` is
5960
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
5961
+ , :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5962
+
5963
+ Here are the indices' meanings:
5964
+ - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
5965
+
5966
+ - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
5967
+ output channels, which is also equal to the number of kernels.
5968
+
5969
+ - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
5970
+ input channels, which is also equal to the number of channels in the convolutional kernels.
5971
+
5972
+ Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
5973
+ output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
5974
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
5975
+ channel in the :math:`i`-th batch of the input feature map.
5976
+
5977
+ The shape of the convolutional kernel is given by
5978
+ :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`
5979
+ where :math:`kernel\_size[0]` , :math:`kernel\_size[1]` and :math:`kernel\_size[2]` are the depth,
5980
+ height and width of the kernel, respectively.
5981
+ If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5982
+ will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]},
5983
+ \text{kernel_size[1]}, \text{kernel_size[2]})`,
5984
+ where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
5985
+
5986
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
5987
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
5144
5988
 
5145
5989
  Note:
5146
- 1. On Ascend platform, :math:`groups = 1` must be satisfied.
5990
+ 1. On Ascend platform, `groups = 1` must be satisfied.
5147
5991
  2. On Ascend dilation on depth only supports the case of 1.
5148
5992
 
5149
5993
  Args:
@@ -5151,45 +5995,45 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5151
5995
  weight (Tensor): Set size of kernel is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]},
5152
5996
  \text{kernel_size[2]})`, then the shape is :math:`(C_{out}, C_{in}, \text{kernel_size[0]},
5153
5997
  \text{kernel_size[1]}, \text{kernel_size[2]})`.
5154
- bias (Tensor): Bias Tensor with shape :math:`(C_{out})`. When bias is None, zeros will be used. Default:
5155
- None.
5998
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
5999
+ When bias is None, zeros will be used. Default: ``None`` .
5156
6000
  stride (Union[int, tuple[int]], optional): The distance of kernel moving,
5157
6001
  it can be an int number that represents
5158
6002
  the depth, height and width of movement or a tuple of three int numbers that
5159
- represent depth, height and width movement respectively. Default: 1.
6003
+ represent depth, height and width movement respectively. Default: ``1`` .
5160
6004
  pad_mode (str, optional): Specifies padding mode. The optional values are
5161
- "same", "valid" and "pad". Default: "valid".
6005
+ ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .
5162
6006
 
5163
- - same: Adopts the way of completion. The depth, height and width of the output will be equal to
6007
+ - ``"same"``: Adopts the way of completion. The depth, height and width of the output will be equal to
5164
6008
  the input `x` divided by stride. The padding will be evenly calculated in head and tail, top and bottom,
5165
6009
  left and right directions possibly.
5166
6010
  Otherwise, the last extra padding will be calculated from the tail, bottom and the right side.
5167
6011
  If this mode is set, `pad` must be 0.
5168
6012
 
5169
- - valid: Adopts the way of discarding. The possible largest depth, height and width of output
6013
+ - ``"valid"``: Adopts the way of discarding. The possible largest depth, height and width of output
5170
6014
  will be returned without padding. Extra pixels will be discarded. If this mode is set, `pad`
5171
6015
  must be 0.
5172
6016
 
5173
- - pad: Implicit paddings on both sides of the input in depth, height and width. The number of `pad` will
5174
- be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
6017
+ - ``"pad"``: Implicit paddings on both sides of the input in depth, height and width.
6018
+ The number of `pad` will be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
5175
6019
 
5176
- padding (Union[int, tuple[int]], optional): The pad value to be filled. If `pad` is an integer,
6020
+ padding (Union[int, tuple[int], list[int]], optional): The pad value to be filled. If `pad` is an integer,
5177
6021
  the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
5178
- If `pad` is a tuple of 3 integers, the padding of head, tail, top, bottom,
5179
- left and right equal to pad[0], pad[0], pad[1], pad[1], pad[2] and pad[2] correspondingly. Default: 0.
6022
+ If `pad` is a tuple/list of 3 integers, the padding of head, tail, top, bottom,
6023
+ left and right equal to pad[0], pad[0], pad[1], pad[1], pad[2] and pad[2] correspondingly. Default: ``0`` .
5180
6024
  dilation (Union[int, tuple[int]], optional): The data type is int or a tuple of 3 integers
5181
6025
  :math:`(dilation_d, dilation_h, dilation_w)`. Currently, dilation on depth only supports the case of 1
5182
6026
  on Ascend backend. Specifies the dilation rate to use for dilated convolution. If set :math:`k > 1`,
5183
6027
  there will be :math:`k - 1` pixels skipped for each sampling location.
5184
6028
  The value ranges for the depth, height, and width dimensions are [1, D], [1, H], and [1, W],
5185
- respectively. Default: 1.
6029
+ respectively. Default: ``1`` .
5186
6030
  groups (int, optional):The number of groups into which the filter is divided. `in_channels`
5187
- and `out_channels` must be divisible by `group`. Default: 1.
6031
+ and `out_channels` must be divisible by `groups`. Default: ``1`` .
5188
6032
 
5189
6033
  Returns:
5190
6034
  Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
5191
6035
 
5192
- `pad_mode` is 'same':
6036
+ `pad_mode` is ``"same"``:
5193
6037
 
5194
6038
  .. math::
5195
6039
  \begin{array}{ll} \\
@@ -5198,7 +6042,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5198
6042
  W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
5199
6043
  \end{array}
5200
6044
 
5201
- `pad_mode` is 'valid':
6045
+ `pad_mode` is ``"valid"``:
5202
6046
 
5203
6047
  .. math::
5204
6048
  \begin{array}{ll} \\
@@ -5210,7 +6054,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5210
6054
  {\text{stride[2]}} + 1} \right \rfloor \\
5211
6055
  \end{array}
5212
6056
 
5213
- `pad_mode` is 'pad':
6057
+ `pad_mode` is ``"pad"``:
5214
6058
 
5215
6059
  .. math::
5216
6060
  \begin{array}{ll} \\
@@ -5229,13 +6073,16 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5229
6073
  ValueError: If the shape of `bias` is not :math:`C_{out}`.
5230
6074
  ValueError: If `stride` or `dilation` is less than 1.
5231
6075
  ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
5232
- ValueError: If `padding` is a tuple whose length is not equal to 4.
6076
+ ValueError: If `padding` is a tuple or list whose length is not equal to 3.
5233
6077
  ValueError: If `pad_mode` is not equal to 'pad' and `pad` is greater than 0.
5234
6078
 
5235
6079
  Supported Platforms:
5236
- ``Ascend`` ``GPU`` ``CPU``
6080
+ ``Ascend`` ``GPU``
5237
6081
 
5238
6082
  Examples:
6083
+ >>> import mindspore
6084
+ >>> import numpy as np
6085
+ >>> from mindspore import Tensor, ops
5239
6086
  >>> x = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float16)
5240
6087
  >>> weight = Tensor(np.ones([32, 3, 4, 3, 3]), mindspore.float16)
5241
6088
  >>> output = ops.conv3d(x, weight, pad_mode="same", padding=0, stride=1, dilation=1, groups=1)
@@ -5251,6 +6098,15 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5251
6098
  weight_shape = weight.shape
5252
6099
  out_channel = weight_shape[0]
5253
6100
  kernel_size = weight_shape[2:5]
6101
+ if isinstance(stride, (tuple, list)):
6102
+ _check_conv_iterable_lengths(stride, dim=3, iter_name='stride')
6103
+ if isinstance(dilation, (tuple, list)):
6104
+ _check_conv_iterable_lengths(dilation, dim=3, iter_name='dilation')
6105
+ input_shape = input.shape
6106
+ in_channel = input_shape[1]
6107
+ if not (in_channel % groups == 0 and out_channel % groups == 0):
6108
+ raise ValueError("The argument 'groups' should be divisible by 'in_channel' " \
6109
+ "and 'out_channel'")
5254
6110
  if isinstance(padding, (list, tuple)):
5255
6111
  padding = _manipulate_padding(padding, dim=3)
5256
6112
  conv = _get_cache_prim(P.Conv3D)(out_channel, kernel_size, 1, pad_mode, padding, stride, dilation, groups, "NCDHW")
@@ -5263,7 +6119,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5263
6119
  return output
5264
6120
 
5265
6121
 
5266
- @constexpr
6122
+ @_primexpr
5267
6123
  def _check_positive_int(arg_value, arg_name=None, prim_name=None):
5268
6124
  validator.check_positive_int(arg_value, arg_name=arg_name, prim_name=prim_name)
5269
6125
 
@@ -5275,6 +6131,12 @@ def _check_pxiel_shuffle_valid(num, factor):
5275
6131
  "by `upscale_factor` squared.")
5276
6132
 
5277
6133
 
6134
+ def _check_pixel_shuffle_unshuffle_input_shape(input, cls_name):
6135
+ """Internal function, used to check whether the shape of pixel shuffle or unshuffle input meets the requirements."""
6136
+ if input.ndim < 3:
6137
+ raise ValueError(f"For {cls_name}, the dimension of `input` should be larger than 2, but got {input.ndim}.")
6138
+
6139
+
5278
6140
  def pixel_shuffle(input, upscale_factor):
5279
6141
  r"""
5280
6142
  Applies the PixelShuffle operation over input `input` which implements sub-pixel convolutions
@@ -5297,12 +6159,16 @@ def pixel_shuffle(input, upscale_factor):
5297
6159
  Raises:
5298
6160
  ValueError: If `upscale_factor` is not a positive integer.
5299
6161
  ValueError: If the length of third to last dimension is not divisible by `upscale_factor` squared.
5300
- TypeError: If the dimension of `input` is less than 3.
6162
+ ValueError: If the dimension of `input` is less than 3.
6163
+ TypeError: If `input` is not a Tensor.
5301
6164
 
5302
6165
  Supported Platforms:
5303
6166
  ``Ascend`` ``GPU`` ``CPU``
5304
6167
 
5305
6168
  Examples:
6169
+ >>> import mindspore
6170
+ >>> import numpy as np
6171
+ >>> from mindspore import ops
5306
6172
  >>> input_x = np.arange(3 * 2 * 9 * 4 * 4).reshape((3, 2, 9, 4, 4))
5307
6173
  >>> input_x = mindspore.Tensor(input_x, mindspore.dtype.int32)
5308
6174
  >>> output = ops.pixel_shuffle(input_x, 3)
@@ -5310,10 +6176,10 @@ def pixel_shuffle(input, upscale_factor):
5310
6176
  (3, 2, 1, 12, 12)
5311
6177
  """
5312
6178
  _check_positive_int(upscale_factor, "upscale_factor")
6179
+ _check_is_tensor("input", input, "pixel_shuffle")
6180
+ _check_pixel_shuffle_unshuffle_input_shape(input, "pixel_shuffle")
5313
6181
  idx = P.Shape()(input)
5314
- length = len(idx)
5315
- if length < 3:
5316
- raise TypeError(f"For pixel_shuffle, the dimension of `input` should be larger than 2, but got {length}.")
6182
+ length = input.ndim
5317
6183
  pre = idx[:-3]
5318
6184
  c, h, w = idx[-3:]
5319
6185
  _check_pxiel_shuffle_valid(c, upscale_factor)
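
The rearrangement that follows these checks is the standard sub-pixel shuffle, i.e. a reshape, an axis permutation, and a final reshape. Below is a NumPy sketch of the equivalent transform (illustrative only; `pixel_shuffle_ref` is not a mindspore helper).

import numpy as np

def pixel_shuffle_ref(x, r):
    """(*, C*r*r, H, W) -> (*, C, H*r, W*r) via reshape + axis moves."""
    *pre, c, h, w = x.shape
    assert c % (r * r) == 0
    x = x.reshape(*pre, c // (r * r), r, r, h, w)
    # move the two r factors next to H and W, giving (..., C, H, r, W, r)
    x = np.moveaxis(x, (-4, -3), (-3, -1))
    return x.reshape(*pre, c // (r * r), h * r, w * r)

x = np.arange(3 * 2 * 9 * 4 * 4).reshape(3, 2, 9, 4, 4)
print(pixel_shuffle_ref(x, 3).shape)  # (3, 2, 1, 12, 12), matching the docstring example
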
@@ -5358,12 +6224,16 @@ def pixel_unshuffle(input, downscale_factor):
5358
6224
  Raises:
5359
6225
  ValueError: If `downscale_factor` is not a positive integer.
5360
6226
  ValueError: If the length of second to last dimension or last dimension is not divisible by `downscale_factor` .
5361
- TypeError: If the dimension of `input` is less than 3.
6227
+ ValueError: If the dimension of `input` is less than 3.
6228
+ TypeError: If `input` is not a Tensor.
5362
6229
 
5363
6230
  Supported Platforms:
5364
6231
  ``Ascend`` ``GPU`` ``CPU``
5365
6232
 
5366
6233
  Examples:
6234
+ >>> import mindspore
6235
+ >>> import numpy as np
6236
+ >>> from mindspore import Tensor, ops
5367
6237
  >>> input_x = np.arange(8 * 8).reshape((1, 1, 8, 8))
5368
6238
  >>> input_x = mindspore.Tensor(input_x, mindspore.dtype.int32)
5369
6239
  >>> output = ops.pixel_unshuffle(input_x, 2)
@@ -5371,10 +6241,10 @@ def pixel_unshuffle(input, downscale_factor):
5371
6241
  (1, 4, 4, 4)
5372
6242
  """
5373
6243
  _check_positive_int(downscale_factor, "downscale_factor")
6244
+ _check_is_tensor("input", input, "pixel_unshuffle")
6245
+ _check_pixel_shuffle_unshuffle_input_shape(input, "pixel_unshuffle")
5374
6246
  idx = P.Shape()(input)
5375
- length = len(idx)
5376
- if length < 3:
5377
- raise TypeError(f"For pixel_unshuffle, the dimension of `input` should be larger than 2, but got {length}.")
6247
+ length = input.ndim
5378
6248
  pre = idx[:-3]
5379
6249
  c, h, w = idx[-3:]
5380
6250
  _check_pxiel_unshuffle_valid(h, w, downscale_factor)
@@ -5407,7 +6277,7 @@ def glu(x, axis=-1):
5407
6277
  Args:
5408
6278
  x (Tensor): Tensor to be split. Its dtype is Number, and shape is :math:`(\ast_1, N, \ast_2)`
5409
6279
  where `*` means any number of additional dimensions.
5410
- axis (int, optional): the axis to split the input. It must be int. Default: -1, the last axis of `x`.
6280
+ axis (int, optional): the axis to split the input. It must be int. Default: ``-1`` , the last axis of `x`.
5411
6281
 
5412
6282
  Returns:
5413
6283
  Tensor, the same dtype as the `x`, with the shape :math:`(\ast_1, M, \ast_2)` where :math:`M=N/2`.
@@ -5417,9 +6287,10 @@ def glu(x, axis=-1):
5417
6287
  TypeError: If `x` is not a Tensor.
5418
6288
 
5419
6289
  Supported Platforms:
5420
- ``Ascend`` ``CPU``
6290
+ ``Ascend`` ``GPU`` ``CPU``
5421
6291
 
5422
6292
  Examples:
6293
+ >>> from mindspore import Tensor, ops
5423
6294
  >>> input = Tensor([[0.1,0.2,0.3,0.4],[0.5,0.6,0.7,0.8]])
5424
6295
  >>> output = ops.glu(input)
5425
6296
  >>> print(output)
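
The gated linear unit splits the input in half along `axis` and gates the first half with the sigmoid of the second. A NumPy sketch of that computation (illustrative only; `glu_ref` is not a mindspore API):

import numpy as np

def glu_ref(x, axis=-1):
    a, b = np.split(x, 2, axis=axis)        # x must have an even length along `axis`
    return a * (1.0 / (1.0 + np.exp(-b)))   # a * sigmoid(b)

x = np.array([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]], dtype=np.float32)
print(glu_ref(x))
# [[0.0574... 0.1197...]
#  [0.3340... 0.4139...]]
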
@@ -5455,19 +6326,19 @@ def multi_margin_loss(input, target, p=1, margin=1, weight=None, reduction='mean
5455
6326
  It is :math:`x` in the above formula.
5456
6327
  target (Tensor): Ground truth labels, with shape :math:`(N,)`. Data type only support int64. The
5457
6328
  value of target should be non-negative, less than C. It is :math:`y` in the above formula.
5458
- p (int, optional): The norm degree for pairwise distance. Should be 1 or 2. Default: 1.
5459
- margin (int, optional): A parameter to change pairwise distance. Default: 1.
6329
+ p (int, optional): The norm degree for pairwise distance. Should be 1 or 2. Default: ``1`` .
6330
+ margin (int, optional): A parameter to change pairwise distance. Default: ``1`` .
5460
6331
  weight (Tensor, optional): The rescaling weight to each class with shape :math:`(C,)`. Data type only
5461
- support float16, float32 or float64. Default: None.
5462
- reduction (str, optional): Apply specific reduction method to the output: 'none', 'mean',
5463
- 'sum'. Default: 'mean'.
6332
+ support float16, float32 or float64. Default: ``None`` .
6333
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
6334
+ ``'sum'`` . Default: ``'mean'`` .
5464
6335
 
5465
- - 'none': no reduction will be applied.
5466
- - 'mean': the sum of the output will be divided by the number of elements in the output.
5467
- - 'sum': the output will be summed.
6336
+ - ``'none'``: no reduction will be applied.
6337
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
6338
+ - ``'sum'``: the output elements will be summed.
5468
6339
 
5469
6340
  Returns:
5470
- Tensor. If `reduction` is 'none', returns a Tensor with the same shape as `target`.
6341
+ Tensor. If `reduction` is ``'none'``, returns a Tensor with the same shape as `target`.
5471
6342
  Otherwise, it is a scalar.
5472
6343
 
5473
6344
  Raises:
@@ -5480,12 +6351,15 @@ def multi_margin_loss(input, target, p=1, margin=1, weight=None, reduction='mean
5480
6351
  ValueError: If `reduction` is not one of {'none','sum','mean'}.
5481
6352
  ValueError: If shape[0] of `input` is not equal to shape[0] of `target`.
5482
6353
  ValueError: If shape[1] of `input` is not equal to shape[0] of `weight`.
5483
- ValueError: If rank of `weight` is not 1 or rank of `target` is not 1 or `input` is not 2.
6354
+ ValueError: If rank of `weight` is not 1, rank of `target` is not 1, or rank of `input` is not 2.
5484
6355
 
5485
6356
  Supported Platforms:
5486
6357
  ``Ascend`` ``GPU`` ``CPU``
5487
6358
 
5488
6359
  Examples:
6360
+ >>> import mindspore
6361
+ >>> import numpy as np
6362
+ >>> from mindspore import Tensor, ops
5489
6363
  >>> inputs = Tensor(np.ones(shape=[3, 3]), mindspore.float32)
5490
6364
  >>> target = Tensor(np.array([1, 2, 1]), mindspore.int64)
5491
6365
  >>> weight = Tensor(np.array([1, 1, 1]), mindspore.float32)
@@ -5528,16 +6402,17 @@ def multilabel_margin_loss(input, target, reduction='mean'):
5528
6402
  is the batch size and :math:`C` is the number of classes. Data type must be float16 or float32.
5529
6403
  target (Tensor): Ground truth data, with the same shape as `input`, data type must be int32 and
5530
6404
  label targets padded by -1.
5531
- reduction (str, optional): Apply specific reduction method to the output: 'none', 'mean',
5532
- 'sum'. Default: 'mean'.
6405
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
6406
+ ``'sum'`` . Default: ``'mean'`` .
5533
6407
 
5534
- - 'none': no reduction will be applied.
5535
- - 'mean': the sum of the output will be divided by the number of elements in the output.
5536
- - 'sum': the output will be summed.
6408
+ - ``'none'``: no reduction will be applied.
6409
+ - ``'mean'``: compute and return the mean of elements in the output.
6410
+ - ``'sum'``: the output elements will be summed.
5537
6411
 
5538
6412
  Returns:
5539
- - **outputs** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss. If `reduction` is "none", its shape
5540
- is :math:`(N)`. Otherwise, a scalar value will be returned.
6413
+ - **outputs** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss.
6414
+ If `reduction` is ``"none"``, its shape is :math:`(N)`.
6415
+ Otherwise, a scalar value will be returned.
5541
6416
 
5542
6417
  Raises:
5543
6418
  TypeError: If `input` or `target` is not a Tensor.
@@ -5545,12 +6420,15 @@ def multilabel_margin_loss(input, target, reduction='mean'):
5545
6420
  TypeError: If dtype of `target` is not int32.
5546
6421
  ValueError: If length of shape of `input` is neither 1 nor 2.
5547
6422
  ValueError: If shape of `input` is not the same as `target`.
5548
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
6423
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
5549
6424
 
5550
6425
  Supported Platforms:
5551
6426
  ``Ascend`` ``GPU``
5552
6427
 
5553
6428
  Examples:
6429
+ >>> import mindspore
6430
+ >>> import numpy as np
6431
+ >>> from mindspore import Tensor, ops
5554
6432
  >>> inputs = Tensor(np.array([[0.1, 0.2, 0.4, 0.8], [0.2, 0.3, 0.5, 0.7]]), mindspore.float32)
5555
6433
  >>> target = Tensor(np.array([[1, 2, 0, 3], [2, 3, -1, 1]]), mindspore.int32)
5556
6434
  >>> output = ops.multilabel_margin_loss(inputs, target)
@@ -5585,15 +6463,19 @@ def multilabel_soft_margin_loss(input, target, weight=None, reduction='mean'):
5585
6463
  multiply to the loss of each class if given.
5586
6464
 
5587
6465
  Args:
5588
- input (Tensor): A tensor of shape (N, C), where N is batch size and C is number of classes.
6466
+ input (Tensor): A tensor of shape :math:`(N, C)` , where N is batch size and C is number of classes.
5589
6467
  target (Tensor): The label target Tensor which has the same shape as `input`.
5590
- weight (Union[Tensor, int, float]): The manual rescaling weight given to each class. Default: None.
5591
- reduction (str): Specifies which reduction to be applied to the output. It must be one of
5592
- 'none', 'mean', and 'sum', meaning no reduction, reduce mean and sum on output, respectively.
5593
- Default: 'mean'.
6468
+ weight (Union[Tensor, int, float]): The manual rescaling weight given to each class. Default: ``None``.
6469
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
6470
+ ``'sum'`` . Default: ``'mean'`` .
6471
+
6472
+ - ``'none'``: no reduction will be applied.
6473
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
6474
+ - ``'sum'``: the output elements will be summed.
5594
6475
 
5595
6476
  Returns:
5596
- Tensor, the data type is the same as input, if the reduction is 'none', its shape is (N), otherwise it is zero.
6477
+ Tensor, the data type is the same as input, if the `reduction` is ``'none'``,
6478
+ its shape is :math:`(N)` , otherwise a scalar value will be returned.
5597
6479
 
5598
6480
  Raises:
5599
6481
  ValueError: If the rank of `input` or `target` is not 2.
@@ -5602,6 +6484,7 @@ def multilabel_soft_margin_loss(input, target, weight=None, reduction='mean'):
5602
6484
  ``Ascend`` ``GPU`` ``CPU``
5603
6485
 
5604
6486
  Examples:
6487
+ >>> from mindspore import Tensor, ops
5605
6488
  >>> input = Tensor([[0.3, 0.6, 0.6], [0.9, 0.4, 0.2]])
5606
6489
  >>> target = Tensor([[0.0, 0.0, 1.0], [0.0, 0.0, 1.0]])
5607
6490
  >>> loss = ops.multilabel_soft_margin_loss(input, target, reduction='mean')
@@ -5659,7 +6542,7 @@ def elu(input_x, alpha=1.0):
5659
6542
  Args:
5660
6543
  input_x (Tensor): The input of ELU is a Tensor of any dimension with data type of float16 or float32.
5661
6544
  alpha (float, optional): The alpha value of ELU, the data type is float. Only support '1.0' currently.
5662
- Default: 1.0.
6545
+ Default: ``1.0`` .
5663
6546
 
5664
6547
  Returns:
5665
6548
  Tensor, has the same shape and data type as `input_x`.
@@ -5673,6 +6556,9 @@ def elu(input_x, alpha=1.0):
5673
6556
  ``Ascend`` ``GPU`` ``CPU``
5674
6557
 
5675
6558
  Examples:
6559
+ >>> import mindspore
6560
+ >>> import numpy as np
6561
+ >>> from mindspore import Tensor, ops
5676
6562
  >>> x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
5677
6563
  >>> output = ops.elu(x)
5678
6564
  >>> print(output)
@@ -5701,12 +6587,12 @@ def gelu(input_x, approximate='none'):
5701
6587
  When `approximate` argument is `tanh`, GeLU is estimated with:
5702
6588
 
5703
6589
  .. math::
5704
- GELU(x_i) = 0.5 * x_i * (1 + tanh(\sqrt(2 / \pi) * (x_i + 0.044715 * x_i^3)))
6590
+ GELU(x_i) = 0.5 * x_i * (1 + \tanh(\sqrt{2 / \pi} * (x_i + 0.044715 * x_i^3)))
5705
6591
 
5706
6592
  Args:
5707
6593
  input_x (Tensor): The input of the activation function GeLU, the data type is float16, float32 or float64.
5708
- approximate (str): the gelu approximation algorithm to use. Acceptable vaslues are 'none' and 'tanh'.
5709
- Default: 'none'.
6594
+ approximate (str): the gelu approximation algorithm to use. Acceptable values are ``'none'`` and ``'tanh'`` .
6595
+ Default: ``'none'`` .
5710
6596
 
5711
6597
  Returns:
5712
6598
  Tensor, with the same type and shape as `input_x`.
@@ -5720,6 +6606,8 @@ def gelu(input_x, approximate='none'):
5720
6606
  ``Ascend`` ``GPU`` ``CPU``
5721
6607
 
5722
6608
  Examples:
6609
+ >>> import mindspore
6610
+ >>> from mindspore import Tensor, ops
5723
6611
  >>> x = Tensor([1.0, 2.0, 3.0], mindspore.float32)
5724
6612
  >>> result = ops.gelu(x)
5725
6613
  >>> print(result)
@@ -5728,21 +6616,24 @@ def gelu(input_x, approximate='none'):
5728
6616
  if approximate not in ['none', 'tanh']:
5729
6617
  raise ValueError("For ops.gelu, approximate value should be either 'none' or 'tanh'.")
5730
6618
 
5731
- output = _get_cache_prim(P.GeLU)()(input_x)
5732
-
6619
+ x_dtype = _get_cache_prim(P.DType)()(input_x)
6620
+ if x_dtype not in [mstype.float16, mstype.float32, mstype.float64]:
6621
+ raise TypeError(f"For gelu, the input dtype must be float16, float32 or float64, "
6622
+ f"but got {x_dtype}.")
5733
6623
  if approximate == 'tanh':
5734
- output = _get_cache_prim(P.Pow)()(input_x, Tensor([3]))
5735
- output = output * Tensor([0.044715]) + input_x
5736
- output = output * _get_cache_prim(P.Sqrt)()(Tensor(2.0 / pi))
5737
- output = _get_cache_prim(P.Tanh)()(output) + Tensor([1.0])
5738
- output = output * input_x * Tensor([0.5])
6624
+ output = _get_cache_prim(P.GeLU)()(input_x)
6625
+ else:
6626
+ output = _get_cache_prim(P.Sqrt)()(Tensor(2.0, x_dtype))
6627
+ output = _get_cache_prim(P.Div)()(input_x, output)
6628
+ output = _get_cache_prim(P.Erf)()(output) + Tensor(1.0, x_dtype)
6629
+ output = input_x * output * Tensor(0.5, x_dtype)
5739
6630
 
5740
6631
  return output
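
For reference, the two forms selected by `approximate` can be written out in plain NumPy: the erf expression mirrors what the else branch computes, while the tanh expression is the approximation quoted in the docstring, which the `GeLU` primitive is assumed here to implement. Sketch only, not mindspore code.

import numpy as np
from math import erf, sqrt, pi

def gelu_exact(x):
    # 0.5 * x * (1 + erf(x / sqrt(2)))  -- the approximate='none' form
    return np.array([0.5 * v * (1.0 + erf(v / sqrt(2.0))) for v in x])

def gelu_tanh(x):
    # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x**3)))  -- the tanh approximation
    return 0.5 * x * (1.0 + np.tanh(sqrt(2.0 / pi) * (x + 0.044715 * x ** 3)))

x = np.array([1.0, 2.0, 3.0])
print(gelu_exact(x))  # approximately [0.8413 1.9545 2.9960]
print(gelu_tanh(x))   # very close:   [0.8412 1.9546 2.9964]
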
5741
6632
 
5742
6633
 
5743
6634
  def channel_shuffle(x, groups):
5744
6635
  r"""
5745
- Divide the channels in a tensor of shape :math:`(*, C, H, W)` into g groups and
6636
+ Divide the channels in a tensor of shape :math:`(*, C, H, W)` into :math:`g` groups and
5746
6637
  rearrange them as :math:`(*, \frac{C}{g}, g, H*W)`, while keeping the original tensor shapes.
5747
6638
 
5748
6639
  Args:
@@ -5764,6 +6655,9 @@ def channel_shuffle(x, groups):
5764
6655
  ``Ascend`` ``CPU``
5765
6656
 
5766
6657
  Examples:
6658
+ >>> import mindspore
6659
+ >>> import numpy as np
6660
+ >>> from mindspore import Tensor, ops
5767
6661
  >>> group = 2
5768
6662
  >>> x = Tensor(np.arange(1* 4 * 2 * 2).reshape(1, 4, 2, 2).astype(np.int16))
5769
6663
  >>> y = mindspore.ops.channel_shuffle(x, group)
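
The shuffle described above is a reshape into (groups, C/groups), a swap of those two channel factors, and a reshape back. A NumPy sketch for the 4-D case (illustrative only; `channel_shuffle_ref` is not a mindspore helper):

import numpy as np

def channel_shuffle_ref(x, groups):
    n, c, h, w = x.shape
    assert c % groups == 0
    x = x.reshape(n, groups, c // groups, h, w)
    x = x.transpose(0, 2, 1, 3, 4)  # swap the group and per-group channel axes
    return x.reshape(n, c, h, w)

x = np.arange(1 * 4 * 2 * 2).reshape(1, 4, 2, 2).astype(np.int16)
print(channel_shuffle_ref(x, 2)[0, :, 0, 0])  # [ 0  8  4 12]: channels reordered as 0, 2, 1, 3
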
@@ -5813,8 +6707,8 @@ def lp_pool1d(x, norm_type, kernel_size, stride=None, ceil_mode=False):
5813
6707
 
5814
6708
  kernel_size (int): The size of kernel window.
5815
6709
  stride (int): The distance of kernel moving, an int number that represents
5816
- the width of movement is stride, if the value is None, the default value `kernel_size` is used;
5817
- ceil_mode (bool): Whether to use ceil or floor to calculate output shape. Default: False.
6710
+ the width of movement is stride. Default: ``None`` , which indicates the moving step is `kernel_size` .
6711
+ ceil_mode (bool): Whether to use ceil or floor to calculate output shape. Default: ``False`` .
5818
6712
 
5819
6713
  Returns:
5820
6714
  - **output** (Tensor) - LPPool1d result, with shape :math:`(N, C, L_{out})` or :math:`(C, L_{out})`,
@@ -5894,9 +6788,9 @@ def lp_pool2d(x, norm_type, kernel_size, stride=None, ceil_mode=False):
5894
6788
  or a tuple of two int numbers that represent height and width respectively.
5895
6789
  stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
5896
6790
  the height and width of movement are both strides, or a tuple of two int numbers that
5897
- represent height and width of movement respectively, if the value is None,
5898
- the default value `kernel_size` is used.
5899
- ceil_mode (bool): Whether to use ceil or floor to calculate output shape. Default: False.
6791
+ represent height and width of movement respectively.
6792
+ Default: ``None`` , which indicates the moving step is `kernel_size` .
6793
+ ceil_mode (bool): Whether to use ceil or floor to calculate output shape. Default: ``False`` .
5900
6794
 
5901
6795
  Returns:
5902
6796
  - **output** (Tensor) - LPPool2d result, with shape :math:`(N, C, H_{in}, W_{in})`,
@@ -5970,21 +6864,28 @@ def mse_loss(input, target, reduction='mean'):
5970
6864
  target (Tensor): The input label. Tensor of any dimension, same shape as the `input` in common cases.
5971
6865
  However, it supports that the shape of `input` is different from the shape of `target`
5972
6866
  and they should be broadcasted to each other.
5973
- reduction (str, optional): Type of reduction to be applied to loss.
5974
- The optional values are "mean", "none" and "sum". Default: "mean".
6867
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
6868
+ ``'sum'`` . Default: ``'mean'`` .
6869
+
6870
+ - ``'none'``: no reduction will be applied.
6871
+ - ``'mean'``: compute and return the mean of elements in the output.
6872
+ - ``'sum'``: the output elements will be summed.
5975
6873
 
5976
6874
  Returns:
5977
- Tensor, loss of type float, the shape is zero if `reduction` is 'mean' or 'sum',
5978
- while the shape of output is the broadcasted shape if `reduction` is 'none'.
6875
+ Tensor, loss of type float, the shape is zero if `reduction` is ``'mean'`` or ``'sum'`` ,
6876
+ while the shape of output is the broadcasted shape if `reduction` is ``'none'`` .
5979
6877
 
5980
6878
  Raises:
5981
- ValueError: If `reduction` is not one of 'none', 'mean' or 'sum'.
6879
+ ValueError: If `reduction` is not one of ``'none'`` , ``'mean'`` or ``'sum'``.
5982
6880
  ValueError: If `input` and `target` have different shapes and cannot be broadcasted.
5983
6881
 
5984
6882
  Supported Platforms:
5985
6883
  ``Ascend`` ``GPU`` ``CPU``
5986
6884
 
5987
6885
  Examples:
6886
+ >>> import mindspore
6887
+ >>> import numpy as np
6888
+ >>> from mindspore import Tensor, ops
5988
6889
  >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
5989
6890
  >>> labels = Tensor(np.array([[1, 1, 1], [1, 2, 2]]), mindspore.float32)
5990
6891
  >>> output = ops.mse_loss(logits, labels, reduction='none')
@@ -6000,7 +6901,11 @@ def mse_loss(input, target, reduction='mean'):
6000
6901
  raise ValueError("For ops.mse_loss, `reduction` value should be either 'mean', 'none' or 'sum'.")
6001
6902
 
6002
6903
  x = _get_cache_prim(P.Square)()(input - target)
6003
- input_dtype = x.dtype
6904
+ float_type = (mstype.float16, mstype.float32, mstype.float64)
6905
+ if x.dtype not in float_type:
6906
+ input_dtype = mstype.float32
6907
+ else:
6908
+ input_dtype = x.dtype
6004
6909
  x = _get_cache_prim(P.Cast)()(x, mstype.float32)
6005
6910
 
6006
6911
  average_flag = True
@@ -6010,15 +6915,11 @@ def mse_loss(input, target, reduction='mean'):
6010
6915
  if reduction == 'none':
6011
6916
  reduce_flag = False
6012
6917
 
6013
- perm = _get_cache_prim(P.Range)()(Tensor(0, mstype.int32),
6014
- Tensor(len(x.shape), mstype.int32),
6015
- Tensor(1, mstype.int32))
6016
-
6017
6918
  if reduce_flag and average_flag:
6018
- x = _get_cache_prim(P.ReduceMean)()(x, perm)
6919
+ x = _get_cache_prim(P.ReduceMean)()(x, _get_axis(x))
6019
6920
 
6020
6921
  if reduce_flag and not average_flag:
6021
- x = _get_cache_prim(P.ReduceSum)()(x, perm)
6922
+ x = _get_cache_prim(P.ReduceSum)()(x, _get_axis(x))
6022
6923
 
6023
6924
  return _get_cache_prim(P.Cast)()(x, input_dtype)
6024
6925
 
@@ -6066,16 +6967,20 @@ def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06,
  as `anchor`.
  negative (Tensor): A sample belonging to the different class from `anchor`, with the same type and shape
  as `anchor`.
- margin (float, optional): Make a margin between the positive pair and the negative pair. Default: 1.0.
- p (int, optional): The degree of norm for pairwise distance. Default: 2.
- eps (float, optional): Add small value to avoid division by zero. Default: 1e-06.
+ margin (float, optional): Make a margin between the positive pair and the negative pair. Default: ``1.0`` .
+ p (int, optional): The degree of norm for pairwise distance. Default: ``2`` .
+ eps (float, optional): Add small value to avoid division by zero. Default: ``1e-06``.
  swap (bool, optional): The distance swap change the negative distance to the distance between positive
- sample and negative sample. Default: "False".
- reduction (str, optional): Apply specific reduction method to the output: 'none', 'mean', 'sum'.
- Default: "mean".
+ sample and negative sample. Default: ``False`` .
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+ ``'sum'`` . Default: ``'mean'`` .
+
+ - ``'none'``: no reduction will be applied.
+ - ``'mean'``: compute and return the mean of elements in the output.
+ - ``'sum'``: the output elements will be summed.

  Returns:
- Tensor. If `reduction` is "none", its shape is :math:`(N)`. Otherwise, a scalar value will be returned.
+ Tensor. If `reduction` is ``"none"``, its shape is :math:`(N)`. Otherwise, a scalar value will be returned.

  Raises:
  TypeError: If `anchor` or `positive` or 'negative' is not a Tensor.
@@ -6088,12 +6993,15 @@ def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06,
  same time.
  ValueError: If the dimension of input `anchor` or `positive` or `negative` is bigger than or equal to 8.
  ValueError: If shape of `anchor`, `positive` and `negative` cannot broadcast.
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.

  Supported Platforms:
  ``GPU``

  Examples:
+ >>> import mindspore
+ >>> import numpy as np
+ >>> from mindspore import Tensor, ops
  >>> anchor = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
  >>> positive = Tensor(np.array([[0.4, 0.6], [0.4, 0.6]]), mindspore.float32)
  >>> negative = Tensor(np.array([[0.2, 0.9], [0.3, 0.7]]), mindspore.float32)
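
The parameters documented above plug into the usual triplet formulation, loss = max(d(anchor, positive) - d(anchor, negative) + margin, 0), with `swap` substituting the positive-to-negative distance when it is smaller. A hedged NumPy sketch of that formulation (illustrative only; `triplet_margin_loss_sketch` is a made-up name and the exact `eps` handling in MindSpore may differ):

import numpy as np

def triplet_margin_loss_sketch(anchor, positive, negative, margin=1.0, p=2,
                               eps=1e-6, swap=False, reduction='mean'):
    # Pairwise p-norm distances along the last axis; eps keeps the norm away from zero.
    d_ap = np.linalg.norm(anchor - positive + eps, ord=p, axis=-1)
    d_an = np.linalg.norm(anchor - negative + eps, ord=p, axis=-1)
    if swap:
        # Use the tighter of the two negative distances, as the docstring describes.
        d_pn = np.linalg.norm(positive - negative + eps, ord=p, axis=-1)
        d_an = np.minimum(d_an, d_pn)
    loss = np.maximum(d_ap - d_an + margin, 0.0)
    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss  # 'none': one value per triplet, shape (N,)

anchor = np.array([[0.3, 0.7], [0.5, 0.5]])
positive = np.array([[0.4, 0.6], [0.4, 0.6]])
negative = np.array([[0.2, 0.9], [0.3, 0.7]])
print(triplet_margin_loss_sketch(anchor, positive, negative))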
@@ -6120,7 +7028,7 @@ def linear(x, w, b):
  def _inner_dropout(x, p, training):
  """inner dropout"""
  _dropout = _get_cache_prim(P.Dropout)(1 - p)
- if p > 0. and training:
+ if 0. < p <= 1. and training:
  return _dropout(x)[0]
  return x
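
The tightened guard above only applies dropout when the drop probability lies in (0, 1] and the cell is training; `P.Dropout` itself is constructed with keep_prob = 1 - p. A rough NumPy illustration of that behaviour (a sketch under those assumptions, not the primitive):

import numpy as np

_rng = np.random.default_rng(0)

def inner_dropout_sketch(x, p, training):
    if not (0. < p <= 1.) or not training:
        return x                       # no-op outside the guarded range or in eval mode
    if p == 1.:
        return np.zeros_like(x)        # everything dropped
    keep_prob = 1. - p
    mask = _rng.random(x.shape) < keep_prob
    return np.where(mask, x / keep_prob, 0.0)   # inverted-dropout scaling

x = np.ones((2, 4), dtype=np.float32)
print(inner_dropout_sketch(x, p=0.5, training=True))
print(inner_dropout_sketch(x, p=0.0, training=True))   # unchanged: p == 0 skips dropout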
@@ -6173,10 +7081,11 @@ def _in_projection_packed(q, k, v, w, b, k_is_v, q_is_k):
  return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)


- def _scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_causal, is_training):
+ def _scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_causal, is_training, dtype):
  """scaled dot product attention"""
  embed_size = query.shape[-1]
- scaling_factor = Tensor(embed_size, mstype.float32).sqrt().sqrt()
+ embed_size_tensor = scalar_to_tensor_(embed_size, dtype)
+ scaling_factor = embed_size_tensor.sqrt().sqrt()
  query = query / scaling_factor

  if is_causal:
@@ -6194,7 +7103,7 @@ def _scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_ca
  return (output, attn)
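
The new `dtype` argument lets the fourth-root scaling factor be created in the caller's precision instead of hard-coded float32; the mathematics is unchanged. As a rough reference, a NumPy sketch of plain scaled dot-product attention with that scaling (the same factor is presumably applied to the key in the omitted lines so the product carries the usual 1/sqrt(d); dropout and the causal branch are skipped, and all names are illustrative):

import numpy as np

def sdpa_sketch(query, key, value, attn_mask=None):
    d = query.shape[-1]
    scale = np.sqrt(np.sqrt(d))                 # fourth root of the embedding size
    scores = (query / scale) @ (key / scale).swapaxes(-1, -2)   # net effect: Q K^T / sqrt(d)
    if attn_mask is not None:
        scores = scores + attn_mask             # additive mask; -inf removes a position
    weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
    weights = weights / weights.sum(axis=-1, keepdims=True)     # softmax over the key axis
    return weights @ value, weights

q = np.random.rand(2, 4, 8)
k = np.random.rand(2, 4, 8)
v = np.random.rand(2, 4, 8)
out, attn = sdpa_sketch(q, k, v)
print(out.shape, attn.shape)   # (2, 4, 8) (2, 4, 4)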

- @constexpr
+ @_primexpr
  def _check_qkv_shape(query_ndim, key_ndim, value_ndim):
  """Check the expected shape for `query, `key`, `value` and returns whether the input is batched."""
  # Shape check.
@@ -6217,7 +7126,7 @@ def _check_qkv_shape(query_ndim, key_ndim, value_ndim):
  return is_batched


- @constexpr
+ @_primexpr
  def _check_kpm_shape(query_ndim, kmp_ndim):
  """check key_padding_mask shape"""
  if query_ndim == 3:
@@ -6231,7 +7140,7 @@ def _check_kpm_shape(query_ndim, kmp_ndim):
  f"but got `key_padding_mask` with {kmp_ndim}D.")


- @constexpr
+ @_primexpr
  def _check_attn_mask_shape(query_ndim, query_shape, key_shape, attn_mask_ndim,
  attn_mask_shape, num_heads):
  """
@@ -6269,7 +7178,7 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head
  out_proj_bias, training=True, key_padding_mask=None, attn_mask=None,
  use_separate_proj_weight=False, q_proj_weight=None, k_proj_weight=None,
  v_proj_weight=None, static_k=None, static_v=None, average_attn_weights=True,
- is_causal=False, k_is_v=False, q_is_k=False):
+ is_causal=False, k_is_v=False, q_is_k=False, dtype=mstype.float32):
  """multi head attetion forward function"""
  is_batched = _check_qkv_shape(query.ndim, key.ndim, value.ndim)
  if key_padding_mask is not None:
@@ -6413,8 +7322,7 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head

  if attn_mask is not None and attn_mask.dtype == mstype.bool_:
  new_attn_mask = ops.zeros_like(attn_mask, dtype=q.dtype)
- new_attn_mask.masked_fill(attn_mask, float("-inf"))
- attn_mask = new_attn_mask
+ attn_mask = new_attn_mask.masked_fill(attn_mask, float("-inf"))
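
The rewrite also fixes a silent bug: `masked_fill` returns its result rather than updating `new_attn_mask` in place, so the assignment is required for the -inf entries to survive. The intended conversion of a boolean mask into an additive float mask, shown with NumPy for clarity:

import numpy as np

# True means "do not attend"; the boolean mask becomes an additive 0 / -inf mask
# that is later added to the attention scores before softmax.
bool_mask = np.array([[False, True],
                      [False, False]])
additive_mask = np.where(bool_mask, float("-inf"), 0.0)
print(additive_mask)
# [[  0. -inf]
#  [  0.   0.]]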

  if attn_mask is not None:
  if attn_mask.shape[0] == 1:
@@ -6427,7 +7335,7 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head
  v = v.view((bsz, num_heads, src_len, head_dim))

  attn_output, attn_output_weights = _scaled_dot_product_attention(
- q, k, v, attn_mask, dropout_p, is_causal, training)
+ q, k, v, attn_mask, dropout_p, is_causal, training, dtype)
  attn_output = attn_output.transpose(2, 0, 1, 3).view((bsz * tgt_len, embed_dim))

  attn_output = linear(attn_output, out_proj_weight, out_proj_bias)
@@ -6443,11 +7351,11 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head
  return attn_output, attn_output_weights


- def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False):
+ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False):
  r"""
  Performs a 2D max pooling on the input Tensor.

- Typically the input is a Tensor with shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, outputs
+ Typically, the input is a Tensor with shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, outputs
  regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given `kernel_size`
  :math:`ks = (h_{ker}, w_{ker})` and `stride` :math:`s = (s_0, s_1)`, the operation is as follows:
@@ -6458,27 +7366,38 @@ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal

  Args:
  x (Tensor): Tensor of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})` with data type of int8,
- int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32 or float64.
+ int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32 or float64 in CPU or GPU,
+ while on Ascend only uint16 is supported.
  kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value and arg
  value, is an int number that represents height and width of the kernel, or a tuple of
  two int numbers that represent height and width respectively.
  stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
  the height and width of movement are both stride, or a tuple of two int numbers that
- represent height and width of movement respectively. Default: `kernel_size`.
+ represent height and width of movement respectively.
+ Default: ``None`` , which indicates the moving step is `kernel_size` .
  padding (Union[int, tuple[int]]): An int number that represents the height and width of movement are both
  strides, or a tuple of two int numbers that represent height and width of movement respectively.
- Default: 0.
- dilation (Union[int, tuple[int]]): Control the stride of elements in the kernel. Default: 1.
- ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: False.
- return_indices (bool): Whether to output the indices of max value. Default: False.
+ Default: ``0`` .
+ dilation (Union[int, tuple[int]]): Control the stride of elements in the kernel. Default: ``1`` .
+ return_indices (bool): Whether to output the indices of max value. Default: ``False`` .
+ ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: ``False`` .

  Returns:
- If `return_indices` is False, return a Tensor `output`, else return a tuple (`output`, `argmax`).
+ If `return_indices` is ``False`` , return a Tensor `output`, else return a tuple (`output`, `argmax`).

  - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, H_{out}, W_{out})`.
  It has the same data type as `x`.
- - **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int64. It will be return
- only when `return_indices` is True.
+
+ .. math::
+ H_{out} = \left\lfloor\frac{H_{in} + 2 * \text{padding[0]} - \text{dilation[0]}
+ \times (\text{kernel_size[0]} - 1) - 1}{\text{stride[0]}} + 1\right\rfloor
+
+ .. math::
+ W_{out} = \left\lfloor\frac{W_{in} + 2 * \text{padding[1]} - \text{dilation[1]}
+ \times (\text{kernel_size[1]} - 1) - 1}{\text{stride[1]}} + 1\right\rfloor
+
+ - **argmax** (Tensor) - Index corresponding to the maximum value. On CPU and GPU the data type is int64,
+ while on Ascend it is uint16. It is returned only when `return_indices` is True.

  Raises:
  TypeError: If `x` is not a Tensor.
@@ -6486,12 +7405,16 @@ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal
  TypeError: If `kernel_size` , `stride` , `padding` or `dilation` is not int or tuple.
  ValueError: If `kernel_size`, `stride` or `dilation` is less than 1.
  ValueError: If `padding` is less than 0.
- TypeError: If `ceil_mode` is not bool
+ ValueError: If `padding` is more than half of `kernel_size`.
+ TypeError: If `ceil_mode` is not bool.

  Supported Platforms:
  ``Ascend`` ``GPU`` ``CPU``

  Examples:
+ >>> import mindspore
+ >>> import numpy as np
+ >>> from mindspore import Tensor, ops
  >>> x = Tensor(np.arange(20 * 16 * 50 * 32).reshape((20, 16, 50, 32)), mindspore.float32)
  >>> output_tensor, argmax = ops.max_pool2d(x, kernel_size=(3, 2), stride=(2, 1), return_indices=True)
  >>> print(output_tensor.shape)
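
The printed shape follows from the `H_{out}`/`W_{out}` formulas added to the docstring above. A quick plain-Python check under the example's settings (padding=0, dilation=1, floor mode; the helper name is made up):

import math

def max_pool2d_out_size(size, kernel, stride, padding=0, dilation=1, ceil_mode=False):
    # Direct transcription of the H_out / W_out formula above.
    val = (size + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1
    return math.ceil(val) if ceil_mode else math.floor(val)

# Docstring example: input (20, 16, 50, 32), kernel_size=(3, 2), stride=(2, 1).
h_out = max_pool2d_out_size(50, kernel=3, stride=2)
w_out = max_pool2d_out_size(32, kernel=2, stride=1)
print(h_out, w_out)   # 24 31, i.e. an output of shape (20, 16, 24, 31)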
@@ -6520,6 +7443,7 @@ __all__ = [
  'avg_pool3d',
  'batch_norm',
  'bias_add',
+ 'bidense',
  'binary_cross_entropy',
  'binary_cross_entropy_with_logits',
  'cosine_embedding_loss',
@@ -6527,6 +7451,7 @@ __all__ = [
  'max_pool3d',
  'kl_div',
  'celu',
+ 'dense',
  'deformable_conv2d',
  'dropout1d',
  'dropout2d',
@@ -6553,8 +7478,10 @@ __all__ = [
  'softsign',
  'softshrink',
  'soft_shrink',
+ 'softplus',
  'selu',
  'silu',
+ 'soft_margin_loss',
  'softmax',
  'softmin',
  'pdist',