mindspore 2.2.11-cp39-cp39-win_amd64.whl → 2.3.0-cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mindspore might be problematic.

Files changed (1151)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +7 -5
  3. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +76 -18
  7. mindspore/_extends/builtin_operations.py +2 -1
  8. mindspore/_extends/graph_kernel/model/graph_parallel.py +16 -6
  9. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +3 -16
  10. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +16 -4
  11. mindspore/_extends/parallel_compile/akg_compiler/compiler.py +1 -0
  12. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +96 -0
  13. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +2 -1
  14. mindspore/_extends/parallel_compile/akg_compiler/util.py +5 -2
  15. mindspore/_extends/parse/__init__.py +18 -14
  16. mindspore/_extends/parse/compile_config.py +258 -0
  17. mindspore/_extends/parse/namespace.py +2 -2
  18. mindspore/_extends/parse/parser.py +174 -62
  19. mindspore/_extends/parse/resources.py +45 -14
  20. mindspore/_extends/parse/standard_method.py +142 -240
  21. mindspore/{ops/_op_impl/tbe/atomic_addr_clean.py → _extends/pijit/__init__.py} +6 -16
  22. mindspore/_extends/pijit/pijit_func_white_list.py +343 -0
  23. mindspore/_extends/remote/kernel_build_server.py +2 -0
  24. mindspore/_profiler.py +30 -0
  25. mindspore/amp.py +51 -24
  26. mindspore/avcodec-59.dll +0 -0
  27. mindspore/avdevice-59.dll +0 -0
  28. mindspore/avfilter-8.dll +0 -0
  29. mindspore/avformat-59.dll +0 -0
  30. mindspore/avutil-57.dll +0 -0
  31. mindspore/boost/adasum.py +1 -1
  32. mindspore/boost/base.py +1 -1
  33. mindspore/boost/boost_cell_wrapper.py +2 -2
  34. mindspore/boost/grad_freeze.py +2 -2
  35. mindspore/boost/group_loss_scale_manager.py +1 -1
  36. mindspore/boost/less_batch_normalization.py +9 -6
  37. mindspore/common/__init__.py +15 -4
  38. mindspore/common/_jit_fallback_utils.py +2 -3
  39. mindspore/common/_register_for_adapter.py +7 -0
  40. mindspore/common/_register_for_recompute.py +48 -0
  41. mindspore/common/_register_for_tensor.py +8 -9
  42. mindspore/common/_stub_tensor.py +7 -1
  43. mindspore/common/_utils.py +5 -17
  44. mindspore/common/api.py +411 -106
  45. mindspore/common/auto_dynamic_shape.py +27 -14
  46. mindspore/common/dtype.py +17 -10
  47. mindspore/common/dump.py +6 -8
  48. mindspore/common/file_system.py +48 -0
  49. mindspore/common/generator.py +260 -0
  50. mindspore/common/hook_handle.py +51 -4
  51. mindspore/common/initializer.py +1 -1
  52. mindspore/common/jit_config.py +34 -14
  53. mindspore/common/lazy_inline.py +72 -19
  54. mindspore/common/mindir_util.py +12 -2
  55. mindspore/common/mutable.py +79 -14
  56. mindspore/common/no_inline.py +54 -0
  57. mindspore/common/np_dtype.py +25 -0
  58. mindspore/common/parameter.py +30 -11
  59. mindspore/common/recompute.py +262 -0
  60. mindspore/common/seed.py +9 -9
  61. mindspore/common/sparse_tensor.py +272 -24
  62. mindspore/common/symbol.py +122 -0
  63. mindspore/common/tensor.py +468 -496
  64. mindspore/communication/__init__.py +6 -11
  65. mindspore/communication/_comm_helper.py +5 -0
  66. mindspore/communication/comm_func.py +1140 -0
  67. mindspore/communication/management.py +118 -102
  68. mindspore/config/op_info.config +22 -54
  69. mindspore/context.py +378 -65
  70. mindspore/dataset/__init__.py +5 -5
  71. mindspore/dataset/audio/__init__.py +6 -6
  72. mindspore/dataset/audio/transforms.py +711 -158
  73. mindspore/dataset/callback/ds_callback.py +2 -2
  74. mindspore/dataset/engine/cache_client.py +2 -2
  75. mindspore/dataset/engine/datasets.py +163 -83
  76. mindspore/dataset/engine/datasets_audio.py +14 -14
  77. mindspore/dataset/engine/datasets_standard_format.py +33 -3
  78. mindspore/dataset/engine/datasets_text.py +38 -38
  79. mindspore/dataset/engine/datasets_user_defined.py +78 -59
  80. mindspore/dataset/engine/datasets_vision.py +77 -73
  81. mindspore/dataset/engine/offload.py +5 -7
  82. mindspore/dataset/engine/queue.py +56 -38
  83. mindspore/dataset/engine/validators.py +11 -5
  84. mindspore/dataset/text/__init__.py +3 -3
  85. mindspore/dataset/text/transforms.py +408 -121
  86. mindspore/dataset/text/utils.py +9 -9
  87. mindspore/dataset/transforms/__init__.py +1 -1
  88. mindspore/dataset/transforms/transforms.py +261 -76
  89. mindspore/dataset/utils/browse_dataset.py +9 -9
  90. mindspore/dataset/vision/__init__.py +8 -8
  91. mindspore/dataset/vision/c_transforms.py +10 -10
  92. mindspore/dataset/vision/py_transforms_util.py +3 -3
  93. mindspore/dataset/vision/transforms.py +2844 -549
  94. mindspore/dataset/vision/utils.py +161 -10
  95. mindspore/dataset/vision/validators.py +14 -2
  96. mindspore/dnnl.dll +0 -0
  97. mindspore/experimental/optim/__init__.py +12 -2
  98. mindspore/experimental/optim/adadelta.py +161 -0
  99. mindspore/experimental/optim/adagrad.py +168 -0
  100. mindspore/experimental/optim/adam.py +35 -34
  101. mindspore/experimental/optim/adamax.py +170 -0
  102. mindspore/experimental/optim/adamw.py +40 -16
  103. mindspore/experimental/optim/asgd.py +153 -0
  104. mindspore/experimental/optim/lr_scheduler.py +71 -127
  105. mindspore/experimental/optim/nadam.py +157 -0
  106. mindspore/experimental/optim/optimizer.py +15 -8
  107. mindspore/experimental/optim/radam.py +194 -0
  108. mindspore/experimental/optim/rmsprop.py +154 -0
  109. mindspore/experimental/optim/rprop.py +164 -0
  110. mindspore/experimental/optim/sgd.py +28 -19
  111. mindspore/hal/__init__.py +40 -0
  112. mindspore/hal/_ascend.py +57 -0
  113. mindspore/hal/_base.py +57 -0
  114. mindspore/hal/_cpu.py +56 -0
  115. mindspore/hal/_gpu.py +57 -0
  116. mindspore/hal/device.py +356 -0
  117. mindspore/hal/event.py +179 -0
  118. mindspore/hal/memory.py +326 -0
  119. mindspore/hal/stream.py +339 -0
  120. mindspore/include/api/data_type.h +2 -2
  121. mindspore/include/api/dual_abi_helper.h +16 -3
  122. mindspore/include/api/model.h +4 -3
  123. mindspore/include/api/status.h +14 -0
  124. mindspore/include/c_api/model_c.h +173 -0
  125. mindspore/include/c_api/ms/base/types.h +1 -0
  126. mindspore/include/c_api/types_c.h +19 -0
  127. mindspore/include/dataset/execute.h +1 -3
  128. mindspore/include/dataset/vision.h +54 -2
  129. mindspore/jpeg62.dll +0 -0
  130. mindspore/log.py +2 -2
  131. mindspore/mindrecord/__init__.py +5 -1
  132. mindspore/mindrecord/config.py +809 -0
  133. mindspore/mindrecord/filereader.py +25 -0
  134. mindspore/mindrecord/filewriter.py +76 -58
  135. mindspore/mindrecord/mindpage.py +40 -6
  136. mindspore/mindrecord/shardutils.py +3 -2
  137. mindspore/mindrecord/shardwriter.py +7 -0
  138. mindspore/mindrecord/tools/cifar100_to_mr.py +53 -66
  139. mindspore/mindrecord/tools/cifar10_to_mr.py +48 -63
  140. mindspore/mindrecord/tools/csv_to_mr.py +7 -17
  141. mindspore/mindrecord/tools/imagenet_to_mr.py +3 -8
  142. mindspore/mindrecord/tools/mnist_to_mr.py +11 -21
  143. mindspore/mindrecord/tools/tfrecord_to_mr.py +2 -10
  144. mindspore/mindspore_backend.dll +0 -0
  145. mindspore/mindspore_common.dll +0 -0
  146. mindspore/mindspore_core.dll +0 -0
  147. mindspore/mindspore_glog.dll +0 -0
  148. mindspore/mindspore_np_dtype.dll +0 -0
  149. mindspore/mindspore_shared_lib.dll +0 -0
  150. mindspore/mint/__init__.py +1137 -0
  151. mindspore/{rewrite/ast_transformers → mint/linalg}/__init__.py +9 -4
  152. mindspore/mint/nn/__init__.py +512 -0
  153. mindspore/mint/nn/functional.py +573 -0
  154. mindspore/mint/optim/__init__.py +24 -0
  155. mindspore/mint/optim/adamw.py +185 -0
  156. mindspore/multiprocessing/__init__.py +72 -0
  157. mindspore/nn/__init__.py +1 -0
  158. mindspore/nn/cell.py +213 -257
  159. mindspore/nn/dynamic_lr.py +2 -2
  160. mindspore/nn/extend/__init__.py +29 -0
  161. mindspore/nn/extend/basic.py +140 -0
  162. mindspore/nn/extend/embedding.py +143 -0
  163. mindspore/{rewrite/ast_creator_register.py → nn/extend/layer/__init__.py} +9 -19
  164. mindspore/nn/extend/layer/normalization.py +109 -0
  165. mindspore/nn/extend/pooling.py +117 -0
  166. mindspore/nn/layer/activation.py +84 -94
  167. mindspore/nn/layer/basic.py +177 -82
  168. mindspore/nn/layer/channel_shuffle.py +3 -16
  169. mindspore/nn/layer/container.py +3 -3
  170. mindspore/nn/layer/conv.py +75 -66
  171. mindspore/nn/layer/embedding.py +103 -45
  172. mindspore/nn/layer/embedding_service.py +531 -0
  173. mindspore/nn/layer/embedding_service_layer.py +393 -0
  174. mindspore/nn/layer/image.py +4 -7
  175. mindspore/nn/layer/math.py +1 -1
  176. mindspore/nn/layer/normalization.py +52 -66
  177. mindspore/nn/layer/padding.py +30 -39
  178. mindspore/nn/layer/pooling.py +18 -9
  179. mindspore/nn/layer/rnn_cells.py +6 -16
  180. mindspore/nn/layer/rnns.py +6 -5
  181. mindspore/nn/layer/thor_layer.py +1 -2
  182. mindspore/nn/layer/timedistributed.py +1 -1
  183. mindspore/nn/layer/transformer.py +52 -50
  184. mindspore/nn/learning_rate_schedule.py +6 -5
  185. mindspore/nn/loss/loss.py +63 -84
  186. mindspore/nn/optim/ada_grad.py +6 -4
  187. mindspore/nn/optim/adadelta.py +3 -1
  188. mindspore/nn/optim/adafactor.py +1 -1
  189. mindspore/nn/optim/adam.py +102 -181
  190. mindspore/nn/optim/adamax.py +4 -2
  191. mindspore/nn/optim/adasum.py +3 -3
  192. mindspore/nn/optim/asgd.py +4 -2
  193. mindspore/nn/optim/ftrl.py +31 -61
  194. mindspore/nn/optim/lamb.py +5 -3
  195. mindspore/nn/optim/lars.py +2 -2
  196. mindspore/nn/optim/lazyadam.py +6 -4
  197. mindspore/nn/optim/momentum.py +13 -25
  198. mindspore/nn/optim/optimizer.py +6 -3
  199. mindspore/nn/optim/proximal_ada_grad.py +4 -2
  200. mindspore/nn/optim/rmsprop.py +9 -3
  201. mindspore/nn/optim/rprop.py +4 -2
  202. mindspore/nn/optim/sgd.py +7 -4
  203. mindspore/nn/optim/thor.py +2 -2
  204. mindspore/nn/probability/distribution/_utils/custom_ops.py +2 -2
  205. mindspore/nn/probability/distribution/beta.py +2 -2
  206. mindspore/nn/probability/distribution/categorical.py +4 -6
  207. mindspore/nn/probability/distribution/cauchy.py +2 -2
  208. mindspore/nn/probability/distribution/exponential.py +2 -2
  209. mindspore/nn/probability/distribution/geometric.py +1 -1
  210. mindspore/nn/probability/distribution/gumbel.py +2 -2
  211. mindspore/nn/probability/distribution/logistic.py +1 -1
  212. mindspore/nn/probability/distribution/poisson.py +2 -2
  213. mindspore/nn/probability/distribution/uniform.py +2 -2
  214. mindspore/nn/reinforcement/_tensors_queue.py +13 -1
  215. mindspore/nn/wrap/__init__.py +2 -1
  216. mindspore/nn/wrap/cell_wrapper.py +58 -13
  217. mindspore/nn/wrap/grad_reducer.py +148 -8
  218. mindspore/nn/wrap/loss_scale.py +32 -9
  219. mindspore/numpy/__init__.py +2 -0
  220. mindspore/numpy/array_creations.py +2 -0
  221. mindspore/numpy/array_ops.py +6 -6
  222. mindspore/numpy/dtypes.py +3 -3
  223. mindspore/numpy/fft.py +431 -0
  224. mindspore/numpy/math_ops.py +61 -67
  225. mindspore/numpy/utils.py +3 -0
  226. mindspore/opencv_core452.dll +0 -0
  227. mindspore/opencv_imgcodecs452.dll +0 -0
  228. mindspore/opencv_imgproc452.dll +0 -0
  229. mindspore/ops/__init__.py +8 -4
  230. mindspore/ops/_grad_experimental/grad_array_ops.py +4 -160
  231. mindspore/ops/_grad_experimental/grad_comm_ops.py +93 -36
  232. mindspore/ops/_grad_experimental/grad_inner_ops.py +8 -0
  233. mindspore/ops/_grad_experimental/grad_math_ops.py +92 -287
  234. mindspore/ops/_grad_experimental/grad_nn_ops.py +0 -53
  235. mindspore/ops/_grad_experimental/grad_quant_ops.py +3 -3
  236. mindspore/ops/_grad_experimental/grad_sparse.py +1 -1
  237. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
  238. mindspore/ops/_op_impl/__init__.py +0 -1
  239. mindspore/ops/_op_impl/aicpu/__init__.py +1 -0
  240. mindspore/ops/_op_impl/aicpu/gamma.py +2 -0
  241. mindspore/ops/_op_impl/{cpu/concat.py → aicpu/generate_eod_mask.py} +16 -17
  242. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +1 -3
  243. mindspore/ops/_op_impl/aicpu/poisson.py +2 -0
  244. mindspore/ops/_op_impl/cpu/__init__.py +1 -3
  245. mindspore/ops/_op_impl/cpu/adam.py +2 -2
  246. mindspore/ops/_op_impl/cpu/adam_weight_decay.py +3 -2
  247. mindspore/ops/_op_impl/cpu/maximum_grad.py +16 -14
  248. mindspore/ops/_op_impl/cpu/minimum_grad.py +8 -0
  249. mindspore/ops/_vmap/vmap_array_ops.py +164 -101
  250. mindspore/ops/_vmap/vmap_base.py +8 -1
  251. mindspore/ops/_vmap/vmap_grad_math_ops.py +95 -9
  252. mindspore/ops/_vmap/vmap_grad_nn_ops.py +143 -58
  253. mindspore/ops/_vmap/vmap_image_ops.py +70 -13
  254. mindspore/ops/_vmap/vmap_math_ops.py +130 -58
  255. mindspore/ops/_vmap/vmap_nn_ops.py +249 -115
  256. mindspore/ops/_vmap/vmap_other_ops.py +1 -1
  257. mindspore/ops/auto_generate/__init__.py +31 -0
  258. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +231 -0
  259. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +250 -0
  260. mindspore/ops/auto_generate/gen_arg_handler.py +197 -0
  261. mindspore/ops/auto_generate/gen_extend_func.py +980 -0
  262. mindspore/ops/auto_generate/gen_ops_def.py +6443 -0
  263. mindspore/ops/auto_generate/gen_ops_prim.py +13167 -0
  264. mindspore/ops/auto_generate/pyboost_inner_prim.py +429 -0
  265. mindspore/ops/composite/__init__.py +5 -2
  266. mindspore/ops/composite/base.py +121 -23
  267. mindspore/ops/composite/math_ops.py +10 -49
  268. mindspore/ops/composite/multitype_ops/_compile_utils.py +191 -618
  269. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +25 -134
  270. mindspore/ops/composite/multitype_ops/add_impl.py +6 -0
  271. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +6 -0
  272. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +6 -0
  273. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +6 -0
  274. mindspore/ops/composite/multitype_ops/div_impl.py +8 -0
  275. mindspore/ops/composite/multitype_ops/equal_impl.py +6 -0
  276. mindspore/ops/composite/multitype_ops/floordiv_impl.py +8 -0
  277. mindspore/ops/composite/multitype_ops/getitem_impl.py +6 -0
  278. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +6 -0
  279. mindspore/ops/composite/multitype_ops/greater_impl.py +6 -0
  280. mindspore/ops/composite/multitype_ops/in_impl.py +8 -2
  281. mindspore/ops/composite/multitype_ops/left_shift_impl.py +6 -0
  282. mindspore/ops/composite/multitype_ops/less_equal_impl.py +6 -0
  283. mindspore/ops/composite/multitype_ops/less_impl.py +6 -0
  284. mindspore/ops/composite/multitype_ops/logic_not_impl.py +6 -0
  285. mindspore/ops/composite/multitype_ops/logical_and_impl.py +6 -0
  286. mindspore/ops/composite/multitype_ops/logical_or_impl.py +6 -0
  287. mindspore/ops/composite/multitype_ops/mod_impl.py +6 -0
  288. mindspore/ops/composite/multitype_ops/mul_impl.py +6 -0
  289. mindspore/ops/composite/multitype_ops/negative_impl.py +9 -3
  290. mindspore/ops/composite/multitype_ops/not_equal_impl.py +6 -0
  291. mindspore/ops/composite/multitype_ops/not_in_impl.py +6 -1
  292. mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -2
  293. mindspore/ops/composite/multitype_ops/pow_impl.py +6 -0
  294. mindspore/ops/composite/multitype_ops/right_shift_impl.py +6 -0
  295. mindspore/ops/composite/multitype_ops/setitem_impl.py +32 -21
  296. mindspore/ops/composite/multitype_ops/sub_impl.py +6 -0
  297. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +6 -3
  298. mindspore/ops/deprecated.py +14 -3
  299. mindspore/ops/extend/__init__.py +53 -0
  300. mindspore/ops/extend/array_func.py +218 -0
  301. mindspore/ops/extend/math_func.py +76 -0
  302. mindspore/ops/extend/nn_func.py +308 -0
  303. mindspore/ops/function/__init__.py +31 -11
  304. mindspore/ops/function/array_func.py +848 -1736
  305. mindspore/ops/function/clip_func.py +19 -31
  306. mindspore/ops/function/debug_func.py +2 -5
  307. mindspore/ops/function/fft_func.py +31 -0
  308. mindspore/ops/function/grad/grad_func.py +27 -20
  309. mindspore/ops/function/image_func.py +27 -21
  310. mindspore/ops/function/linalg_func.py +30 -53
  311. mindspore/ops/function/math_func.py +916 -2791
  312. mindspore/ops/function/nn_func.py +1445 -889
  313. mindspore/ops/function/other_func.py +6 -7
  314. mindspore/ops/function/parameter_func.py +6 -92
  315. mindspore/ops/function/random_func.py +254 -108
  316. mindspore/ops/function/reshard_func.py +102 -0
  317. mindspore/ops/function/sparse_func.py +4 -4
  318. mindspore/ops/function/sparse_unary_func.py +11 -18
  319. mindspore/ops/function/spectral_func.py +1 -1
  320. mindspore/ops/function/vmap_func.py +15 -14
  321. mindspore/ops/functional.py +342 -343
  322. mindspore/ops/op_info_register.py +16 -43
  323. mindspore/ops/operations/__init__.py +32 -23
  324. mindspore/ops/operations/_embedding_cache_ops.py +1 -1
  325. mindspore/ops/operations/_grad_ops.py +21 -853
  326. mindspore/ops/operations/_infer_ops.py +19 -0
  327. mindspore/ops/operations/_inner_ops.py +155 -511
  328. mindspore/ops/operations/_quant_ops.py +4 -4
  329. mindspore/ops/operations/_rl_inner_ops.py +3 -3
  330. mindspore/ops/operations/_scalar_ops.py +5 -480
  331. mindspore/ops/operations/_sequence_ops.py +6 -36
  332. mindspore/ops/operations/_tensor_array.py +8 -8
  333. mindspore/ops/operations/array_ops.py +112 -2698
  334. mindspore/ops/operations/comm_ops.py +801 -118
  335. mindspore/ops/operations/custom_ops.py +62 -121
  336. mindspore/ops/operations/debug_ops.py +105 -36
  337. mindspore/ops/operations/image_ops.py +3 -219
  338. mindspore/ops/operations/inner_ops.py +54 -40
  339. mindspore/ops/operations/linalg_ops.py +1 -49
  340. mindspore/ops/operations/manually_defined/__init__.py +24 -0
  341. mindspore/ops/operations/manually_defined/_inner.py +61 -0
  342. mindspore/ops/operations/manually_defined/ops_def.py +2016 -0
  343. mindspore/ops/operations/math_ops.py +621 -4654
  344. mindspore/ops/operations/nn_ops.py +316 -2226
  345. mindspore/ops/operations/other_ops.py +53 -45
  346. mindspore/ops/operations/random_ops.py +4 -51
  347. mindspore/ops/operations/reshard_ops.py +53 -0
  348. mindspore/ops/operations/sparse_ops.py +8 -8
  349. mindspore/ops/primitive.py +204 -103
  350. mindspore/ops/silent_check.py +162 -0
  351. mindspore/ops_generate/__init__.py +27 -0
  352. mindspore/ops_generate/arg_dtype_cast.py +250 -0
  353. mindspore/ops_generate/arg_handler.py +197 -0
  354. mindspore/ops_generate/gen_aclnn_implement.py +263 -0
  355. mindspore/ops_generate/gen_ops.py +1084 -0
  356. mindspore/ops_generate/gen_ops_inner_prim.py +131 -0
  357. mindspore/ops_generate/gen_pyboost_func.py +968 -0
  358. mindspore/ops_generate/gen_utils.py +209 -0
  359. mindspore/ops_generate/op_proto.py +138 -0
  360. mindspore/ops_generate/pyboost_utils.py +354 -0
  361. mindspore/ops_generate/template.py +239 -0
  362. mindspore/parallel/__init__.py +7 -4
  363. mindspore/parallel/_auto_parallel_context.py +155 -6
  364. mindspore/parallel/_cell_wrapper.py +16 -9
  365. mindspore/parallel/_cost_model_context.py +1 -1
  366. mindspore/parallel/_dp_allreduce_fusion.py +159 -159
  367. mindspore/parallel/_parallel_serialization.py +62 -14
  368. mindspore/parallel/_ps_context.py +1 -1
  369. mindspore/parallel/_recovery_context.py +1 -1
  370. mindspore/parallel/_tensor.py +18 -9
  371. mindspore/parallel/_transformer/__init__.py +1 -1
  372. mindspore/parallel/_transformer/layers.py +1 -1
  373. mindspore/parallel/_transformer/loss.py +1 -1
  374. mindspore/parallel/_transformer/moe.py +1 -1
  375. mindspore/parallel/_transformer/op_parallel_config.py +1 -1
  376. mindspore/parallel/_transformer/transformer.py +10 -10
  377. mindspore/parallel/_utils.py +161 -6
  378. mindspore/parallel/algo_parameter_config.py +6 -8
  379. mindspore/parallel/checkpoint_transform.py +369 -64
  380. mindspore/parallel/cluster/__init__.py +15 -0
  381. mindspore/parallel/cluster/process_entity/__init__.py +18 -0
  382. mindspore/parallel/cluster/process_entity/_api.py +344 -0
  383. mindspore/parallel/cluster/process_entity/_utils.py +126 -0
  384. mindspore/parallel/cluster/run.py +136 -0
  385. mindspore/parallel/mpi/__init__.py +1 -1
  386. mindspore/parallel/mpi/_mpi_config.py +1 -1
  387. mindspore/parallel/parameter_broadcast.py +152 -0
  388. mindspore/parallel/shard.py +128 -17
  389. mindspore/profiler/__init__.py +3 -2
  390. mindspore/profiler/common/process_pool.py +41 -0
  391. mindspore/profiler/common/singleton.py +28 -0
  392. mindspore/profiler/common/util.py +125 -0
  393. mindspore/profiler/envprofiling.py +2 -2
  394. mindspore/{_extends/parallel_compile/tbe_compiler → profiler/parser/ascend_analysis}/__init__.py +1 -1
  395. mindspore/profiler/parser/ascend_analysis/constant.py +53 -0
  396. mindspore/profiler/parser/ascend_analysis/file_manager.py +159 -0
  397. mindspore/profiler/parser/ascend_analysis/function_event.py +161 -0
  398. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +131 -0
  399. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +85 -0
  400. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +57 -0
  401. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +116 -0
  402. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +86 -0
  403. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +68 -0
  404. mindspore/profiler/parser/ascend_cluster_generator.py +116 -0
  405. mindspore/profiler/parser/ascend_communicate_generator.py +314 -0
  406. mindspore/profiler/parser/ascend_flops_generator.py +27 -5
  407. mindspore/profiler/parser/ascend_fpbp_generator.py +8 -2
  408. mindspore/profiler/parser/ascend_hccl_generator.py +31 -280
  409. mindspore/profiler/parser/ascend_integrate_generator.py +42 -0
  410. mindspore/profiler/parser/ascend_memory_generator.py +185 -0
  411. mindspore/profiler/parser/ascend_msprof_exporter.py +151 -126
  412. mindspore/profiler/parser/ascend_msprof_generator.py +75 -274
  413. mindspore/profiler/parser/ascend_op_generator.py +94 -36
  414. mindspore/profiler/parser/ascend_timeline_generator.py +297 -131
  415. mindspore/profiler/parser/base_timeline_generator.py +17 -3
  416. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -1
  417. mindspore/profiler/parser/framework_parser.py +11 -4
  418. mindspore/profiler/parser/integrator.py +3 -1
  419. mindspore/profiler/parser/memory_usage_parser.py +8 -2
  420. mindspore/profiler/parser/minddata_analyzer.py +8 -2
  421. mindspore/profiler/parser/minddata_parser.py +73 -4
  422. mindspore/profiler/parser/msadvisor_analyzer.py +5 -3
  423. mindspore/profiler/parser/msadvisor_parser.py +10 -4
  424. mindspore/profiler/parser/profiler_info.py +16 -1
  425. mindspore/profiler/profiling.py +522 -195
  426. mindspore/rewrite/__init__.py +2 -13
  427. mindspore/rewrite/api/node.py +123 -37
  428. mindspore/rewrite/api/pattern_engine.py +2 -3
  429. mindspore/rewrite/api/scoped_value.py +16 -15
  430. mindspore/rewrite/api/symbol_tree.py +46 -30
  431. mindspore/rewrite/ast_helpers/__init__.py +3 -6
  432. mindspore/rewrite/ast_helpers/ast_converter.py +143 -0
  433. mindspore/rewrite/ast_helpers/ast_finder.py +48 -0
  434. mindspore/rewrite/ast_helpers/ast_flattener.py +268 -0
  435. mindspore/rewrite/ast_helpers/ast_modifier.py +160 -92
  436. mindspore/rewrite/common/__init__.py +1 -2
  437. mindspore/rewrite/common/config.py +24 -0
  438. mindspore/rewrite/common/{rewrite_elog.py → error_log.py} +39 -39
  439. mindspore/rewrite/{namer.py → common/namer.py} +63 -18
  440. mindspore/rewrite/common/namespace.py +118 -0
  441. mindspore/rewrite/node/__init__.py +5 -5
  442. mindspore/rewrite/node/call_function.py +23 -7
  443. mindspore/rewrite/node/cell_container.py +7 -3
  444. mindspore/rewrite/node/control_flow.py +53 -28
  445. mindspore/rewrite/node/node.py +212 -196
  446. mindspore/rewrite/node/node_manager.py +51 -22
  447. mindspore/rewrite/node/node_topological_manager.py +3 -23
  448. mindspore/rewrite/parsers/__init__.py +12 -0
  449. mindspore/rewrite/parsers/arguments_parser.py +8 -9
  450. mindspore/rewrite/parsers/assign_parser.py +637 -413
  451. mindspore/rewrite/parsers/attribute_parser.py +3 -4
  452. mindspore/rewrite/parsers/class_def_parser.py +115 -148
  453. mindspore/rewrite/parsers/constant_parser.py +5 -5
  454. mindspore/rewrite/parsers/container_parser.py +4 -6
  455. mindspore/rewrite/parsers/expr_parser.py +55 -0
  456. mindspore/rewrite/parsers/for_parser.py +31 -98
  457. mindspore/rewrite/parsers/function_def_parser.py +13 -5
  458. mindspore/rewrite/parsers/if_parser.py +28 -10
  459. mindspore/rewrite/parsers/module_parser.py +8 -182
  460. mindspore/rewrite/parsers/parser.py +1 -5
  461. mindspore/rewrite/parsers/parser_register.py +1 -1
  462. mindspore/rewrite/parsers/return_parser.py +5 -10
  463. mindspore/rewrite/parsers/while_parser.py +59 -0
  464. mindspore/rewrite/sparsify/utils.py +1 -1
  465. mindspore/rewrite/symbol_tree/__init__.py +20 -0
  466. mindspore/rewrite/{symbol_tree.py → symbol_tree/symbol_tree.py} +704 -185
  467. mindspore/rewrite/{symbol_tree_builder.py → symbol_tree/symbol_tree_builder.py} +8 -8
  468. mindspore/rewrite/{symbol_tree_dumper.py → symbol_tree/symbol_tree_dumper.py} +4 -4
  469. mindspore/run_check/_check_version.py +6 -14
  470. mindspore/run_check/run_check.py +1 -1
  471. mindspore/safeguard/rewrite_obfuscation.py +9 -19
  472. mindspore/swresample-4.dll +0 -0
  473. mindspore/swscale-6.dll +0 -0
  474. mindspore/tinyxml2.dll +0 -0
  475. mindspore/train/__init__.py +6 -5
  476. mindspore/train/_utils.py +178 -4
  477. mindspore/train/amp.py +167 -245
  478. mindspore/train/anf_ir_pb2.py +14 -2
  479. mindspore/train/callback/__init__.py +5 -2
  480. mindspore/train/callback/_backup_and_restore.py +5 -5
  481. mindspore/train/callback/_callback.py +4 -4
  482. mindspore/train/callback/_checkpoint.py +151 -37
  483. mindspore/train/callback/_cluster_monitor.py +201 -0
  484. mindspore/train/callback/_early_stop.py +2 -2
  485. mindspore/train/callback/_flops_collector.py +238 -0
  486. mindspore/train/callback/_landscape.py +16 -11
  487. mindspore/train/callback/_loss_monitor.py +2 -2
  488. mindspore/train/callback/_mindio_ttp.py +443 -0
  489. mindspore/train/callback/_on_request_exit.py +2 -2
  490. mindspore/train/callback/_reduce_lr_on_plateau.py +2 -2
  491. mindspore/train/callback/_summary_collector.py +13 -14
  492. mindspore/train/callback/_time_monitor.py +3 -3
  493. mindspore/train/data_sink.py +6 -5
  494. mindspore/train/dataset_helper.py +66 -21
  495. mindspore/train/loss_scale_manager.py +2 -2
  496. mindspore/train/metrics/accuracy.py +7 -7
  497. mindspore/train/metrics/confusion_matrix.py +8 -6
  498. mindspore/train/metrics/cosine_similarity.py +6 -4
  499. mindspore/train/metrics/error.py +2 -2
  500. mindspore/train/metrics/metric.py +3 -3
  501. mindspore/train/metrics/perplexity.py +2 -1
  502. mindspore/train/metrics/topk.py +2 -2
  503. mindspore/train/mind_ir_pb2.py +89 -15
  504. mindspore/train/model.py +298 -56
  505. mindspore/train/serialization.py +501 -221
  506. mindspore/train/summary/_summary_adapter.py +1 -1
  507. mindspore/train/summary/_writer_pool.py +1 -1
  508. mindspore/train/summary/summary_record.py +56 -34
  509. mindspore/train/train_thor/convert_utils.py +3 -3
  510. mindspore/turbojpeg.dll +0 -0
  511. mindspore/version.py +1 -1
  512. {mindspore-2.2.11.dist-info → mindspore-2.3.0.dist-info}/METADATA +3 -3
  513. mindspore-2.3.0.dist-info/RECORD +1400 -0
  514. {mindspore-2.2.11.dist-info → mindspore-2.3.0.dist-info}/entry_points.txt +1 -0
  515. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +0 -662
  516. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +0 -377
  517. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +0 -201
  518. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +0 -515
  519. mindspore/gen_ops.py +0 -273
  520. mindspore/nn/layer/flash_attention.py +0 -189
  521. mindspore/ops/_op_impl/cpu/tensor_shape.py +0 -42
  522. mindspore/ops/_op_impl/tbe/__init__.py +0 -47
  523. mindspore/ops/_op_impl/tbe/abs.py +0 -38
  524. mindspore/ops/_op_impl/tbe/abs_ds.py +0 -39
  525. mindspore/ops/_op_impl/tbe/abs_grad.py +0 -43
  526. mindspore/ops/_op_impl/tbe/abs_grad_ds.py +0 -44
  527. mindspore/ops/_op_impl/tbe/accumulate_n_v2.py +0 -41
  528. mindspore/ops/_op_impl/tbe/accumulate_n_v2_ds.py +0 -42
  529. mindspore/ops/_op_impl/tbe/acos.py +0 -37
  530. mindspore/ops/_op_impl/tbe/acos_ds.py +0 -38
  531. mindspore/ops/_op_impl/tbe/acos_grad.py +0 -43
  532. mindspore/ops/_op_impl/tbe/acos_grad_ds.py +0 -44
  533. mindspore/ops/_op_impl/tbe/acosh.py +0 -37
  534. mindspore/ops/_op_impl/tbe/acosh_ds.py +0 -38
  535. mindspore/ops/_op_impl/tbe/acosh_grad.py +0 -43
  536. mindspore/ops/_op_impl/tbe/acosh_grad_ds.py +0 -44
  537. mindspore/ops/_op_impl/tbe/act_ulq_clamp_max_grad.py +0 -38
  538. mindspore/ops/_op_impl/tbe/act_ulq_clamp_min_grad.py +0 -38
  539. mindspore/ops/_op_impl/tbe/acts_ulq.py +0 -45
  540. mindspore/ops/_op_impl/tbe/acts_ulq_input_grad.py +0 -38
  541. mindspore/ops/_op_impl/tbe/adam_apply_one.py +0 -50
  542. mindspore/ops/_op_impl/tbe/adam_apply_one_assign.py +0 -53
  543. mindspore/ops/_op_impl/tbe/adam_apply_one_ds.py +0 -51
  544. mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay.py +0 -54
  545. mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay_assign.py +0 -54
  546. mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay_ds.py +0 -55
  547. mindspore/ops/_op_impl/tbe/adaptive_max_pool2d.py +0 -37
  548. mindspore/ops/_op_impl/tbe/add.py +0 -42
  549. mindspore/ops/_op_impl/tbe/add_ds.py +0 -43
  550. mindspore/ops/_op_impl/tbe/add_n.py +0 -39
  551. mindspore/ops/_op_impl/tbe/add_n_ds.py +0 -40
  552. mindspore/ops/_op_impl/tbe/addcdiv.py +0 -41
  553. mindspore/ops/_op_impl/tbe/addcdiv_ds.py +0 -42
  554. mindspore/ops/_op_impl/tbe/addcmul.py +0 -43
  555. mindspore/ops/_op_impl/tbe/addcmul_ds.py +0 -44
  556. mindspore/ops/_op_impl/tbe/apply_ada_max.py +0 -68
  557. mindspore/ops/_op_impl/tbe/apply_ada_max_ds.py +0 -69
  558. mindspore/ops/_op_impl/tbe/apply_adadelta.py +0 -66
  559. mindspore/ops/_op_impl/tbe/apply_adadelta_ds.py +0 -67
  560. mindspore/ops/_op_impl/tbe/apply_adagrad.py +0 -55
  561. mindspore/ops/_op_impl/tbe/apply_adagrad_d_a.py +0 -67
  562. mindspore/ops/_op_impl/tbe/apply_adagrad_ds.py +0 -56
  563. mindspore/ops/_op_impl/tbe/apply_adagrad_v2.py +0 -48
  564. mindspore/ops/_op_impl/tbe/apply_adagrad_v2_ds.py +0 -49
  565. mindspore/ops/_op_impl/tbe/apply_adam.py +0 -79
  566. mindspore/ops/_op_impl/tbe/apply_adam_ds.py +0 -80
  567. mindspore/ops/_op_impl/tbe/apply_adam_with_amsgrad.py +0 -60
  568. mindspore/ops/_op_impl/tbe/apply_adam_with_amsgrad_ds.py +0 -61
  569. mindspore/ops/_op_impl/tbe/apply_add_sign.py +0 -65
  570. mindspore/ops/_op_impl/tbe/apply_add_sign_ds.py +0 -66
  571. mindspore/ops/_op_impl/tbe/apply_centered_rms_prop.py +0 -77
  572. mindspore/ops/_op_impl/tbe/apply_centered_rms_prop_ds.py +0 -78
  573. mindspore/ops/_op_impl/tbe/apply_ftrl.py +0 -67
  574. mindspore/ops/_op_impl/tbe/apply_ftrl_ds.py +0 -68
  575. mindspore/ops/_op_impl/tbe/apply_gradient_descent.py +0 -44
  576. mindspore/ops/_op_impl/tbe/apply_gradient_descent_ds.py +0 -45
  577. mindspore/ops/_op_impl/tbe/apply_keras_momentum.py +0 -49
  578. mindspore/ops/_op_impl/tbe/apply_momentum.py +0 -64
  579. mindspore/ops/_op_impl/tbe/apply_momentum_ds.py +0 -65
  580. mindspore/ops/_op_impl/tbe/apply_power_sign.py +0 -65
  581. mindspore/ops/_op_impl/tbe/apply_power_sign_ds.py +0 -66
  582. mindspore/ops/_op_impl/tbe/apply_proximal_adagrad.py +0 -57
  583. mindspore/ops/_op_impl/tbe/apply_proximal_adagrad_ds.py +0 -58
  584. mindspore/ops/_op_impl/tbe/apply_proximal_gradient_descent.py +0 -54
  585. mindspore/ops/_op_impl/tbe/apply_proximal_gradient_descent_ds.py +0 -55
  586. mindspore/ops/_op_impl/tbe/apply_rms_prop.py +0 -52
  587. mindspore/ops/_op_impl/tbe/approximate_equal.py +0 -39
  588. mindspore/ops/_op_impl/tbe/approximate_equal_ds.py +0 -40
  589. mindspore/ops/_op_impl/tbe/arg_max.py +0 -38
  590. mindspore/ops/_op_impl/tbe/arg_max_with_value.py +0 -38
  591. mindspore/ops/_op_impl/tbe/arg_max_with_value_ds.py +0 -39
  592. mindspore/ops/_op_impl/tbe/arg_min.py +0 -38
  593. mindspore/ops/_op_impl/tbe/arg_min_v2_ds.py +0 -40
  594. mindspore/ops/_op_impl/tbe/arg_min_with_value.py +0 -38
  595. mindspore/ops/_op_impl/tbe/arg_min_with_value_ds.py +0 -39
  596. mindspore/ops/_op_impl/tbe/asin.py +0 -37
  597. mindspore/ops/_op_impl/tbe/asin_ds.py +0 -38
  598. mindspore/ops/_op_impl/tbe/asin_grad.py +0 -43
  599. mindspore/ops/_op_impl/tbe/asin_grad_ds.py +0 -44
  600. mindspore/ops/_op_impl/tbe/asinh.py +0 -37
  601. mindspore/ops/_op_impl/tbe/asinh_ds.py +0 -38
  602. mindspore/ops/_op_impl/tbe/asinh_grad.py +0 -43
  603. mindspore/ops/_op_impl/tbe/asinh_grad_ds.py +0 -44
  604. mindspore/ops/_op_impl/tbe/assign.py +0 -79
  605. mindspore/ops/_op_impl/tbe/assign_add.py +0 -59
  606. mindspore/ops/_op_impl/tbe/assign_add_ds.py +0 -60
  607. mindspore/ops/_op_impl/tbe/assign_ds.py +0 -80
  608. mindspore/ops/_op_impl/tbe/assign_sub.py +0 -55
  609. mindspore/ops/_op_impl/tbe/assign_sub_ds.py +0 -56
  610. mindspore/ops/_op_impl/tbe/atan.py +0 -37
  611. mindspore/ops/_op_impl/tbe/atan2.py +0 -38
  612. mindspore/ops/_op_impl/tbe/atan2_ds.py +0 -39
  613. mindspore/ops/_op_impl/tbe/atan_ds.py +0 -38
  614. mindspore/ops/_op_impl/tbe/atan_grad.py +0 -43
  615. mindspore/ops/_op_impl/tbe/atan_grad_ds.py +0 -44
  616. mindspore/ops/_op_impl/tbe/atanh.py +0 -37
  617. mindspore/ops/_op_impl/tbe/atanh_ds.py +0 -38
  618. mindspore/ops/_op_impl/tbe/avg_pool.py +0 -43
  619. mindspore/ops/_op_impl/tbe/avg_pool_3d.py +0 -44
  620. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +0 -45
  621. mindspore/ops/_op_impl/tbe/avg_pool_ds.py +0 -44
  622. mindspore/ops/_op_impl/tbe/avg_pool_grad.py +0 -42
  623. mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +0 -42
  624. mindspore/ops/_op_impl/tbe/basic_lstm_cell.py +0 -57
  625. mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad.py +0 -50
  626. mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -51
  627. mindspore/ops/_op_impl/tbe/basic_lstm_cell_input_grad.py +0 -42
  628. mindspore/ops/_op_impl/tbe/basic_lstm_cell_weight_grad.py +0 -41
  629. mindspore/ops/_op_impl/tbe/batch_matmul.py +0 -42
  630. mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +0 -41
  631. mindspore/ops/_op_impl/tbe/batch_matmul_v2.py +0 -47
  632. mindspore/ops/_op_impl/tbe/batch_to_space.py +0 -38
  633. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +0 -38
  634. mindspore/ops/_op_impl/tbe/batch_to_space_nd_ds.py +0 -39
  635. mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +0 -41
  636. mindspore/ops/_op_impl/tbe/batchnorm.py +0 -58
  637. mindspore/ops/_op_impl/tbe/batchnorm_grad.py +0 -58
  638. mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +0 -42
  639. mindspore/ops/_op_impl/tbe/bessel_i0e.py +0 -37
  640. mindspore/ops/_op_impl/tbe/bessel_i0e_ds.py +0 -38
  641. mindspore/ops/_op_impl/tbe/bessel_i1e.py +0 -37
  642. mindspore/ops/_op_impl/tbe/bessel_i1e_ds.py +0 -38
  643. mindspore/ops/_op_impl/tbe/bias_add.py +0 -38
  644. mindspore/ops/_op_impl/tbe/bias_add_ds.py +0 -39
  645. mindspore/ops/_op_impl/tbe/bias_add_grad.py +0 -53
  646. mindspore/ops/_op_impl/tbe/binary_cross_entropy.py +0 -39
  647. mindspore/ops/_op_impl/tbe/binary_cross_entropy_ds.py +0 -40
  648. mindspore/ops/_op_impl/tbe/binary_cross_entropy_grad.py +0 -44
  649. mindspore/ops/_op_impl/tbe/binary_cross_entropy_grad_ds.py +0 -45
  650. mindspore/ops/_op_impl/tbe/bitwise_and.py +0 -39
  651. mindspore/ops/_op_impl/tbe/bitwise_and_ds.py +0 -40
  652. mindspore/ops/_op_impl/tbe/bitwise_or.py +0 -39
  653. mindspore/ops/_op_impl/tbe/bitwise_or_ds.py +0 -40
  654. mindspore/ops/_op_impl/tbe/bitwise_xor.py +0 -39
  655. mindspore/ops/_op_impl/tbe/bitwise_xor_ds.py +0 -40
  656. mindspore/ops/_op_impl/tbe/bn_infer.py +0 -43
  657. mindspore/ops/_op_impl/tbe/bn_infer_ds.py +0 -45
  658. mindspore/ops/_op_impl/tbe/bn_infer_grad.py +0 -41
  659. mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +0 -40
  660. mindspore/ops/_op_impl/tbe/bn_inference.py +0 -50
  661. mindspore/ops/_op_impl/tbe/bn_training_reduce.py +0 -38
  662. mindspore/ops/_op_impl/tbe/bn_training_reduce_ds.py +0 -39
  663. mindspore/ops/_op_impl/tbe/bn_training_reduce_grad.py +0 -46
  664. mindspore/ops/_op_impl/tbe/bn_training_reduce_grad_ds.py +0 -47
  665. mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -52
  666. mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -53
  667. mindspore/ops/_op_impl/tbe/bn_training_update_grad.py +0 -44
  668. mindspore/ops/_op_impl/tbe/bn_training_update_grad_ds.py +0 -45
  669. mindspore/ops/_op_impl/tbe/bn_training_update_v2.py +0 -48
  670. mindspore/ops/_op_impl/tbe/bn_training_update_v3.py +0 -51
  671. mindspore/ops/_op_impl/tbe/bounding_box_decode.py +0 -41
  672. mindspore/ops/_op_impl/tbe/bounding_box_decode_ds.py +0 -42
  673. mindspore/ops/_op_impl/tbe/bounding_box_encode.py +0 -38
  674. mindspore/ops/_op_impl/tbe/broadcast_to.py +0 -40
  675. mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +0 -44
  676. mindspore/ops/_op_impl/tbe/cast.py +0 -55
  677. mindspore/ops/_op_impl/tbe/cast_ds.py +0 -58
  678. mindspore/ops/_op_impl/tbe/cdist.py +0 -38
  679. mindspore/ops/_op_impl/tbe/cdist_grad.py +0 -42
  680. mindspore/ops/_op_impl/tbe/ceil.py +0 -37
  681. mindspore/ops/_op_impl/tbe/ceil_ds.py +0 -38
  682. mindspore/ops/_op_impl/tbe/celu.py +0 -39
  683. mindspore/ops/_op_impl/tbe/centralization.py +0 -39
  684. mindspore/ops/_op_impl/tbe/check_valid.py +0 -38
  685. mindspore/ops/_op_impl/tbe/check_valid_ds.py +0 -39
  686. mindspore/ops/_op_impl/tbe/clip_by_norm_no_div_sum.py +0 -41
  687. mindspore/ops/_op_impl/tbe/clip_by_norm_no_div_sum_ds.py +0 -42
  688. mindspore/ops/_op_impl/tbe/clip_by_value.py +0 -41
  689. mindspore/ops/_op_impl/tbe/clip_by_value_ds.py +0 -42
  690. mindspore/ops/_op_impl/tbe/concat.py +0 -40
  691. mindspore/ops/_op_impl/tbe/concat_ds.py +0 -38
  692. mindspore/ops/_op_impl/tbe/confusion_matrix.py +0 -63
  693. mindspore/ops/_op_impl/tbe/confusion_mul_grad.py +0 -40
  694. mindspore/ops/_op_impl/tbe/confusion_softmax_grad.py +0 -41
  695. mindspore/ops/_op_impl/tbe/confusion_transpose_d.py +0 -39
  696. mindspore/ops/_op_impl/tbe/conv2d.py +0 -47
  697. mindspore/ops/_op_impl/tbe/conv2d_backprop_filter.py +0 -42
  698. mindspore/ops/_op_impl/tbe/conv2d_backprop_filter_ds.py +0 -43
  699. mindspore/ops/_op_impl/tbe/conv2d_backprop_input.py +0 -42
  700. mindspore/ops/_op_impl/tbe/conv2d_backprop_input_ds.py +0 -44
  701. mindspore/ops/_op_impl/tbe/conv2d_ds.py +0 -47
  702. mindspore/ops/_op_impl/tbe/conv2d_transpose.py +0 -48
  703. mindspore/ops/_op_impl/tbe/conv3d.py +0 -45
  704. mindspore/ops/_op_impl/tbe/conv3d_backprop_filter.py +0 -42
  705. mindspore/ops/_op_impl/tbe/conv3d_backprop_input.py +0 -42
  706. mindspore/ops/_op_impl/tbe/conv3d_transpose.py +0 -47
  707. mindspore/ops/_op_impl/tbe/conv3d_transpose_ds.py +0 -48
  708. mindspore/ops/_op_impl/tbe/cos.py +0 -37
  709. mindspore/ops/_op_impl/tbe/cos_ds.py +0 -38
  710. mindspore/ops/_op_impl/tbe/cosh.py +0 -37
  711. mindspore/ops/_op_impl/tbe/cosh_ds.py +0 -38
  712. mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -42
  713. mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -44
  714. mindspore/ops/_op_impl/tbe/cum_sum.py +0 -42
  715. mindspore/ops/_op_impl/tbe/cum_sum_ds.py +0 -44
  716. mindspore/ops/_op_impl/tbe/cummin.py +0 -41
  717. mindspore/ops/_op_impl/tbe/cumprod.py +0 -42
  718. mindspore/ops/_op_impl/tbe/data_format_dim_map.py +0 -38
  719. mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +0 -40
  720. mindspore/ops/_op_impl/tbe/deformable_offsets.py +0 -45
  721. mindspore/ops/_op_impl/tbe/deformable_offsets_grad.py +0 -48
  722. mindspore/ops/_op_impl/tbe/depth_to_space_ds.py +0 -49
  723. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +0 -44
  724. mindspore/ops/_op_impl/tbe/depthwise_conv2d_backprop_filter.py +0 -41
  725. mindspore/ops/_op_impl/tbe/depthwise_conv2d_backprop_input.py +0 -41
  726. mindspore/ops/_op_impl/tbe/diag.py +0 -38
  727. mindspore/ops/_op_impl/tbe/diag_part.py +0 -38
  728. mindspore/ops/_op_impl/tbe/dilation.py +0 -40
  729. mindspore/ops/_op_impl/tbe/div.py +0 -41
  730. mindspore/ops/_op_impl/tbe/div_ds.py +0 -42
  731. mindspore/ops/_op_impl/tbe/div_no_nan.py +0 -41
  732. mindspore/ops/_op_impl/tbe/div_no_nan_ds.py +0 -42
  733. mindspore/ops/_op_impl/tbe/dropout_do_mask.py +0 -38
  734. mindspore/ops/_op_impl/tbe/dropout_do_mask_ds.py +0 -39
  735. mindspore/ops/_op_impl/tbe/dropout_do_mask_v3.py +0 -39
  736. mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +0 -34
  737. mindspore/ops/_op_impl/tbe/dynamic_gru_v2.py +0 -95
  738. mindspore/ops/_op_impl/tbe/dynamic_rnn.py +0 -82
  739. mindspore/ops/_op_impl/tbe/elu.py +0 -38
  740. mindspore/ops/_op_impl/tbe/elu_ds.py +0 -39
  741. mindspore/ops/_op_impl/tbe/elu_grad.py +0 -43
  742. mindspore/ops/_op_impl/tbe/elu_grad_ds.py +0 -44
  743. mindspore/ops/_op_impl/tbe/equal.py +0 -42
  744. mindspore/ops/_op_impl/tbe/equal_ds.py +0 -42
  745. mindspore/ops/_op_impl/tbe/erf.py +0 -37
  746. mindspore/ops/_op_impl/tbe/erf_ds.py +0 -38
  747. mindspore/ops/_op_impl/tbe/erfc.py +0 -37
  748. mindspore/ops/_op_impl/tbe/erfc_ds.py +0 -38
  749. mindspore/ops/_op_impl/tbe/erfinv.py +0 -36
  750. mindspore/ops/_op_impl/tbe/exp.py +0 -40
  751. mindspore/ops/_op_impl/tbe/exp_ds.py +0 -41
  752. mindspore/ops/_op_impl/tbe/expand_dims.py +0 -38
  753. mindspore/ops/_op_impl/tbe/expm1.py +0 -37
  754. mindspore/ops/_op_impl/tbe/expm1_ds.py +0 -38
  755. mindspore/ops/_op_impl/tbe/extract_image_patches.py +0 -41
  756. mindspore/ops/_op_impl/tbe/extract_volume_patches.py +0 -39
  757. mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars.py +0 -39
  758. mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_gradient.py +0 -43
  759. mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_per_channel.py +0 -39
  760. mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_per_channel_gradient.py +0 -43
  761. mindspore/ops/_op_impl/tbe/fast_gelu.py +0 -37
  762. mindspore/ops/_op_impl/tbe/fast_gelu_ds.py +0 -38
  763. mindspore/ops/_op_impl/tbe/fast_gelu_grad.py +0 -41
  764. mindspore/ops/_op_impl/tbe/fast_gelu_grad_ds.py +0 -42
  765. mindspore/ops/_op_impl/tbe/fill.py +0 -56
  766. mindspore/ops/_op_impl/tbe/fill_ds.py +0 -42
  767. mindspore/ops/_op_impl/tbe/flatten.py +0 -48
  768. mindspore/ops/_op_impl/tbe/floor.py +0 -37
  769. mindspore/ops/_op_impl/tbe/floor_div.py +0 -41
  770. mindspore/ops/_op_impl/tbe/floor_div_ds.py +0 -42
  771. mindspore/ops/_op_impl/tbe/floor_ds.py +0 -38
  772. mindspore/ops/_op_impl/tbe/floor_mod.py +0 -39
  773. mindspore/ops/_op_impl/tbe/floor_mod_ds.py +0 -40
  774. mindspore/ops/_op_impl/tbe/fused_dbn_dw.py +0 -52
  775. mindspore/ops/_op_impl/tbe/fused_mul_add.py +0 -38
  776. mindspore/ops/_op_impl/tbe/fused_mul_add_n.py +0 -48
  777. mindspore/ops/_op_impl/tbe/fused_mul_add_n_l2loss.py +0 -53
  778. mindspore/ops/_op_impl/tbe/fused_mul_apply_momentum.py +0 -57
  779. mindspore/ops/_op_impl/tbe/fused_mul_apply_momentum_extern.py +0 -67
  780. mindspore/ops/_op_impl/tbe/gather_nd.py +0 -52
  781. mindspore/ops/_op_impl/tbe/gather_nd_ds.py +0 -48
  782. mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
  783. mindspore/ops/_op_impl/tbe/gather_v2_ds.py +0 -68
  784. mindspore/ops/_op_impl/tbe/gelu.py +0 -37
  785. mindspore/ops/_op_impl/tbe/gelu_ds.py +0 -38
  786. mindspore/ops/_op_impl/tbe/gelu_grad.py +0 -42
  787. mindspore/ops/_op_impl/tbe/gelu_grad_ds.py +0 -43
  788. mindspore/ops/_op_impl/tbe/ger.py +0 -43
  789. mindspore/ops/_op_impl/tbe/ger_ds.py +0 -44
  790. mindspore/ops/_op_impl/tbe/greater.py +0 -43
  791. mindspore/ops/_op_impl/tbe/greater_equal.py +0 -41
  792. mindspore/ops/_op_impl/tbe/greater_equal_ds.py +0 -42
  793. mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad.py +0 -51
  794. mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad_cell.py +0 -52
  795. mindspore/ops/_op_impl/tbe/hard_swish.py +0 -37
  796. mindspore/ops/_op_impl/tbe/hard_swish_ds.py +0 -38
  797. mindspore/ops/_op_impl/tbe/hard_swish_grad.py +0 -41
  798. mindspore/ops/_op_impl/tbe/hard_swish_grad_ds.py +0 -42
  799. mindspore/ops/_op_impl/tbe/histogram_fixed_width.py +0 -40
  800. mindspore/ops/_op_impl/tbe/hshrink.py +0 -33
  801. mindspore/ops/_op_impl/tbe/hshrink_grad.py +0 -37
  802. mindspore/ops/_op_impl/tbe/hsigmoid.py +0 -45
  803. mindspore/ops/_op_impl/tbe/hsigmoid_grad.py +0 -39
  804. mindspore/ops/_op_impl/tbe/ifmr.py +0 -47
  805. mindspore/ops/_op_impl/tbe/ifmr_ds.py +0 -48
  806. mindspore/ops/_op_impl/tbe/im2col.py +0 -42
  807. mindspore/ops/_op_impl/tbe/in_top_k.py +0 -37
  808. mindspore/ops/_op_impl/tbe/inplace_add.py +0 -39
  809. mindspore/ops/_op_impl/tbe/inplace_index_add.py +0 -46
  810. mindspore/ops/_op_impl/tbe/inplace_sub.py +0 -39
  811. mindspore/ops/_op_impl/tbe/inplace_update.py +0 -39
  812. mindspore/ops/_op_impl/tbe/inplace_update_ds.py +0 -40
  813. mindspore/ops/_op_impl/tbe/inv.py +0 -38
  814. mindspore/ops/_op_impl/tbe/inv_ds.py +0 -39
  815. mindspore/ops/_op_impl/tbe/inv_grad.py +0 -40
  816. mindspore/ops/_op_impl/tbe/inv_grad_ds.py +0 -41
  817. mindspore/ops/_op_impl/tbe/invert.py +0 -37
  818. mindspore/ops/_op_impl/tbe/invert_ds.py +0 -38
  819. mindspore/ops/_op_impl/tbe/iou.py +0 -38
  820. mindspore/ops/_op_impl/tbe/iou_ds.py +0 -39
  821. mindspore/ops/_op_impl/tbe/is_close.py +0 -40
  822. mindspore/ops/_op_impl/tbe/kl_div_loss.py +0 -38
  823. mindspore/ops/_op_impl/tbe/kl_div_loss_ds.py +0 -39
  824. mindspore/ops/_op_impl/tbe/kl_div_loss_grad.py +0 -40
  825. mindspore/ops/_op_impl/tbe/l2_loss.py +0 -36
  826. mindspore/ops/_op_impl/tbe/l2_loss_ds.py +0 -37
  827. mindspore/ops/_op_impl/tbe/l2_normalize.py +0 -38
  828. mindspore/ops/_op_impl/tbe/l2_normalize_grad.py +0 -40
  829. mindspore/ops/_op_impl/tbe/lamb_apply_optimizer_assign.py +0 -55
  830. mindspore/ops/_op_impl/tbe/lamb_apply_weight_assign.py +0 -42
  831. mindspore/ops/_op_impl/tbe/lamb_next_mv.py +0 -59
  832. mindspore/ops/_op_impl/tbe/lamb_next_mv_with_decay.py +0 -59
  833. mindspore/ops/_op_impl/tbe/lamb_next_right.py +0 -44
  834. mindspore/ops/_op_impl/tbe/lamb_update_with_lr.py +0 -48
  835. mindspore/ops/_op_impl/tbe/lamb_update_with_lr_v2.py +0 -44
  836. mindspore/ops/_op_impl/tbe/lars_update.py +0 -50
  837. mindspore/ops/_op_impl/tbe/lars_update_ds.py +0 -51
  838. mindspore/ops/_op_impl/tbe/layer_norm.py +0 -46
  839. mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop.py +0 -44
  840. mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_ds.py +0 -45
  841. mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -40
  842. mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2_ds.py +0 -41
  843. mindspore/ops/_op_impl/tbe/layer_norm_ds.py +0 -47
  844. mindspore/ops/_op_impl/tbe/layer_norm_grad.py +0 -48
  845. mindspore/ops/_op_impl/tbe/layer_norm_x_backprop.py +0 -43
  846. mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_ds.py +0 -44
  847. mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_v2.py +0 -45
  848. mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_v2_ds.py +0 -45
  849. mindspore/ops/_op_impl/tbe/lerp.py +0 -38
  850. mindspore/ops/_op_impl/tbe/less.py +0 -41
  851. mindspore/ops/_op_impl/tbe/less_ds.py +0 -42
  852. mindspore/ops/_op_impl/tbe/less_equal.py +0 -41
  853. mindspore/ops/_op_impl/tbe/less_equal_ds.py +0 -42
  854. mindspore/ops/_op_impl/tbe/log.py +0 -40
  855. mindspore/ops/_op_impl/tbe/log1p.py +0 -37
  856. mindspore/ops/_op_impl/tbe/log1p_ds.py +0 -38
  857. mindspore/ops/_op_impl/tbe/log_ds.py +0 -41
  858. mindspore/ops/_op_impl/tbe/logical_and.py +0 -37
  859. mindspore/ops/_op_impl/tbe/logical_and_ds.py +0 -38
  860. mindspore/ops/_op_impl/tbe/logical_not.py +0 -36
  861. mindspore/ops/_op_impl/tbe/logical_not_ds.py +0 -37
  862. mindspore/ops/_op_impl/tbe/logical_or.py +0 -37
  863. mindspore/ops/_op_impl/tbe/logical_or_ds.py +0 -38
  864. mindspore/ops/_op_impl/tbe/logsoftmax.py +0 -37
  865. mindspore/ops/_op_impl/tbe/logsoftmax_ds.py +0 -38
  866. mindspore/ops/_op_impl/tbe/logsoftmax_grad.py +0 -38
  867. mindspore/ops/_op_impl/tbe/logsoftmax_grad_ds.py +0 -39
  868. mindspore/ops/_op_impl/tbe/lp_norm.py +0 -40
  869. mindspore/ops/_op_impl/tbe/lp_norm_ds.py +0 -41
  870. mindspore/ops/_op_impl/tbe/lrn.py +0 -41
  871. mindspore/ops/_op_impl/tbe/lrn_grad.py +0 -42
  872. mindspore/ops/_op_impl/tbe/lstm_input_grad.py +0 -51
  873. mindspore/ops/_op_impl/tbe/masked_fill.py +0 -40
  874. mindspore/ops/_op_impl/tbe/masked_fill_ds.py +0 -41
  875. mindspore/ops/_op_impl/tbe/matmul.py +0 -53
  876. mindspore/ops/_op_impl/tbe/matmul_ds.py +0 -47
  877. mindspore/ops/_op_impl/tbe/matmul_v2.py +0 -50
  878. mindspore/ops/_op_impl/tbe/matrix_diag.py +0 -45
  879. mindspore/ops/_op_impl/tbe/matrix_diag_part.py +0 -45
  880. mindspore/ops/_op_impl/tbe/matrix_set_diag.py +0 -46
  881. mindspore/ops/_op_impl/tbe/max_pool.py +0 -39
  882. mindspore/ops/_op_impl/tbe/max_pool3d.py +0 -44
  883. mindspore/ops/_op_impl/tbe/max_pool3d_grad.py +0 -43
  884. mindspore/ops/_op_impl/tbe/max_pool3d_grad_grad.py +0 -44
  885. mindspore/ops/_op_impl/tbe/max_pool_ds.py +0 -40
  886. mindspore/ops/_op_impl/tbe/max_pool_grad.py +0 -43
  887. mindspore/ops/_op_impl/tbe/max_pool_grad_grad.py +0 -41
  888. mindspore/ops/_op_impl/tbe/max_pool_grad_grad_with_argmax.py +0 -41
  889. mindspore/ops/_op_impl/tbe/max_pool_grad_with_argmax.py +0 -42
  890. mindspore/ops/_op_impl/tbe/max_pool_with_argmax.py +0 -40
  891. mindspore/ops/_op_impl/tbe/maximum.py +0 -39
  892. mindspore/ops/_op_impl/tbe/maximum_ds.py +0 -40
  893. mindspore/ops/_op_impl/tbe/maximum_grad.py +0 -46
  894. mindspore/ops/_op_impl/tbe/maximum_grad_ds.py +0 -47
  895. mindspore/ops/_op_impl/tbe/mem_set.py +0 -38
  896. mindspore/ops/_op_impl/tbe/minimum.py +0 -40
  897. mindspore/ops/_op_impl/tbe/minimum_ds.py +0 -41
  898. mindspore/ops/_op_impl/tbe/minimum_grad.py +0 -46
  899. mindspore/ops/_op_impl/tbe/minimum_grad_ds.py +0 -47
  900. mindspore/ops/_op_impl/tbe/mish.py +0 -37
  901. mindspore/ops/_op_impl/tbe/mod.py +0 -41
  902. mindspore/ops/_op_impl/tbe/mod_ds.py +0 -42
  903. mindspore/ops/_op_impl/tbe/mul.py +0 -37
  904. mindspore/ops/_op_impl/tbe/mul_ds.py +0 -38
  905. mindspore/ops/_op_impl/tbe/mul_no_nan.py +0 -39
  906. mindspore/ops/_op_impl/tbe/mul_no_nan_ds.py +0 -40
  907. mindspore/ops/_op_impl/tbe/multilabel_margin_loss.py +0 -39
  908. mindspore/ops/_op_impl/tbe/neg.py +0 -39
  909. mindspore/ops/_op_impl/tbe/neg_ds.py +0 -40
  910. mindspore/ops/_op_impl/tbe/new_im2col.py +0 -40
  911. mindspore/ops/_op_impl/tbe/nll_loss.py +0 -41
  912. mindspore/ops/_op_impl/tbe/nll_loss_grad.py +0 -44
  913. mindspore/ops/_op_impl/tbe/nms_with_mask.py +0 -39
  914. mindspore/ops/_op_impl/tbe/not_equal.py +0 -41
  915. mindspore/ops/_op_impl/tbe/not_equal_ds.py +0 -42
  916. mindspore/ops/_op_impl/tbe/npu_alloc_float_status.py +0 -34
  917. mindspore/ops/_op_impl/tbe/npu_clear_float_status.py +0 -35
  918. mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +0 -35
  919. mindspore/ops/_op_impl/tbe/npu_get_float_status.py +0 -35
  920. mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +0 -35
  921. mindspore/ops/_op_impl/tbe/one_hot.py +0 -48
  922. mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -45
  923. mindspore/ops/_op_impl/tbe/ones_like.py +0 -40
  924. mindspore/ops/_op_impl/tbe/ones_like_ds.py +0 -41
  925. mindspore/ops/_op_impl/tbe/p_s_r_o_i_pooling.py +0 -40
  926. mindspore/ops/_op_impl/tbe/p_s_r_o_i_pooling_grad.py +0 -40
  927. mindspore/ops/_op_impl/tbe/pack.py +0 -58
  928. mindspore/ops/_op_impl/tbe/pack_ds.py +0 -59
  929. mindspore/ops/_op_impl/tbe/pad_d.py +0 -40
  930. mindspore/ops/_op_impl/tbe/pad_d_ds.py +0 -41
  931. mindspore/ops/_op_impl/tbe/parallel_concat.py +0 -70
  932. mindspore/ops/_op_impl/tbe/parallel_resize_bilinear.py +0 -45
  933. mindspore/ops/_op_impl/tbe/parallel_resize_bilinear_grad.py +0 -44
  934. mindspore/ops/_op_impl/tbe/pdist.py +0 -36
  935. mindspore/ops/_op_impl/tbe/pooling.py +0 -46
  936. mindspore/ops/_op_impl/tbe/population_count.py +0 -38
  937. mindspore/ops/_op_impl/tbe/pow.py +0 -41
  938. mindspore/ops/_op_impl/tbe/pow_ds.py +0 -42
  939. mindspore/ops/_op_impl/tbe/prelu.py +0 -37
  940. mindspore/ops/_op_impl/tbe/prelu_ds.py +0 -38
  941. mindspore/ops/_op_impl/tbe/prelu_grad.py +0 -40
  942. mindspore/ops/_op_impl/tbe/range.py +0 -39
  943. mindspore/ops/_op_impl/tbe/real_div.py +0 -38
  944. mindspore/ops/_op_impl/tbe/real_div_ds.py +0 -39
  945. mindspore/ops/_op_impl/tbe/reciprocal.py +0 -36
  946. mindspore/ops/_op_impl/tbe/reciprocal_ds.py +0 -37
  947. mindspore/ops/_op_impl/tbe/reciprocal_grad.py +0 -38
  948. mindspore/ops/_op_impl/tbe/reciprocal_grad_ds.py +0 -39
  949. mindspore/ops/_op_impl/tbe/reduce_all.py +0 -38
  950. mindspore/ops/_op_impl/tbe/reduce_all_ds.py +0 -39
  951. mindspore/ops/_op_impl/tbe/reduce_any.py +0 -38
  952. mindspore/ops/_op_impl/tbe/reduce_any_ds.py +0 -39
  953. mindspore/ops/_op_impl/tbe/reduce_max.py +0 -43
  954. mindspore/ops/_op_impl/tbe/reduce_max_ds.py +0 -41
  955. mindspore/ops/_op_impl/tbe/reduce_mean.py +0 -40
  956. mindspore/ops/_op_impl/tbe/reduce_mean_ds.py +0 -42
  957. mindspore/ops/_op_impl/tbe/reduce_min.py +0 -41
  958. mindspore/ops/_op_impl/tbe/reduce_min_ds.py +0 -41
  959. mindspore/ops/_op_impl/tbe/reduce_prod.py +0 -42
  960. mindspore/ops/_op_impl/tbe/reduce_prod_ds.py +0 -41
  961. mindspore/ops/_op_impl/tbe/reduce_std.py +0 -44
  962. mindspore/ops/_op_impl/tbe/reduce_sum.py +0 -39
  963. mindspore/ops/_op_impl/tbe/reduce_sum_ds.py +0 -41
  964. mindspore/ops/_op_impl/tbe/relu.py +0 -39
  965. mindspore/ops/_op_impl/tbe/relu6.py +0 -38
  966. mindspore/ops/_op_impl/tbe/relu6_ds.py +0 -39
  967. mindspore/ops/_op_impl/tbe/relu6_grad.py +0 -43
  968. mindspore/ops/_op_impl/tbe/relu6_grad_ds.py +0 -44
  969. mindspore/ops/_op_impl/tbe/relu_ds.py +0 -40
  970. mindspore/ops/_op_impl/tbe/relu_grad.py +0 -41
  971. mindspore/ops/_op_impl/tbe/relu_grad_ds.py +0 -42
  972. mindspore/ops/_op_impl/tbe/relu_grad_v2.py +0 -40
  973. mindspore/ops/_op_impl/tbe/relu_grad_v2_ds.py +0 -41
  974. mindspore/ops/_op_impl/tbe/relu_v2.py +0 -40
  975. mindspore/ops/_op_impl/tbe/relu_v2_ds.py +0 -41
  976. mindspore/ops/_op_impl/tbe/renorm.py +0 -39
  977. mindspore/ops/_op_impl/tbe/resize_bilinear.py +0 -40
  978. mindspore/ops/_op_impl/tbe/resize_bilinear_grad.py +0 -41
  979. mindspore/ops/_op_impl/tbe/resize_bilinear_v2.py +0 -43
  980. mindspore/ops/_op_impl/tbe/resize_nearest_neighbor.py +0 -40
  981. mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_ds.py +0 -40
  982. mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_grad.py +0 -39
  983. mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_grad_ds.py +0 -42
  984. mindspore/ops/_op_impl/tbe/reverse_v2_d.py +0 -37
  985. mindspore/ops/_op_impl/tbe/rint.py +0 -37
  986. mindspore/ops/_op_impl/tbe/rint_ds.py +0 -38
  987. mindspore/ops/_op_impl/tbe/roi_align.py +0 -43
  988. mindspore/ops/_op_impl/tbe/roi_align_ds.py +0 -44
  989. mindspore/ops/_op_impl/tbe/roi_align_grad.py +0 -43
  990. mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +0 -44
  991. mindspore/ops/_op_impl/tbe/roll.py +0 -42
  992. mindspore/ops/_op_impl/tbe/round.py +0 -38
  993. mindspore/ops/_op_impl/tbe/round_ds.py +0 -39
  994. mindspore/ops/_op_impl/tbe/rsqrt.py +0 -37
  995. mindspore/ops/_op_impl/tbe/rsqrt_ds.py +0 -38
  996. mindspore/ops/_op_impl/tbe/rsqrt_grad.py +0 -40
  997. mindspore/ops/_op_impl/tbe/rsqrt_grad_ds.py +0 -41
  998. mindspore/ops/_op_impl/tbe/scatter_add.py +0 -44
  999. mindspore/ops/_op_impl/tbe/scatter_div.py +0 -46
  1000. mindspore/ops/_op_impl/tbe/scatter_max.py +0 -45
  1001. mindspore/ops/_op_impl/tbe/scatter_min.py +0 -45
  1002. mindspore/ops/_op_impl/tbe/scatter_mul.py +0 -44
  1003. mindspore/ops/_op_impl/tbe/scatter_nd.py +0 -41
  1004. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -45
  1005. mindspore/ops/_op_impl/tbe/scatter_nd_d.py +0 -41
  1006. mindspore/ops/_op_impl/tbe/scatter_nd_ds.py +0 -49
  1007. mindspore/ops/_op_impl/tbe/scatter_nd_sub.py +0 -47
  1008. mindspore/ops/_op_impl/tbe/scatter_nd_sub_ds.py +0 -48
  1009. mindspore/ops/_op_impl/tbe/scatter_nd_update.py +0 -47
  1010. mindspore/ops/_op_impl/tbe/scatter_nd_update_ds.py +0 -48
  1011. mindspore/ops/_op_impl/tbe/scatter_non_aliasing_add.py +0 -39
  1012. mindspore/ops/_op_impl/tbe/scatter_non_aliasing_add_ds.py +0 -40
  1013. mindspore/ops/_op_impl/tbe/scatter_sub.py +0 -47
  1014. mindspore/ops/_op_impl/tbe/scatter_sub_ds.py +0 -48
  1015. mindspore/ops/_op_impl/tbe/scatter_update.py +0 -43
  1016. mindspore/ops/_op_impl/tbe/select.py +0 -38
  1017. mindspore/ops/_op_impl/tbe/select_ds.py +0 -39
  1018. mindspore/ops/_op_impl/tbe/selu.py +0 -39
  1019. mindspore/ops/_op_impl/tbe/selu_ds.py +0 -40
  1020. mindspore/ops/_op_impl/tbe/sgd.py +0 -62
  1021. mindspore/ops/_op_impl/tbe/sigmoid.py +0 -37
  1022. mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits.py +0 -41
  1023. mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_ds.py +0 -42
  1024. mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_grad.py +0 -42
  1025. mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_grad_ds.py +0 -43
  1026. mindspore/ops/_op_impl/tbe/sigmoid_ds.py +0 -38
  1027. mindspore/ops/_op_impl/tbe/sigmoid_grad.py +0 -39
  1028. mindspore/ops/_op_impl/tbe/sigmoid_grad_ds.py +0 -40
  1029. mindspore/ops/_op_impl/tbe/sign.py +0 -38
  1030. mindspore/ops/_op_impl/tbe/sign_ds.py +0 -39
  1031. mindspore/ops/_op_impl/tbe/sin.py +0 -37
  1032. mindspore/ops/_op_impl/tbe/sin_ds.py +0 -38
  1033. mindspore/ops/_op_impl/tbe/sinh.py +0 -37
  1034. mindspore/ops/_op_impl/tbe/sinh_ds.py +0 -38
  1035. mindspore/ops/_op_impl/tbe/slice.py +0 -58
  1036. mindspore/ops/_op_impl/tbe/smooth_l1_loss.py +0 -45
  1037. mindspore/ops/_op_impl/tbe/smooth_l1_loss_ds.py +0 -46
  1038. mindspore/ops/_op_impl/tbe/smooth_l1_loss_grad.py +0 -46
  1039. mindspore/ops/_op_impl/tbe/smooth_l1_loss_grad_ds.py +0 -47
  1040. mindspore/ops/_op_impl/tbe/soft_margin_loss.py +0 -38
  1041. mindspore/ops/_op_impl/tbe/soft_margin_loss_grad.py +0 -39
  1042. mindspore/ops/_op_impl/tbe/soft_shrink.py +0 -36
  1043. mindspore/ops/_op_impl/tbe/soft_shrink_grad.py +0 -38
  1044. mindspore/ops/_op_impl/tbe/softmax.py +0 -37
  1045. mindspore/ops/_op_impl/tbe/softmax_cross_entropy_with_logits.py +0 -38
  1046. mindspore/ops/_op_impl/tbe/softmax_cross_entropy_with_logits_ds.py +0 -39
  1047. mindspore/ops/_op_impl/tbe/softmax_ds.py +0 -38
  1048. mindspore/ops/_op_impl/tbe/softmax_grad_ext.py +0 -42
  1049. mindspore/ops/_op_impl/tbe/softmax_v2_with_dropout_do_mask_v3.py +0 -39
  1050. mindspore/ops/_op_impl/tbe/softplus.py +0 -37
  1051. mindspore/ops/_op_impl/tbe/softplus_ds.py +0 -38
  1052. mindspore/ops/_op_impl/tbe/softplus_grad.py +0 -38
  1053. mindspore/ops/_op_impl/tbe/softplus_grad_ds.py +0 -38
  1054. mindspore/ops/_op_impl/tbe/softsign.py +0 -37
  1055. mindspore/ops/_op_impl/tbe/softsign_ds.py +0 -38
  1056. mindspore/ops/_op_impl/tbe/sort.py +0 -38
  1057. mindspore/ops/_op_impl/tbe/sort_ds.py +0 -39
  1058. mindspore/ops/_op_impl/tbe/space_to_batch.py +0 -38
  1059. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +0 -38
  1060. mindspore/ops/_op_impl/tbe/space_to_depth.py +0 -47
  1061. mindspore/ops/_op_impl/tbe/sparse_apply_adadelta.py +0 -56
  1062. mindspore/ops/_op_impl/tbe/sparse_apply_adagrad.py +0 -45
  1063. mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_ds.py +0 -46
  1064. mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_v2.py +0 -46
  1065. mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_v2_ds.py +0 -47
  1066. mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_d.py +0 -53
  1067. mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_d_ds.py +0 -50
  1068. mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_v2.py +0 -50
  1069. mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad.py +0 -66
  1070. mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad_ds.py +0 -67
  1071. mindspore/ops/_op_impl/tbe/sparse_apply_r_m_s_prop.py +0 -57
  1072. mindspore/ops/_op_impl/tbe/sparse_apply_r_m_s_prop_ds.py +0 -58
  1073. mindspore/ops/_op_impl/tbe/sparse_gather_v2.py +0 -56
  1074. mindspore/ops/_op_impl/tbe/sparse_gather_v2_ds.py +0 -58
  1075. mindspore/ops/_op_impl/tbe/split_d.py +0 -38
  1076. mindspore/ops/_op_impl/tbe/split_d_ds.py +0 -39
  1077. mindspore/ops/_op_impl/tbe/split_v.py +0 -39
  1078. mindspore/ops/_op_impl/tbe/splitv.py +0 -39
  1079. mindspore/ops/_op_impl/tbe/sqrt.py +0 -37
  1080. mindspore/ops/_op_impl/tbe/sqrt_ds.py +0 -38
  1081. mindspore/ops/_op_impl/tbe/sqrt_grad.py +0 -43
  1082. mindspore/ops/_op_impl/tbe/sqrt_grad_ds.py +0 -44
  1083. mindspore/ops/_op_impl/tbe/square.py +0 -38
  1084. mindspore/ops/_op_impl/tbe/square_ds.py +0 -39
  1085. mindspore/ops/_op_impl/tbe/square_sum_all.py +0 -40
  1086. mindspore/ops/_op_impl/tbe/square_sum_all_ds.py +0 -41
  1087. mindspore/ops/_op_impl/tbe/square_sum_v1.py +0 -38
  1088. mindspore/ops/_op_impl/tbe/square_sum_v1_ds.py +0 -39
  1089. mindspore/ops/_op_impl/tbe/square_sum_v2.py +0 -39
  1090. mindspore/ops/_op_impl/tbe/squared_difference.py +0 -39
  1091. mindspore/ops/_op_impl/tbe/squared_difference_ds.py +0 -41
  1092. mindspore/ops/_op_impl/tbe/squeeze.py +0 -37
  1093. mindspore/ops/_op_impl/tbe/strided_read.py +0 -38
  1094. mindspore/ops/_op_impl/tbe/strided_slice_d.py +0 -44
  1095. mindspore/ops/_op_impl/tbe/strided_slice_ds.py +0 -71
  1096. mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +0 -51
  1097. mindspore/ops/_op_impl/tbe/strided_slice_grad_ds.py +0 -57
  1098. mindspore/ops/_op_impl/tbe/strided_write.py +0 -38
  1099. mindspore/ops/_op_impl/tbe/sub.py +0 -39
  1100. mindspore/ops/_op_impl/tbe/sub_ds.py +0 -40
  1101. mindspore/ops/_op_impl/tbe/tan.py +0 -38
  1102. mindspore/ops/_op_impl/tbe/tan_ds.py +0 -39
  1103. mindspore/ops/_op_impl/tbe/tanh.py +0 -37
  1104. mindspore/ops/_op_impl/tbe/tanh_ds.py +0 -38
  1105. mindspore/ops/_op_impl/tbe/tanh_grad.py +0 -39
  1106. mindspore/ops/_op_impl/tbe/tanh_grad_ds.py +0 -40
  1107. mindspore/ops/_op_impl/tbe/tensor_move.py +0 -49
  1108. mindspore/ops/_op_impl/tbe/tensor_move_ds.py +0 -50
  1109. mindspore/ops/_op_impl/tbe/tensor_scatter_update.py +0 -41
  1110. mindspore/ops/_op_impl/tbe/tile.py +0 -37
  1111. mindspore/ops/_op_impl/tbe/tile_ds.py +0 -42
  1112. mindspore/ops/_op_impl/tbe/top_k.py +0 -42
  1113. mindspore/ops/_op_impl/tbe/top_k_ds.py +0 -43
  1114. mindspore/ops/_op_impl/tbe/trans_data.py +0 -167
  1115. mindspore/ops/_op_impl/tbe/trans_data_ds.py +0 -180
  1116. mindspore/ops/_op_impl/tbe/trans_data_rnn.py +0 -44
  1117. mindspore/ops/_op_impl/tbe/transpose.py +0 -60
  1118. mindspore/ops/_op_impl/tbe/transpose_d.py +0 -47
  1119. mindspore/ops/_op_impl/tbe/transpose_nod.py +0 -60
  1120. mindspore/ops/_op_impl/tbe/trunc.py +0 -39
  1121. mindspore/ops/_op_impl/tbe/truncate_div.py +0 -41
  1122. mindspore/ops/_op_impl/tbe/truncate_div_ds.py +0 -42
  1123. mindspore/ops/_op_impl/tbe/truncate_mod.py +0 -41
  1124. mindspore/ops/_op_impl/tbe/truncate_mod_ds.py +0 -42
  1125. mindspore/ops/_op_impl/tbe/unpack.py +0 -38
  1126. mindspore/ops/_op_impl/tbe/unpack_ds.py +0 -39
  1127. mindspore/ops/_op_impl/tbe/unsorted_segment_max.py +0 -49
  1128. mindspore/ops/_op_impl/tbe/unsorted_segment_max_ds.py +0 -40
  1129. mindspore/ops/_op_impl/tbe/unsorted_segment_min.py +0 -49
  1130. mindspore/ops/_op_impl/tbe/unsorted_segment_min_ds.py +0 -40
  1131. mindspore/ops/_op_impl/tbe/unsorted_segment_prod.py +0 -49
  1132. mindspore/ops/_op_impl/tbe/unsorted_segment_prod_ds.py +0 -38
  1133. mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +0 -38
  1134. mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +0 -41
  1135. mindspore/ops/_op_impl/tbe/wts_arq.py +0 -40
  1136. mindspore/ops/_op_impl/tbe/xdivy.py +0 -38
  1137. mindspore/ops/_op_impl/tbe/xdivy_ds.py +0 -39
  1138. mindspore/ops/_op_impl/tbe/xlogy.py +0 -38
  1139. mindspore/ops/_op_impl/tbe/xlogy_ds.py +0 -39
  1140. mindspore/ops/_op_impl/tbe/zeros_like.py +0 -41
  1141. mindspore/ops/_op_impl/tbe/zeros_like_ds.py +0 -42
  1142. mindspore/ops/_tracefunc.py +0 -241
  1143. mindspore/ops/arg_dtype_cast.py +0 -54
  1144. mindspore/rewrite/api/tree_node_helper.py +0 -60
  1145. mindspore/rewrite/ast_helpers/ast_creator.py +0 -115
  1146. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +0 -267
  1147. mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +0 -228
  1148. mindspore/rewrite/namespace.py +0 -53
  1149. mindspore-2.2.11.dist-info/RECORD +0 -1920
  1150. {mindspore-2.2.11.dist-info → mindspore-2.3.0.dist-info}/WHEEL +0 -0
  1151. {mindspore-2.2.11.dist-info → mindspore-2.3.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2022 Huawei Technologies Co., Ltd
1
+ # Copyright 2023-2024 Huawei Technologies Co., Ltd
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -15,7 +15,7 @@
15
15
 
16
16
  """Defines nn operators with functional form."""
17
17
  from __future__ import absolute_import
18
- from math import pi, log
18
+ from math import pi, log, floor
19
19
 
20
20
  from mindspore import context
21
21
  from mindspore import log as logger
@@ -29,6 +29,7 @@ import mindspore.common.dtype as mstype
29
29
  from mindspore.ops.function.math_func import logsumexp
30
30
  from mindspore.ops.function.random_func import _get_seed, _set_prim_op_user_data
31
31
  from mindspore.common.tensor import Tensor
32
+ from mindspore.common.parameter import Parameter
32
33
  from mindspore._c_expression import Tensor as Tensor_
33
34
  from mindspore.ops._primitive_cache import _get_cache_prim
34
35
  from mindspore import _checkparam as validator
@@ -38,22 +39,62 @@ from mindspore.ops.operations.nn_ops import FractionalMaxPoolWithFixedKsize, Fra
38
39
  from mindspore.ops.operations.nn_ops import PadV3
39
40
  from mindspore.ops.operations.nn_ops import ChannelShuffle
40
41
  from mindspore.ops.operations.nn_ops import TripletMarginLoss
41
- from mindspore.ops.operations._inner_ops import SiLU
42
42
  from mindspore.ops.operations._sequence_ops import TupleToTensor, TensorToTuple, ListToTensor
43
43
  from mindspore.common.api import _function_forbid_reuse
44
-
45
- slice_ = P.Slice()
46
- fast_gelu_ = P.FastGeLU()
47
- softsign_ = P.Softsign()
44
+ from mindspore.ops.auto_generate import log_softmax, dense, prelu, celu, relu, fast_gelu, silu, elu, sigmoid, relu6
45
+ from mindspore.ops.auto_generate import group_norm_op, rms_norm, layer_norm_ext_op, batch_norm_ext_op
46
+ from mindspore.ops.auto_generate import (reflection_pad_1d_op, reflection_pad_2d_op, reflection_pad_3d_op, # pylint: disable=W0611
47
+ replication_pad_1d_op, replication_pad_2d_op, replication_pad_3d_op,
48
+ constant_pad_nd_op, dropout_ext_op, reverse_v2_impl)
49
+ from mindspore.ops.auto_generate.gen_ops_prim import embedding_op, Convolution
50
+ from mindspore.common.generator import default_generator
51
+
52
+ abs_ = P.Abs()
53
+ add_ = P.Add()
54
+ bias_add_ = P.BiasAdd()
55
+ cast_ = P.Cast()
56
+ div_ = P.Div()
57
+ dtype_ = P.DType()
58
+ equal_ = P.Equal()
59
+ erf_ = P.Erf()
60
+ exp_ = P.Exp()
61
+ expand_dims_ = P.ExpandDims()
62
+ fillv2_ = P.FillV2()
63
+ gather_ = P.Gather()
64
+ gather_d_ = P.GatherD()
65
+ gelu_ = P.GeLU()
66
+ greater_ = P.Greater()
48
67
  hardswish_ = P.HSwish()
68
+ less_ = P.Less()
69
+ list_to_tensor_ = ListToTensor()
70
+ log_ = P.Log()
71
+ matmul_ = P.MatMul()
72
+ maximum_ = P.Maximum()
73
+ minimum_ = P.Minimum()
49
74
  mish_ = NN_OPS.Mish()
50
- selu_ = NN_OPS.SeLU()
75
+ mul_ = P.Mul()
76
+ neg_ = P.Neg()
77
+ ones_like_ = P.OnesLike()
78
+ reduce_mean_ = P.ReduceMean()
79
+ reduce_sum_ = P.ReduceSum()
80
+ reshape_ = P.Reshape()
51
81
  scalar_to_tensor_ = P.ScalarToTensor()
52
- list_to_tensor_ = ListToTensor()
53
- tuple_to_tensor_ = TupleToTensor()
82
+ select_ = P.Select()
83
+ selu_ = NN_OPS.SeLU()
84
+ shape_ = P.Shape()
85
+ sigmoid_ = P.Sigmoid()
86
+ sign_ = P.Sign()
87
+ slice_ = P.Slice()
88
+ softplus_ = P.Softplus()
89
+ softsign_ = P.Softsign()
90
+ sqrt_ = P.Sqrt()
91
+ square_ = P.Square()
92
+ sub_ = P.Sub()
93
+ tensor_shape_ = P.TensorShape()
54
94
  tensor_to_tuple_ = TensorToTuple()
55
- cast_ = P.Cast()
56
- sigmoid_ = NN_OPS.Sigmoid()
95
+ transpose_ = P.Transpose()
96
+ tuple_to_tensor_ = TupleToTensor()
97
+
57
98
  check_positive_int_const = validator.check_positive_int
58
99
  check_positive_int_sequence_const = validator.check_positive_int_sequence
59
100
  check_positive_float_const = validator.check_positive_float
@@ -63,6 +104,7 @@ check_int_const = validator.check_is_int
63
104
  check_non_negative_float_const = validator.check_non_negative_float
64
105
  check_string_const = constexpr(validator.check_string)
65
106
 
107
+ generator_step_ = Tensor(1, mstype.int64)
66
108
 
67
109
  def adaptive_avg_pool2d(input, output_size):
68
110
  r"""
@@ -103,11 +145,11 @@ def adaptive_avg_pool2d(input, output_size):
103
145
  .. math::
104
146
 
105
147
  out\_shape = \begin{cases}
106
- input\_x\_shape[-2] + output\_size[1], & \text{if output_size is (None, w);}\\
107
- output\_size[0] + input\_x\_shape[-1], & \text{if output_size is (h, None);}\\
108
- input\_x\_shape[-2:], & \text{if output_size is (None, None);}\\
109
- (h, h), & \text{if output_size is h;}\\
110
- (h, w), & \text{if output_size is (h, w)}
148
+ input\_shape[-2] + output\_size[1], & \text{if } output\_size text{ is (None, w);}\\
149
+ output\_size[0] + input\_shape[-1], & \text{if } output\_size text{ is (h, None);}\\
150
+ input\_shape[-2:], & \text{if } output\_size text{ is (None, None);}\\
151
+ (h, h), & \text{if } output\_size text{ is h;}\\
152
+ (h, w), & \text{if } output\_size text{ is (h, w)}
111
153
  \end{cases}
112
154
 
113
155
  Raises:
@@ -275,7 +317,7 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
275
317
  Tensor of shape :math:`(N, C_{out}, L_{out})`.
276
318
 
277
319
  Raises:
278
- TypeError: If `input_x` is not an Tensor.
320
+ TypeError: If `input_x` is not a Tensor.
279
321
  TypeError: If `kernel_size` or `stride` is not an int.
280
322
  TypeError: If `ceil_mode` or `count_include_pad` is not a bool.
281
323
  ValueError: If length of shape of `input_x` is not equal to `3`.
@@ -298,9 +340,6 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
298
340
  if not isinstance(input_x, (Tensor, Tensor_)):
299
341
  raise TypeError("For avg_pool1d, the input input_x must be tensor")
300
342
 
301
- if len(input_x.shape) != 3:
302
- raise ValueError(f"For avg_pool1d, input must have 3 dim, but got {len(input_x.shape)}.")
303
-
304
343
  _check_avgpool_1d_type_and_int(kernel_size, stride, ceil_mode, count_include_pad)
305
344
  if isinstance(padding, int):
306
345
  check_non_negative_int(padding, 'padding', 'avg_pool1d')
@@ -319,7 +358,6 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
319
358
  raise ValueError("For avg_pool1d, stride should be int or tuple of length 1.")
320
359
  stride = stride[0]
321
360
 
322
- expand_op = _get_cache_prim(P.ExpandDims)()
323
361
  squeeze_op = _get_cache_prim(P.Squeeze)((2, 3))
324
362
  avg_pool_op = _get_cache_prim(P.AvgPool3D)(kernel_size=(1, 1, kernel_size),
325
363
  strides=(1, 1, stride),
@@ -327,8 +365,8 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
327
365
  pad=padding,
328
366
  ceil_mode=ceil_mode,
329
367
  count_include_pad=count_include_pad)
330
- input_x = expand_op(input_x, 2)
331
- input_x = expand_op(input_x, 2)
368
+ input_x = expand_dims_(input_x, 2)
369
+ input_x = expand_dims_(input_x, 2)
332
370
  input_x = avg_pool_op(input_x)
333
371
  input_x = squeeze_op(input_x)
334
372
  return input_x
@@ -429,7 +467,7 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
429
467
  Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
430
468
 
431
469
  Raises:
432
- TypeError: If `input_x` is not an Tensor.
470
+ TypeError: If `input_x` is not a Tensor.
433
471
  TypeError: If `kernel_size` or `stride` is neither int nor tuple.
434
472
  TypeError: If `ceil_mode` or `count_include_pad` is not a bool.
435
473
  TypeError: If `divisor_override` is not an int.
@@ -459,15 +497,10 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
459
497
  if not isinstance(input_x, (Tensor, Tensor_)):
460
498
  raise TypeError("For avg_pool2d, the input input_x must be tensor")
461
499
 
462
- if len(input_x.shape) != 4:
463
- raise ValueError(f"For avg_pool2d, input must have 4 dim, but got {len(input_x.shape)}.")
464
-
465
500
  kernel_size = _check_avgpool_2d_kernel_size(kernel_size)
466
501
  stride = _check_avgpool_2d_stride(stride)
467
502
  padding = _check_avgpool_2d_padding(padding)
468
503
  _check_avg_pool2d_type_and_value(ceil_mode, count_include_pad, divisor_override)
469
-
470
- expand_op = _get_cache_prim(P.ExpandDims)()
471
504
  squeeze_op = _get_cache_prim(P.Squeeze)(2)
472
505
  avg_pool_op = _get_cache_prim(P.AvgPool3D)(kernel_size=kernel_size,
473
506
  strides=stride,
@@ -476,13 +509,82 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
476
509
  ceil_mode=ceil_mode,
477
510
  count_include_pad=count_include_pad,
478
511
  divisor_override=divisor_override)
479
- input_x = expand_op(input_x, 2)
512
+ input_x = expand_dims_(input_x, 2)
480
513
  input_x = avg_pool_op(input_x)
481
514
  input_x = squeeze_op(input_x)
482
515
  return input_x
483
516
 
484
517
 
485
- @constexpr
518
+ def avg_pool2d_ext(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True,
519
+ divisor_override=None):
520
+ r"""
521
+ Applies a 2D average pooling over an input Tensor which can be regarded as a composition of 2D input planes.
522
+ Typically the input is of shape :math:`(N, C, H_{in}, W_{in})`, outputs regional average in the
523
+ :math:`(H_{in}, W_{in})`-dimension. Given kernel size :math:`(k_{H}, k_{W})` and `stride` , the operation
524
+ is as follows.
525
+
526
+ .. math::
527
+ \text{output}(N_i, C_j, h, w) = \frac{1}{k_{H} * k_{W}} \sum_{m=0}^{k_{H}-1} \sum_{n=0}^{k_{W}-1}
528
+ \text{input}(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)
529
+
530
+ Args:
531
+ input (Tensor): Tensor of shape :math:`(N, C, H_{in}, W_{in})`.
532
+ kernel_size (Union[int, tuple[int], list[int]]): The size of kernel used to take the average value.
533
+ Can be a single number or a tuple (kH, kW).
534
+ stride (Union[int, tuple[int], list[int]]): The distance of kernel moving. Can be a single number or
535
+ a tuple (sH, sW). Default value is `kernel_size` .
536
+ padding (Union(int, tuple[int], list[int])): Implicit zero padding to be added on both sides.
537
+ Can be a single number or a tuple (padH, padW). Default: 0.
538
+ ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape.
539
+ Default: ``False``.
540
+ count_include_pad (bool): If True, include the zero-padding in the averaging calculation.
541
+ Default: ``True`` .
542
+ divisor_override (int): If specified, it will be used as divisor in the averaging calculation,
543
+ otherwise size of pooling region will be used. Default: ``None``.
544
+
545
+ Returns:
546
+ Tensor, with shape :math:`(N, C, H_{out}, W_{out})`.
547
+
548
+ .. math::
549
+ \begin{array}{ll} \\
550
+ H_{out} = \frac{H_{in} + 2 \times padding[0] - kernel_size[0]}{stride[0]} + 1 \\
551
+ W_{out} = \frac{W_{in} + 2 \times padding[1] - kernel_size[1]}{stride[1]} + 1
552
+ \end{array}
553
+
554
+ Raises:
555
+ TypeError: If `input` is not a Tensor.
556
+ TypeError: If `kernel_size` or `stride` is neither int nor tuple.
557
+ TypeError: If `ceil_mode` or `count_include_pad` is not a bool.
558
+ TypeError: If `divisor_override` is not an int or None.
559
+ ValueError: If the dimension of `input` is not equal to `4` or `3`.
560
+ ValueError: If `kernel_size` or `stride` is less than 1.
561
+ ValueError: If `kernel_size` or `stride` is a tuple whose length is not equal to `2` or `1`.
562
+ ValueError: If `padding` is neither a int nor a tuple whose length is equal to `2` or `1`.
563
+ ValueError: If value of `padding` is less than `0`.
564
+
565
+ Supported Platforms:
566
+ ``Ascend``
567
+
568
+ Examples:
569
+ >>> import mindspore
570
+ >>> import numpy as np
571
+ >>> from mindspore import Tensor, ops
572
+ >>> x = Tensor(np.arange(1 * 3 * 3 * 4).reshape(1, 3, 3, 4), mindspore.float32)
573
+ >>> output = ops.function.nn_func.avg_pool2d_ext(x, kernel_size=2, stride=1)
574
+ >>> print(output)
575
+ [[[[ 2.5 3.5 4.5]
576
+ [ 6.5 7.5 8.5]]
577
+ [[14.5 15.5 16.5]
578
+ [18.5 19.5 20.5]]
579
+ [[26.5 27.5 28.5]
580
+ [30.5 31.5 32.5]]]]
581
+ """
582
+ if stride is None:
583
+ stride = kernel_size
584
+ return _get_cache_prim(ops.auto_generate.AvgPool2D)()(input, kernel_size, stride, padding,
585
+ ceil_mode, count_include_pad, divisor_override)
586
+
587
+
486
588
  def _check_avg_pool3d_padding(padding):
487
589
  """Check the padding value in avg_pool3d op."""
488
590
  if isinstance(padding, int):
@@ -537,7 +639,7 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
537
639
  Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})`. Has the same data type with `input_x`.
538
640
 
539
641
  Raises:
540
- TypeError: If `input_x` is not an Tensor.
642
+ TypeError: If `input_x` is not a Tensor.
541
643
  TypeError: If `kernel_size`, `stride` or `padding` is neither an int not a tuple.
542
644
  TypeError: If `ceil_mode` or `count_include_pad` is not a bool.
543
645
  TypeError: If `divisor_override` is not an int.
@@ -563,9 +665,6 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
563
665
  if not isinstance(input_x, (Tensor, Tensor_)):
564
666
  raise TypeError("For avg_pool3d, the input input_x must be tensor")
565
667
 
566
- if len(input_x.shape) != 5:
567
- raise ValueError(f"For avg_pool3d, input must have 5 dim, but got {len(input_x.shape)}.")
568
-
569
668
  _check_avg_pool3d_padding(padding)
570
669
 
571
670
  avg_pool_op = _get_cache_prim(P.AvgPool3D)(kernel_size=kernel_size,
@@ -638,7 +737,7 @@ def adaptive_max_pool1d(input, output_size):
638
737
  _check_adaptive_max_pool1d_output_size(output_size)
639
738
 
640
739
  x_in_shape = input.shape
641
- x_dtype = _get_cache_prim(P.DType)()(input)
740
+ x_dtype = dtype_(input)
642
741
 
643
742
  if len(x_in_shape) != 3:
644
743
  raise ValueError(f"For adaptive_max_pool1d input must have 3 dim, but got {len(x_in_shape)}.")
@@ -657,18 +756,14 @@ def adaptive_max_pool1d(input, output_size):
657
756
  raise TypeError(f"For adaptive_max_pool1d, the input dtype must be float16 or float32, "
658
757
  f"but got {x_dtype}.")
659
758
 
660
- expand_ = _get_cache_prim(P.ExpandDims)()
661
759
  squeeze_ = _get_cache_prim(P.Squeeze)(2)
662
-
663
760
  width = x_in_shape[2]
664
761
  stride = width // output_size
665
762
  kernel_size = width - (output_size - 1) * stride
666
763
  stride = (1, width // output_size)
667
764
  kernel_size = (1, kernel_size)
668
-
669
765
  max_pool_ = _get_cache_prim(NN_OPS.MaxPool)(kernel_size=kernel_size, strides=stride)
670
-
671
- input = expand_(input, 2)
766
+ input = expand_dims_(input, 2)
672
767
  input = max_pool_(input)
673
768
  input = squeeze_(input)
674
769
 
@@ -807,6 +902,8 @@ def adaptive_max_pool3d(input, output_size, return_indices=False):
807
902
  >>> print(output[1].asnumpy())
808
903
  [[[[33 35]]]]
809
904
  """
905
+ if isinstance(output_size, int):
906
+ output_size = (output_size, output_size, output_size)
810
907
  adaptive_max_pool3d_ = _get_cache_prim(NN_OPS.AdaptiveMaxPool3D)()
811
908
  output_size_ = Tensor(output_size, dtype=mstype.int32)
812
909
  out = adaptive_max_pool3d_(input, output_size_)
@@ -814,18 +911,6 @@ def adaptive_max_pool3d(input, output_size, return_indices=False):
814
911
  return output
815
912
 
816
913
 
817
- def check_shape(x_shape, indices_shape, func_name):
818
- """
819
- :param x_shape: the shape of x.
820
- :param indices_shape: the shape of indices.
821
- :param func_name: the name of function.
822
- :return:
823
- """
824
- if x_shape != indices_shape:
825
- raise ValueError(f"For {func_name}, the x shape and indices shape must be equal, but got input "
826
- f"shape {x_shape} and indices shape {indices_shape}.")
827
-
828
-
829
914
  def max_unpool1d(x, indices, kernel_size, stride=None, padding=0, output_size=None):
830
915
  r"""
831
916
  Computes the inverse of `max_pool1d`.
@@ -836,7 +921,7 @@ def max_unpool1d(x, indices, kernel_size, stride=None, padding=0, output_size=No
836
921
 
837
922
  .. math::
838
923
  \begin{array}{ll} \\
839
- H_{out} = (H{in} - 1) \times stride[0] - 2 \times padding[0] + kernel\_size[0] \\
924
+ H_{out} = (H_{in} - 1) \times stride[0] - 2 \times padding[0] + kernel\_size[0] \\
840
925
  \end{array}
841
926
 
842
927
  Args:
@@ -885,13 +970,8 @@ def max_unpool1d(x, indices, kernel_size, stride=None, padding=0, output_size=No
885
970
  if stride is None:
886
971
  stride = kernel_size
887
972
 
888
- shape = P.Shape()
889
- x_shape = shape(x)
890
- indices_shape = shape(indices)
973
+ x_shape = shape_(x)
891
974
  x_dim = len(x_shape)
892
- check_shape(x_shape, indices_shape, "max_unpool1d")
893
- if x_dim not in (2, 3):
894
- raise ValueError(f"For max_unpool1d, the x shape must have 2 or 3 dims, but got {x_dim}.")
895
975
 
896
976
  if output_size is None:
897
977
  output_size = ()
@@ -1009,13 +1089,8 @@ def max_unpool2d(x, indices, kernel_size, stride=None, padding=0, output_size=No
1009
1089
  if stride is None:
1010
1090
  stride = kernel_size
1011
1091
 
1012
- shape = P.Shape()
1013
- x_shape = shape(x)
1014
- indices_shape = shape(indices)
1092
+ x_shape = shape_(x)
1015
1093
  x_dim = len(x_shape)
1016
- check_shape(x_shape, indices_shape, "max_unpool2d")
1017
- if x_dim not in (3, 4):
1018
- raise ValueError(f"For max_unpool2d, the x shape must have 3 or 4 dims, but got {x_dim}.")
1019
1094
 
1020
1095
  if output_size is None:
1021
1096
  output_size = ()
@@ -1118,12 +1193,8 @@ def max_unpool3d(x, indices, kernel_size, stride=None, padding=0, output_size=No
1118
1193
  if stride is None:
1119
1194
  stride = kernel_size
1120
1195
 
1121
- x_shape = P.Shape()(x)
1122
- indices_shape = P.Shape()(indices)
1196
+ x_shape = shape_(x)
1123
1197
  x_dim = len(x_shape)
1124
- check_shape(x_shape, indices_shape, "max_unpool3d")
1125
- if x_dim not in (4, 5):
1126
- raise ValueError(f"For max_unpool3d, the x shape must have 4 or 5 dims, but got {x_dim}.")
1127
1198
 
1128
1199
  if output_size is None:
1129
1200
  output_size = ()
@@ -1151,12 +1222,12 @@ def max_unpool3d(x, indices, kernel_size, stride=None, padding=0, output_size=No
1151
1222
  return out
1152
1223
 
1153
1224
 
1154
- def binary_cross_entropy_with_logits(logits, label, weight=None, pos_weight=None, reduction='mean'):
1225
+ def binary_cross_entropy_with_logits(input, target, weight=None, pos_weight=None, reduction='mean'):
1155
1226
  r"""
1156
- Adds sigmoid activation function to input `logits`, and uses the given logits to compute binary cross entropy
1157
- between the logits and the label.
1227
+ Adds sigmoid activation function to input `input` as logits, and uses the given logits to compute binary cross
1228
+ entropy between the `input` and the `target`.
1158
1229
 
1159
- Sets input logits as :math:`X`, input label as :math:`Y`, input weight as :math:`W`, output as :math:`L`. Then,
1230
+ Sets input `input` as :math:`X`, input target as :math:`Y`, input weight as :math:`W`, output as :math:`L`. Then,
1160
1231
 
1161
1232
  .. math::
1162
1233
 
@@ -1197,14 +1268,14 @@ def binary_cross_entropy_with_logits(logits, label, weight=None, pos_weight=None
1197
1268
  :math:`P_c>1` increases the recall, :math:`P_c<1` increases the precision.
1198
1269
 
1199
1270
  Args:
1200
- logits (Tensor): Input logits. Data type must be float16 or float32.
1201
- label (Tensor): Ground truth label, has the same shape as `logits`.
1271
+ input (Tensor): Input `input`. Data type must be float16 or float32.
1272
+ target (Tensor): Ground truth label, has the same shape as `input`.
1202
1273
  Data type must be float16 or float32.
1203
1274
  weight (Tensor, optional): A rescaling weight applied to the loss of each batch element. It can be
1204
- broadcast to a tensor with shape of `logits`. Data type must be float16 or float32.
1275
+ broadcast to a tensor with shape of `input`. Data type must be float16 or float32.
1205
1276
  Default: ``None``, `weight` is a Tensor whose value is ``1``.
1206
1277
  pos_weight (Tensor, optional): A weight of positive examples. Must be a vector with length equal to the
1207
- number of classes. It can be broadcast to a tensor with shape of `logits`.
1278
+ number of classes. It can be broadcast to a tensor with shape of `input`.
1208
1279
  Data type must be float16 or float32. Default: ``None``, `pos_weight` is a Tensor whose value is ``1``.
1209
1280
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
1210
1281
  ``'sum'`` . Default: ``'mean'`` .
@@ -1214,14 +1285,14 @@ def binary_cross_entropy_with_logits(logits, label, weight=None, pos_weight=None
1214
1285
  - ``'sum'``: the output elements will be summed.
1215
1286
 
1216
1287
  Returns:
1217
- Tensor or Scalar, if `reduction` is ``'none'``, it's a tensor with the same shape and type as input `logits`.
1288
+ Tensor or Scalar, if `reduction` is ``'none'``, it's a tensor with the same shape and type as input `input`.
1218
1289
  Otherwise, the output is a scalar.
1219
1290
 
1220
1291
  Raises:
1221
- TypeError: If input `logits`, `label`, `weight`, `pos_weight` is not Tensor.
1222
- TypeError: If data type of input `logits`, `label`, `weight`, `pos_weight` is neither float16 nor float32.
1292
+ TypeError: If input `input`, `target`, `weight`, `pos_weight` is not Tensor.
1293
+ TypeError: If data type of input `input`, `target`, `weight`, `pos_weight` is neither float16 nor float32.
1223
1294
  TypeError: If data type of input `reduction` is not string.
1224
- ValueError: If `weight` or `pos_weight` can not be broadcast to a tensor with shape of `logits`.
1295
+ ValueError: If `weight` or `pos_weight` can not be broadcast to a tensor with shape of `input`.
1225
1296
  ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
1226
1297
 
1227
1298
  Supported Platforms:
@@ -1231,21 +1302,17 @@ def binary_cross_entropy_with_logits(logits, label, weight=None, pos_weight=None
1231
1302
  >>> import mindspore
1232
1303
  >>> import numpy as np
1233
1304
  >>> from mindspore import Tensor, ops
1234
- >>> logits = Tensor(np.array([[-0.8, 1.2, 0.7], [-0.1, -0.4, 0.7]]), mindspore.float32)
1235
- >>> label = Tensor(np.array([[0.3, 0.8, 1.2], [-0.6, 0.1, 2.2]]), mindspore.float32)
1305
+ >>> input = Tensor(np.array([[-0.8, 1.2, 0.7], [-0.1, -0.4, 0.7]]), mindspore.float32)
1306
+ >>> target = Tensor(np.array([[0.3, 0.8, 1.2], [-0.6, 0.1, 2.2]]), mindspore.float32)
1236
1307
  >>> weight = Tensor(np.array([1.0, 1.0, 1.0]), mindspore.float32)
1237
1308
  >>> pos_weight = Tensor(np.array([1.0, 1.0, 1.0]), mindspore.float32)
1238
- >>> output = ops.binary_cross_entropy_with_logits(logits, label, weight, pos_weight)
1309
+ >>> output = ops.binary_cross_entropy_with_logits(input, target, weight, pos_weight)
1239
1310
  >>> print(output)
1240
1311
  0.3463612
1241
1312
  """
1242
1313
 
1243
- if weight is None:
1244
- weight = ops.ones_like(logits)
1245
- if pos_weight is None:
1246
- pos_weight = ops.ones_like(logits)
1247
1314
  bce_with_logits_loss_op = _get_cache_prim(NN_OPS.BCEWithLogitsLoss)(reduction)
1248
- return bce_with_logits_loss_op(logits, label, weight, pos_weight)
1315
+ return bce_with_logits_loss_op(input, target, weight, pos_weight)
1249
1316
 
1250
1317
 
1251
1318
  @_function_forbid_reuse
@@ -1294,47 +1361,45 @@ def dropout(input, p=0.5, training=True, seed=None):
1294
1361
  return out
1295
1362
 
1296
1363
 
1297
- def celu(x, alpha=1.0):
1364
+ @_function_forbid_reuse
1365
+ def dropout_ext(input, p=0.5, training=True):
1298
1366
  r"""
1299
- celu activation function, computes celu (Continuously differentiable exponential
1300
- linear units) of input tensors element-wise. The formula is defined as follows:
1301
-
1302
- .. math::
1303
-
1304
- \text{CeLU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x/\alpha) - 1))
1305
-
1306
- For more details, please refer to `celu <https://arxiv.org/abs/1704.07483>`_.
1307
-
1308
- .. warning::
1309
- This is an experimental API that is subject to change or deletion.
1367
+ During training, randomly zeroes some of the elements of the input tensor
1368
+ with probability `p` from a Bernoulli distribution. It plays the role of reducing neuron correlation and
1369
+ avoid overfitting. And the return will be multiplied by :math:`\frac{1}{1-p}` during training.
1370
+ During the reasoning, this operation returns the same Tensor as the `input`.
1310
1371
 
1311
1372
  Args:
1312
- x (Tensor): The input of celu with data type of float16 or float32.
1313
- alpha (float, optional): The :math:`\alpha` value for the Celu formulation. Default: 1.0
1373
+ input (Tensor): The input Tensor of shape :math:`(*, N)`.
1374
+ p (float): The dropping rate of input neurons, between 0 and 1, e.g. `p` = 0.1,
1375
+ means dropping out 10% of input neurons. Default: ``0.5`` .
1376
+ training (bool): Apply dropout if it is ``True`` , if it is ``False`` , the input is returned directly,
1377
+ and `p` is invalid. Default: ``True``.
1314
1378
 
1315
1379
  Returns:
1316
- Tensor, has the same data type and shape as the input.
1380
+ - **output** (Tensor) - Zeroed tensor, with the same shape and data type as `input`.
1317
1381
 
1318
1382
  Raises:
1319
- TypeError: If `alpha` is not a float.
1320
- TypeError: If `x` is not a Tensor.
1321
- TypeError: If dtype of `x` is neither float16 nor float32.
1322
- ValueError: If `alpha` has the value of 0.
1383
+ TypeError: If `p` is not a float.
1384
+ TypeError: If `input` is not a Tensor.
1323
1385
 
1324
1386
  Supported Platforms:
1325
- ``Ascend`` ``GPU`` ``CPU``
1387
+ ``Ascend``
1326
1388
 
1327
1389
  Examples:
1328
1390
  >>> import mindspore
1329
- >>> import numpy as np
1330
1391
  >>> from mindspore import Tensor, ops
1331
- >>> x = Tensor(np.array([-2.0, -1.0, 1.0, 2.0]), mindspore.float32)
1332
- >>> output = ops.celu(x, alpha=1.0)
1333
- >>> print(output)
1334
- [-0.86466473 -0.63212055 1. 2. ]
1392
+ >>> input = Tensor(((20, 16), (50, 50)), mindspore.float32)
1393
+ >>> output = ops.function.nn_func.dropout_ext(input, p=0.5)
1394
+ >>> print(output.shape)
1395
+ (2, 2)
1335
1396
  """
1336
- celu_op = _get_cache_prim(P.CeLU)(alpha)
1337
- return celu_op(x)
1397
+ check_bool_const(training, "training", "dropout_ext")
1398
+ if training is False:
1399
+ return input
1400
+ seed, offset = default_generator._step(generator_step_) # pylint: disable=protected-access
1401
+ out, _ = dropout_ext_op(input, p, seed, offset)
1402
+ return out
1338
1403
 
1339
1404
 
1340
1405
  def dropout1d(input, p=0.5, training=True):
@@ -1520,42 +1585,6 @@ def dropout3d(input, p=0.5, training=True):
1520
1585
  return out
1521
1586
 
1522
1587
 
1523
- def fast_gelu(x):
1524
- r"""
1525
- Fast Gaussian Error Linear Units activation function.
1526
-
1527
- FastGeLU is defined as follows:
1528
-
1529
- .. math::
1530
- \text{output} = \frac {x} {1 + \exp(-1.702 * \left| x \right|)} * \exp(0.851 * (x - \left| x \right|)),
1531
-
1532
- where :math:`x` is the element of the input.
1533
-
1534
- Args:
1535
- x (Tensor): Input to compute the FastGeLU with data type of float16 or float32.
1536
-
1537
- Returns:
1538
- Tensor, with the same type and shape as `x`.
1539
-
1540
- Raises:
1541
- TypeError: If dtype of `x` is neither float16 nor float32.
1542
-
1543
- Supported Platforms:
1544
- ``Ascend`` ``GPU`` ``CPU``
1545
-
1546
- Examples:
1547
- >>> import mindspore
1548
- >>> import numpy as np
1549
- >>> from mindspore import Tensor, ops
1550
- >>> x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
1551
- >>> output = ops.fast_gelu(x)
1552
- >>> print(output)
1553
- [[-1.5418735e-01 3.9921875e+00 -9.7473649e-06]
1554
- [ 1.9375000e+00 -1.0052517e-03 8.9824219e+00]]
1555
- """
1556
- return fast_gelu_(x)
1557
-
1558
-
1559
1588
  @_primexpr
1560
1589
  def _check_float_range_inc_neither(arg_value, lower_limit, upper_limit, arg_name=None, prim_name=None):
1561
1590
  """
@@ -1574,7 +1603,7 @@ def _check_fractional_output_size_ratio(output_size, output_ratio, cls_name):
1574
1603
  def fractional_max_pool2d(input, kernel_size, output_size=None, output_ratio=None, return_indices=False,
1575
1604
  _random_samples=None):
1576
1605
  r"""
1577
- Applies the 2D FractionalMaxPool operatin over `input`. The output Tensor shape can be determined by either
1606
+ Applies the 2D FractionalMaxPool operation over `input`. The output Tensor shape can be determined by either
1578
1607
  `output_size` or `output_ratio`, and the step size is determined by `_random_samples`. `output_size` will take
1579
1608
  effect when `output_size` and `output_ratio` are set at the same time.
1580
1609
  And `output_size` and `output_ratio` can not be ``None`` at the same time.
@@ -1686,7 +1715,7 @@ def fractional_max_pool2d(input, kernel_size, output_size=None, output_ratio=Non
1686
1715
  def fractional_max_pool3d(input, kernel_size, output_size=None, output_ratio=None, return_indices=False,
1687
1716
  _random_samples=None):
1688
1717
  r"""
1689
- Applies the 3D FractionalMaxPool operatin over `input`. The output Tensor shape can be determined by either
1718
+ Applies the 3D FractionalMaxPool operation over `input`. The output Tensor shape can be determined by either
1690
1719
  `output_size` or `output_ratio`, and the step size is determined by `_random_samples`. `output_size` will take
1691
1720
  effect when `output_size` and `output_ratio` are set at the same time.
1692
1721
  And `output_size` and `output_ratio` can not be ``None`` at the same time.
@@ -1707,7 +1736,7 @@ def fractional_max_pool3d(input, kernel_size, output_size=None, output_ratio=Non
1707
1736
  is an int number that represents depth, height and width of the kernel, or a tuple
1708
1737
  of three int numbers that represent depth, height and width respectively.
1709
1738
  The value must be a positive integer.
1710
- output_size (Union[int, tuple[int]], optional): The Shape of the target `output_size`,
1739
+ output_size (Union[int, tuple[int]], optional): The shape of the target `output_size`,
1711
1740
  is an int number that represents depth, height and width, or a tuple
1712
1741
  of three int numbers that represent depth, height and width respectively.
1713
1742
  The value must be a positive integer.
@@ -1813,10 +1842,10 @@ def kl_div(logits, labels, reduction='mean'):
1813
1842
 
1814
1843
  .. math::
1815
1844
  \ell(x, target) = \begin{cases}
1816
- L, & \text{if reduction} = \text{'none';}\\
1817
- \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
1818
- \operatorname{batchmean}(L), & \text{if reduction} = \text{'batchmean';}\\
1819
- \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.}
1845
+ L(x, target), & \text{if reduction} = \text{'none';}\\
1846
+ \operatorname{mean}(L(x, target)), & \text{if reduction} = \text{'mean';}\\
1847
+ \operatorname{sum}(L(x, target)) / x.\operatorname{shape}[0], & \text{if reduction} = \text{'batchmean';}\\
1848
+ \operatorname{sum}(L(x, target)), & \text{if reduction} = \text{'sum'.}
1820
1849
  \end{cases}
1821
1850
 
1822
1851
  where :math:`x` represents `logits`.
@@ -1826,7 +1855,7 @@ def kl_div(logits, labels, reduction='mean'):
1826
1855
  Note:
1827
1856
  - Currently it does not support float64 input on `Ascend`.
1828
1857
  - The output aligns with the mathematical definition of Kullback-Leibler divergence
1829
- only when `reduction` is set to 'batchmean'.
1858
+ only when `reduction` is set to ``'batchmean'``.
1830
1859
 
1831
1860
  Args:
1832
1861
  logits (Tensor): The input Tensor. The data type must be float16, float32 or float64.
@@ -1834,6 +1863,11 @@ def kl_div(logits, labels, reduction='mean'):
1834
1863
  reduction (str): Specifies the reduction to be applied to the output.
1835
1864
  Its value must be one of ``'none'`` , ``'mean'`` , ``'batchmean'`` or ``'sum'`` . Default: ``'mean'`` .
1836
1865
 
1866
+ - ``'none'``: no reduction will be applied.
1867
+ - ``'mean'``: compute and return the mean of elements in the output.
1868
+ - ``'sum'``: the output elements will be summed.
1869
+ - ``'batchmean'``: the summed output elements divided by batch size.
1870
+
1837
1871
  Returns:
1838
1872
  Tensor or Scalar, if `reduction` is ``'none'``, then output is a tensor and has the same shape as `logits`.
1839
1873
  Otherwise, it is a scalar.
@@ -1841,7 +1875,7 @@ def kl_div(logits, labels, reduction='mean'):
1841
1875
  Raises:
1842
1876
  TypeError: If `reduction` is not a str.
1843
1877
  TypeError: If neither `logits` nor `labels` is a Tensor.
1844
- TypeError: If dtype of `logits` or `labels` is not float32.
1878
+ TypeError: If dtype of `logits` or `labels` is not the supported type.
1845
1879
 
1846
1880
  Supported Platforms:
1847
1881
  ``Ascend`` ``GPU`` ``CPU``
@@ -1861,20 +1895,20 @@ def kl_div(logits, labels, reduction='mean'):
1861
1895
  f"'['none', 'mean', 'batchmean', 'sum']', but got '{reduction}'.")
1862
1896
 
1863
1897
  if reduction == 'batchmean':
1864
- kl_div_sum = P.KLDivLoss(reduction='sum')(logits, labels)
1865
- shape = P.Shape()(logits)
1898
+ kl_div_sum = _get_cache_prim(P.KLDivLoss)(reduction='sum')(logits, labels)
1899
+ shape = shape_(logits)
1866
1900
  batch_size = shape[0]
1867
1901
  return kl_div_sum / batch_size
1868
1902
 
1869
1903
  if reduction == 'mean':
1870
- kl_div_sum = P.KLDivLoss(reduction='sum')(logits, labels)
1871
- shape = P.Shape()(logits)
1904
+ kl_div_sum = _get_cache_prim(P.KLDivLoss)(reduction='sum')(logits, labels)
1905
+ shape = shape_(logits)
1872
1906
  total_size = 1
1873
1907
  for dim in shape:
1874
1908
  total_size = total_size * dim
1875
1909
  return kl_div_sum / total_size
1876
1910
 
1877
- return P.KLDivLoss(reduction=reduction)(logits, labels)
1911
+ return _get_cache_prim(P.KLDivLoss)(reduction=reduction)(logits, labels)
1878
1912
 
1879
1913
 
1880
1914
  def hardshrink(x, lambd=0.5):
@@ -1891,9 +1925,15 @@ def hardshrink(x, lambd=0.5):
1891
1925
  0, & \text{ otherwise }
1892
1926
  \end{cases}
1893
1927
 
1928
+ HShrink Activation Function Graph:
1929
+
1930
+ .. image:: ../images/HShrink.png
1931
+ :align: center
1932
+
1894
1933
  Args:
1895
1934
  x (Tensor): The input of Hard Shrink with data type of float16 or float32.
1896
- lambd (float): The threshold :math:`\lambda` defined by the Hard Shrink formula. Default: ``0.5`` .
1935
+ lambd (float, optional): The threshold :math:`\lambda` defined by the Hard Shrink formula.
1936
+ Default: ``0.5`` .
1897
1937
 
1898
1938
  Returns:
1899
1939
  Tensor, has the same data type and shape as the input `x`.
@@ -1995,16 +2035,16 @@ def flip(input, dims):
1995
2035
  Raises:
1996
2036
  TypeError: If the input is not a tensor.
1997
2037
  ValueError: If `dims` is None.
1998
- ValueError: If `dims` is not a tuple of ints.
2038
+ ValueError: If `dims` is not a list/tuple of ints.
1999
2039
 
2000
2040
  Supported Platforms:
2001
2041
  ``Ascend`` ``GPU`` ``CPU``
2002
2042
 
2003
2043
  Examples:
2004
- >>> import mindspore as ms
2005
- >>> import mindspore.ops as ops
2044
+ >>> import mindspore
2045
+ >>> from mindspore import ops
2006
2046
  >>> import numpy as np
2007
- >>> input = ms.Tensor(np.arange(1, 9).reshape((2, 2, 2)))
2047
+ >>> input = mindspore.Tensor(np.arange(1, 9).reshape((2, 2, 2)))
2008
2048
  >>> output = ops.flip(input, (0, 2))
2009
2049
  >>> print(output)
2010
2050
  [[[6 5]
@@ -2012,7 +2052,7 @@ def flip(input, dims):
2012
2052
  [[2 1]
2013
2053
  [4 3]]]
2014
2054
  """
2015
- res = _get_cache_prim(ops.ReverseV2)(axis=dims)(input)
2055
+ res = reverse_v2_impl(input, dims)
2016
2056
  return res
2017
2057
 
2018
2058
 
@@ -2034,7 +2074,7 @@ def flipud(input):
2034
2074
 
2035
2075
  Examples:
2036
2076
  >>> import mindspore as ms
2037
- >>> import mindspore.ops as ops
2077
+ >>> from mindspore import ops
2038
2078
  >>> import numpy as np
2039
2079
  >>> input = ms.Tensor(np.arange(1, 9).reshape((2, 2, 2)))
2040
2080
  >>> output = ops.flipud(input)
@@ -2065,7 +2105,7 @@ def fliplr(input):
2065
2105
 
2066
2106
  Examples:
2067
2107
  >>> import mindspore as ms
2068
- >>> import mindspore.ops as ops
2108
+ >>> from mindspore import ops
2069
2109
  >>> import numpy as np
2070
2110
  >>> input = ms.Tensor(np.arange(1, 9).reshape((2, 2, 2)))
2071
2111
  >>> output = ops.fliplr(input)
@@ -2094,7 +2134,7 @@ def is_floating_point(input):
2094
2134
 
2095
2135
  Examples:
2096
2136
  >>> import mindspore as ms
2097
- >>> import mindspore.ops as ops
2137
+ >>> from mindspore import ops
2098
2138
  >>> from mindspore import Tensor
2099
2139
  >>> x = ms.Tensor([1, 2, 3], ms.float32)
2100
2140
  >>> y = ms.Tensor([1, 2, 3], ms.int64)
@@ -2105,7 +2145,7 @@ def is_floating_point(input):
2105
2145
  >>> print(output2)
2106
2146
  False
2107
2147
  """
2108
- return input.dtype in [mstype.float32, mstype.float16, mstype.float64]
2148
+ return input.dtype in [mstype.float32, mstype.bfloat16, mstype.float16, mstype.float64]
2109
2149
 
2110
2150
 
2111
2151
  def hardswish(x):
@@ -2120,6 +2160,11 @@ def hardswish(x):
2120
2160
 
2121
2161
  where :math:`x_i` is an element of the input Tensor.
2122
2162
 
2163
+ HSwish Activation Function Graph:
2164
+
2165
+ .. image:: ../images/HSwish.png
2166
+ :align: center
2167
+
2123
2168
  Args:
2124
2169
  x (Tensor): The input to compute the Hard Swish.
2125
2170
 
@@ -2151,15 +2196,25 @@ def _is_dim_unknown(shape):
2151
2196
 
2152
2197
  @_primexpr
2153
2198
  def _interploate_make_tuple(rank, value):
2199
+ """
2200
+ make tuple in dynamic scenarios
2201
+ """
2154
2202
  s = tuple_to_tensor_((rank,), mstype.int32)
2155
- v = Tensor(value)
2156
- t = _get_cache_prim(P.FillV2)()(s, v)
2203
+ v = None
2204
+ if isinstance(value, int):
2205
+ v = F.scalar_to_tensor(value, mstype.int64)
2206
+ else:
2207
+ v = F.scalar_to_tensor(value, mstype.float32)
2208
+ t = fillv2_(s, v)
2157
2209
  out = tensor_to_tuple_(t)
2158
2210
  return out
2159
2211
 
2160
2212
 
2161
2213
  @_primexpr
2162
2214
  def _interpolate_scale_factor_convert_size(shape, scale_factor):
2215
+ """
2216
+ convert scale_factor to size
2217
+ """
2163
2218
  x = tuple_to_tensor_(shape[2:], mstype.int64)
2164
2219
  y = tuple_to_tensor_(scale_factor, mstype.float32)
2165
2220
  t = x * y
@@ -2169,6 +2224,9 @@ def _interpolate_scale_factor_convert_size(shape, scale_factor):
2169
2224
 
2170
2225
 
2171
2226
  def _interpolate_size_check_with_rank(size, input_rank):
2227
+ """
2228
+ size rank check
2229
+ """
2172
2230
  if len(size) != input_rank - 2:
2173
2231
  raise ValueError(
2174
2232
  f"For 'interpolate', 'input' and 'size' must have the same spatial dimensions, "
@@ -2176,6 +2234,9 @@ def _interpolate_size_check_with_rank(size, input_rank):
2176
2234
 
2177
2235
 
2178
2236
  def _interpolate_scale_factor_check_with_rank(scale_factor, input_rank):
2237
+ """
2238
+ scale_factor rank check
2239
+ """
2179
2240
  if len(scale_factor) != input_rank - 2:
2180
2241
  raise ValueError(
2181
2242
  f"For 'interpolate', 'input' and 'scale_factor' must have the same spatial dimensions, "
@@ -2184,6 +2245,9 @@ def _interpolate_scale_factor_check_with_rank(scale_factor, input_rank):
2184
2245
 
2185
2246
 
2186
2247
  def _interpolate_mode_check(mode, supported_dict):
2248
+ """
2249
+ mode check
2250
+ """
2187
2251
  if isinstance(mode, list) or mode not in supported_dict:
2188
2252
  raise ValueError(
2189
2253
  f"For 'interpolate', 'mode' must be in '{list(supported_dict)}', but got {mode}"
@@ -2191,6 +2255,9 @@ def _interpolate_mode_check(mode, supported_dict):
2191
2255
 
2192
2256
 
2193
2257
  def _interpolate_rank_check(input_rank, mode, supported_dict):
2258
+ """
2259
+ rank check
2260
+ """
2194
2261
  if input_rank not in supported_dict.get(mode):
2195
2262
  raise ValueError(
2196
2263
  f"For 'interpolate', {mode} only support '{list(supported_dict.get(mode, {}))}'D, but got {input_rank}D"
@@ -2198,6 +2265,9 @@ def _interpolate_rank_check(input_rank, mode, supported_dict):
2198
2265
 
2199
2266
 
2200
2267
  def _interpolate_scale_factor_check(scale_factor, mode, rank, supported_dict):
2268
+ """
2269
+ scale_factor check
2270
+ """
2201
2271
  if scale_factor is not None and "scale_factor" not in supported_dict.get(
2202
2272
  mode, {}).get(rank):
2203
2273
  raise ValueError(
@@ -2206,6 +2276,9 @@ def _interpolate_scale_factor_check(scale_factor, mode, rank, supported_dict):
2206
2276
 
2207
2277
 
2208
2278
  def _interpolate_align_corners_mode_check(rank, mode, supported_dict):
2279
+ """
2280
+ align_corners check
2281
+ """
2209
2282
  if "align_corners" not in supported_dict.get(mode, {}).get(rank):
2210
2283
  raise ValueError(
2211
2284
  f"For 'interpolate', 'align_corners' option cannot currently be set with the "
@@ -2238,17 +2311,22 @@ def interpolate(input,
2238
2311
  'area', 'nearest-exact'(matches Scikit-Image and PIL nearest neighbours interpolation algorithms and fixes
2239
2312
  knows issues with `nearest`, 3D and 4D). Default: ``"nearest"`` .
2240
2313
 
2241
- align_corners (bool): If True, rescale input by :math:`(new\_height - 1) / (height - 1)`, which exactly
2242
- aligns the corners of data and resized data. If False, rescale by :math:`new\_height / height`.
2243
- Default: ``None`` .
2314
+ align_corners (bool): Whether to use corner alignment for coordinate mapping. Assuming a transformation is
2315
+ applied to the input Tensor along the x-axis, the specific calculation formula is as follows:
2244
2316
 
2245
2317
  .. code-block::
2246
2318
 
2247
- old_i = new_length != 1 ? new_i * (old_length - 1) / (new_length - 1) : 0 # 'align_corners' = True
2319
+ ori_i = new_length != 1 ? new_i * (ori_length - 1) / (new_length - 1) : 0 # 'align_corners' = True
2320
+
2321
+ ori_i = new_length > 1 ? (new_i + 0.5) * ori_length / new_length - 0.5 : 0 # 'align_corners' = False
2248
2322
 
2249
- old_i = new_length > 1 ? (new_x + 0.5) * old_length / new_length - 0.5 : 0 # 'align_corners' = False
2323
+ Among them, :math:`ori\_length` and :math:`new\_length` represent the length of the Tensor before and after
2324
+ transformation along the x-axis respectively; :math:`new\_i` represents the coordinate of the i-th element
2325
+ along the x-axis after transformation; :math:`ori\_i` represents
2326
+ the corresponding coordinate of the original
2327
+ data along the x-axis.
2250
2328
 
2251
- This is only valid for 'linear', 'bilinear', or 'bicubic' modes. Default: ``False`` .
2329
+ This is only valid for ``'linear'``, ``'bilinear'``, or ``'bicubic'`` modes. Default: ``False`` .
2252
2330
  recompute_scale_factor (bool, optional): Recalculate `scale_factor`.
2253
2331
  If True, the parameter `size` will be calculated using the value of the `scale_factor`,
2254
2332
  and finally scaled using the value of `size`.
@@ -2331,7 +2409,7 @@ def interpolate(input,
2331
2409
  x = x.unsqueeze(-1)
2332
2410
  x = _get_cache_prim(P.ResizeNearestNeighborV2)()(
2333
2411
  x, size)
2334
- x = P.Squeeze(-1)(x)
2412
+ x = _get_cache_prim(P.Squeeze)(-1)(x)
2335
2413
  elif size is not None and x_rank == 4:
2336
2414
  size = seq.TupleToTensor()(size[:2], mstype.int32)
2337
2415
  x = _get_cache_prim(P.ResizeNearestNeighborV2)()(
@@ -2383,7 +2461,7 @@ def interpolate(input,
2383
2461
  align_corners=False,
2384
2462
  half_pixel_centers=True)
2385
2463
  x = resize(x, size)
2386
- x = P.Squeeze(-1)(x)
2464
+ x = _get_cache_prim(P.Squeeze)(-1)(x)
2387
2465
  if x_rank == 4:
2388
2466
  if isinstance(size, int):
2389
2467
  size = F.scalar_to_tensor(size, mstype.int32)
@@ -2503,7 +2581,12 @@ def interpolate(input,
2503
2581
  raise ValueError(
2504
2582
  "For 'interpolate', it is incorrect to set 'recompute_scale_factor' to True"
2505
2583
  " after specifying an explicit 'size'.")
2506
- size = _interpolate_scale_factor_convert_size(shape, scale_factor)
2584
+ if F.isconstant(shape) and F.isconstant(scale_factor):
2585
+ tuple_len = min(len(shape) - 2, len(scale_factor))
2586
+ size = tuple([floor(shape[i + 2] * scale_factor[i])
2587
+ for i in range(tuple_len)])
2588
+ else:
2589
+ size = _interpolate_scale_factor_convert_size(shape, scale_factor)
2507
2590
  scale_factor = None
2508
2591
  else:
2509
2592
  if dim_unknown is False:
@@ -2521,54 +2604,341 @@ def interpolate(input,
2521
2604
  return resize_func.get(mode)(input, size, align_corners, scale_factor)
2522
2605
 
2523
2606
 
2524
- def upsample(input, size=None, scale_factor=None, mode="nearest", align_corners=None, recompute_scale_factor=None):
2525
- r"""
2526
- Alias for :func:`mindspore.ops.interpolate` .
2527
-
2528
- Supported Platforms:
2529
- ``Ascend`` ``GPU`` ``CPU``
2607
+ def _interploate_ext_make_tuple(input, value):
2530
2608
  """
2531
- return interpolate(input, size, scale_factor, mode, align_corners, recompute_scale_factor)
2609
+ make tuple
2610
+ """
2611
+ if isinstance(value, (list, tuple)):
2612
+ return value
2613
+
2614
+ rank = F.rank(input) - 2
2615
+ out = None
2616
+ if F.isconstant(value) and F.isconstant(rank):
2617
+ out = tuple([value for _ in range(rank)])
2618
+ else:
2619
+ s = tuple_to_tensor_((rank,), mstype.int32)
2620
+ v = None
2621
+ if isinstance(value, int):
2622
+ v = F.scalar_to_tensor(value, mstype.int64)
2623
+ else:
2624
+ v = F.scalar_to_tensor(value, mstype.float32)
2625
+ t = fillv2_(s, v)
2626
+ out = tensor_to_tuple_(t)
2627
+ return out
2532
2628
 
2533
2629
 
2534
- def softsign(x):
2630
+ def _interpolate_ext_scale_factor_convert_size(input, scale_factor):
2631
+ """
2632
+ convert scale_factor to size
2633
+ """
2634
+ shape = F.shape(input)
2635
+ size = None
2636
+ if F.isconstant(shape) and F.isconstant(scale_factor):
2637
+ tuple_len = min(len(shape) - 2, len(scale_factor))
2638
+ size = tuple([floor(shape[i + 2] * scale_factor[i])
2639
+ for i in range(tuple_len)])
2640
+ else:
2641
+ x = tuple_to_tensor_(shape[2:], mstype.int64)
2642
+ y = tuple_to_tensor_(scale_factor, mstype.float32)
2643
+ t = x * y
2644
+ t = ops.TruncateDiv()(t, Tensor(1))
2645
+ t = ops.cast(t, mstype.int64)
2646
+ size = tensor_to_tuple_(t)
2647
+ return size
2648
+
2649
+
2650
+ def interpolate_ext(input,
2651
+ size=None,
2652
+ scale_factor=None,
2653
+ mode="nearest",
2654
+ align_corners=None,
2655
+ recompute_scale_factor=None):
2535
2656
  r"""
2536
- Softsign activation function.
2537
-
2538
- The function is shown as follows:
2657
+ Samples the input Tensor to the given size or scale_factor by using one of the interpolate algorithms.
2539
2658
 
2540
- .. math::
2541
- \text{SoftSign}(x) = \frac{x}{1 + |x|}
2659
+ .. note::
2660
+ - In 'linear' mode, backpropagation does not support scenarios where `scale_factor` is not None
2661
+ and `align_corners` is False.
2542
2662
 
2543
2663
  Args:
2544
- x (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
2545
- additional dimensions, with float16 or float32 data type.
2664
+ input (Tensor): Tensor to be resized.
2665
+ Input tensor must be a 3-D, 4-D, or 5-D tensor with shape
2666
+ :math:`(N, C, [optional D], [optional H], W)` , with data type of float.
2667
+ size (Union[int, tuple[int], list[int]], optional): The target size.
2668
+ If size is a tuple or list, its length should be the same as the number of dimensions in input
2669
+ after removing the first two dimensions N, C.
2670
+ One and only one of size and scale_factor can be set to None. Default: ``None`` .
2671
+ scale_factor (Union[float, tuple[float], list[float]], optional): The scale factor of new size of the tensor.
2672
+ If scale_factor is a tuple or list, its length should be the same as the number of dimensions in input
2673
+ after removing the first two dimensions N, C.
2674
+ One and only one of size and scale_factor can be set to None. Default: ``None`` .
2675
+ mode (str): The sampling algorithm.
2676
+ One of 'nearest', 'linear' (3D only), 'bilinear' (4D only), 'trilinear' (5D only), 'bicubic' (4D only),
2677
+ 'area', 'nearest-exact'(matches Scikit-Image and PIL nearest neighbours interpolation algorithms and fixes
2678
+ known issues with `nearest` in 3D and 4D). Default: ``"nearest"`` .
2679
+
2680
+ align_corners (bool): Whether to use corner alignment for coordinate mapping. Assuming a transformation is
2681
+ applied to the input Tensor along the x-axis, the specific calculation formula is as follows:
2682
+
2683
+ .. code-block::
2684
+
2685
+ ori_i = new_length != 1 ? new_i * (ori_length - 1) / (new_length - 1) : 0 # 'align_corners' = True
2686
+
2687
+ ori_i = new_length > 1 ? (new_i + 0.5) * ori_length / new_length - 0.5 : 0 # 'align_corners' = False
2688
+
2689
+ Among them, :math:`ori\_length` and :math:`new\_length` represent the length of the Tensor before and after
2690
+ transformation along the x-axis respectively; :math:`new\_i` represents the coordinate of the i-th element
2691
+ along the x-axis after transformation; :math:`ori\_i` represents
2692
+ the corresponding coordinate of the original
2693
+ data along the x-axis.
2694
+
2695
+ This is only valid for ``'linear'``, ``'bilinear'``, or ``'bicubic'`` modes. Default: ``False`` .
2696
+ recompute_scale_factor (bool, optional): Recalculate `scale_factor`.
2697
+ If True, `size` is first computed from `scale_factor`,
2698
+ and the interpolation is then performed with that computed `size`.
2699
+ If False, the value of `size` or `scale_factor` will be used for direct interpolation. Default: ``None`` .
2700
+
2701
+ .. note::
2702
+ The 'nearest-exact' mode is the same as the nearest-neighbor interpolation algorithm used in
2703
+ scikit-image and PIL. The 'nearest' mode produces the same results as the INTER_NEAREST interpolation
2704
+ algorithm used in OpenCV.
2705
+
2706
+ Args Support List and Supported Platforms:
2707
+
2708
+ +---------------+-----------+---------------+--------------+----------------+
2709
+ | mode | input.dim | align_corners | scale_factor | device |
2710
+ +===============+===========+===============+==============+================+
2711
+ | nearest | 3 | \- | √ | Ascend,GPU,CPU |
2712
+ +---------------+-----------+---------------+--------------+----------------+
2713
+ | | 4 | \- | √ | Ascend,GPU,CPU |
2714
+ +---------------+-----------+---------------+--------------+----------------+
2715
+ | | 5 | \- | √ | Ascend,GPU,CPU |
2716
+ +---------------+-----------+---------------+--------------+----------------+
2717
+ | linear | 3 | √ | √ | Ascend,GPU,CPU |
2718
+ +---------------+-----------+---------------+--------------+----------------+
2719
+ | bilinear | 4 | √ | × | Ascend,GPU,CPU |
2720
+ +---------------+-----------+---------------+--------------+----------------+
2721
+ | bicubic | 4 | √ | × | Ascend,GPU,CPU |
2722
+ +---------------+-----------+---------------+--------------+----------------+
2723
+ | area | 3 | \- | √ | Ascend,GPU,CPU |
2724
+ +---------------+-----------+---------------+--------------+----------------+
2725
+ | | 4 | \- | √ | Ascend,GPU,CPU |
2726
+ +---------------+-----------+---------------+--------------+----------------+
2727
+ | | 5 | \- | √ | Ascend,GPU,CPU |
2728
+ +---------------+-----------+---------------+--------------+----------------+
2729
+ | nearest-exact | 3 | \- | × | Ascend,CPU |
2730
+ +---------------+-----------+---------------+--------------+----------------+
2731
+ | | 4 | \- | × | Ascend,CPU |
2732
+ +---------------+-----------+---------------+--------------+----------------+
2733
+ | trilinear | 5 | √ | √ | Ascend,GPU,CPU |
2734
+ +---------------+-----------+---------------+--------------+----------------+
2735
+
2736
+ - `-` indicates that there is no such parameter.
2737
+ - `×` indicates that this parameter is not currently supported.
2738
+ - `√` indicates that this parameter is supported.
2546
2739
 
2547
2740
  Returns:
2548
- Tensor, with the same type and shape as the `x`.
2741
+ Tensor, resized, whose dimensions and dtype are the same as `input`.
2549
2742
 
2550
2743
  Raises:
2551
- TypeError: If `x` is not a Tensor.
2552
- TypeError: If dtype of `x` is neither float16 nor float32.
2744
+ TypeError: `input` is not a Tensor.
2745
+ ValueError: Both `size` and `scale_factor` are not empty.
2746
+ ValueError: Both `size` and `scale_factor` are empty.
2747
+ ValueError: When `size` is a tuple or list, its length is not equal to `input.ndim - 2`.
2748
+ ValueError: When `scale_factor` is a tuple or list, its length is not equal to `input.ndim - 2`.
2749
+ ValueError: `mode` is not in the list of supported modes.
2750
+ ValueError: `input.ndim` is not in the list of supported dimensions for the corresponding mode.
2751
+ ValueError: Both `size` and `recompute_scale_factor` are specified.
2752
+ ValueError: `scale_factor` is not in the corresponding list of supported values.
2753
+ ValueError: `align_corners` is not in the corresponding list of supported values.
2553
2754
 
2554
2755
  Supported Platforms:
2555
2756
  ``Ascend`` ``GPU`` ``CPU``
2556
2757
 
2557
2758
  Examples:
2558
2759
  >>> import mindspore
2559
- >>> import numpy as np
2560
- >>> from mindspore import Tensor, ops
2561
- >>> x = Tensor(np.array([0, -1, 2, 30, -30]), mindspore.float32)
2562
- >>> output = ops.softsign(x)
2760
+ >>> from mindspore import Tensor, mint
2761
+ >>> input = Tensor([[[1, 2, 3], [4, 5, 6]]], mindspore.float32)
2762
+ >>> output = mint.interpolate(input, size=(6,), mode='nearest')
2563
2763
  >>> print(output)
2564
- [ 0. -0.5 0.6666667 0.9677419 -0.9677419]
2764
+ [[[1. 1. 2. 2. 3. 3.]
2765
+ [4. 4. 5. 5. 6. 6.]]]
2565
2766
  """
2566
- return softsign_(x)
2767
+ def run_nearest(x, size, align_corners=None, scale_factor=None):
2768
+ x_rank = F.rank(x)
2769
+ if x_rank == 3:
2770
+ x = _get_cache_prim(ops.auto_generate.UpsampleNearest1D)()(
2771
+ x, size, scale_factor)
2772
+ elif x_rank == 4:
2773
+ x = _get_cache_prim(ops.auto_generate.UpsampleNearest2D)()(
2774
+ x, size, scale_factor)
2775
+ else:
2776
+ x = _get_cache_prim(P.UpsampleNearest3D)()(x, size, scale_factor)
2777
+ return x
2567
2778
 
2779
+ def run_linear(x, size, align_corners=None, scale_factor=None):
2780
+ out = _get_cache_prim(
2781
+ ops.auto_generate.UpsampleLinear1D)()(x, size, scale_factor, align_corners)
2782
+ return out
2568
2783
 
2569
- def soft_margin_loss(input, target, reduction='mean'):
2570
- r"""
2571
- Calculate the soft margin loss of input and target.
2784
+ def run_bilinear(x, size, align_corners=None, scale_factor=None):
2785
+ out = _get_cache_prim(
2786
+ ops.auto_generate.UpsampleBilinear2D)()(x, size, scale_factor, align_corners)
2787
+ return out
2788
+
2789
+ def run_trilinear(x, size, align_corners=None, scale_factor=None):
2790
+ resize = _get_cache_prim(P.nn_ops.UpsampleTrilinear3D)(align_corners)
2791
+ return resize(x, size, scale_factor)
2792
+
2793
+ def run_bicubic(x, size, align_corners=None, scale_factor=None):
2794
+ resize = _get_cache_prim(P.image_ops.ResizeBicubic)(
2795
+ align_corners=align_corners, half_pixel_centers=not align_corners)
2796
+ x = resize(x, size)
2797
+ return x
2798
+
2799
+ def run_area(x, size, align_corners=None, scale_factor=None):
2800
+ x_rank = F.rank(x)
2801
+ if x_rank == 3:
2802
+ x = F.adaptive_avg_pool1d(x, size[0])
2803
+ elif x_rank == 4:
2804
+ x = F.adaptive_avg_pool2d(x, tuple(size))
2805
+ else:
2806
+ x = F.adaptive_avg_pool3d(x, tuple(size))
2807
+ return x
2808
+
2809
+ def run_nearest_exact(x, size, align_corners=None, scale_factor=None):
2810
+ x_rank = F.rank(x)
2811
+ if x_rank == 3:
2812
+ size = size[:1] + (1,)
2813
+ # For impl of nearest 3D use 4D.
2814
+ x = x.unsqueeze(-1)
2815
+ resize = _get_cache_prim(P.ResizeNearestNeighborV2)(
2816
+ align_corners=False,
2817
+ half_pixel_centers=True)
2818
+ x = resize(x, size)
2819
+ x = _get_cache_prim(P.Squeeze)(-1)(x)
2820
+ if x_rank == 4:
2821
+ resize = _get_cache_prim(P.ResizeNearestNeighborV2)(
2822
+ align_corners=False,
2823
+ half_pixel_centers=True)
2824
+ x = resize(x, size)
2825
+ return x
2826
+
2827
+
2828
+ resize_funcs = {
2829
+ "nearest": run_nearest,
2830
+ "linear": run_linear,
2831
+ "bilinear": run_bilinear,
2832
+ "bicubic": run_bicubic,
2833
+ "trilinear": run_trilinear,
2834
+ "area": run_area,
2835
+ "nearest-exact": run_nearest_exact,
2836
+ }
2837
+
2838
+ # mode check
2839
+ if mode not in resize_funcs:
2840
+ raise ValueError(
2841
+ f"For 'interpolate', 'mode' must be in '{list(resize_funcs)}', but got {mode}"
2842
+ )
2843
+ if mode in ("nearest", "area", "nearest-exact"):
2844
+ if align_corners is not None:
2845
+ raise ValueError("align_corners option can only be set with the "
2846
+ "interpolating modes: linear | bilinear | bicubic | trilinear"
2847
+ )
2848
+ else:
2849
+ if align_corners is None:
2850
+ align_corners = False
2851
+
2852
+ # check for size and scale_factor
2853
+ if size is not None and scale_factor is not None:
2854
+ raise ValueError(
2855
+ "For 'interpolate', 'size' and 'scale_factor' cannot be set simultaneously"
2856
+ )
2857
+ if size is not None:
2858
+ size = _interploate_ext_make_tuple(input, size)
2859
+ elif scale_factor is not None:
2860
+ scale_factor = _interploate_ext_make_tuple(input, scale_factor)
2861
+ else:
2862
+ raise ValueError(
2863
+ "For 'interpolate', 'size' and 'scale_factor' cannot be both empty"
2864
+ )
2865
+
2866
+ # "area" mode always requires an explicit size rather than scale factor.
2867
+ if mode == "area" and size is None:
2868
+ recompute_scale_factor = True
2869
+
2870
+ # recompute_scale_factor
2871
+ if recompute_scale_factor is not None and recompute_scale_factor:
2872
+ if size is not None:
2873
+ raise ValueError(
2874
+ "For 'interpolate', it is incorrect to set 'recompute_scale_factor' to True"
2875
+ " after specifying an explicit 'size'.")
2876
+ size = _interpolate_ext_scale_factor_convert_size(input, scale_factor)
2877
+ scale_factor = None
2878
+
2879
+ # scale_factor
2880
+ if mode in ("bilinear", "bicubic", "nearest-exact"):
2881
+ if scale_factor is not None:
2882
+ raise ValueError("scale_factor option can only be set with the "
2883
+ "interpolating modes: nearest | linear | area | trilinear"
2884
+ )
2885
+
2886
+ return resize_funcs.get(mode)(input, size, align_corners, scale_factor)
2887
+
2888
+
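A hedged usage sketch of `interpolate_ext` (assuming it is reachable as `ops.function.nn_func.interpolate_ext`, mirroring the `pad_ext` example later in this file; outputs are not reproduced here): exactly one of `size` and `scale_factor` may be given, and `scale_factor` is only accepted directly by the nearest / linear / area / trilinear modes.

import numpy as np
import mindspore as ms
from mindspore import ops

x = ms.Tensor(np.arange(8, dtype=np.float32).reshape(1, 2, 4))       # 3-D (N, C, W) input
up1 = ops.function.nn_func.interpolate_ext(x, size=8, mode="nearest")          # scalar size -> (8,)
up2 = ops.function.nn_func.interpolate_ext(x, scale_factor=2.0, mode="linear",
                                           align_corners=False)                # W: 4 -> 8
# interpolate_ext(x, size=8, scale_factor=2.0) would raise ValueError: only one may be set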
2889
+ def upsample(input, size=None, scale_factor=None, mode="nearest", align_corners=None, recompute_scale_factor=None):
2890
+ r"""
2891
+ Alias for :func:`mindspore.ops.interpolate` .
2892
+
2893
+ Supported Platforms:
2894
+ ``Ascend`` ``GPU`` ``CPU``
2895
+ """
2896
+ return interpolate(input, size, scale_factor, mode, align_corners, recompute_scale_factor)
2897
+
2898
+
2899
+ def softsign(x):
2900
+ r"""
2901
+ SoftSign activation function.
2902
+
2903
+ The function is shown as follows:
2904
+
2905
+ .. math::
2906
+ \text{SoftSign}(x) = \frac{x}{1 + |x|}
2907
+
2908
+ Softsign Activation Function Graph:
2909
+
2910
+ .. image:: ../images/Softsign.png
2911
+ :align: center
2912
+
2913
+ Args:
2914
+ x (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
2915
+ additional dimensions, with float16 or float32 data type.
2916
+
2917
+ Returns:
2918
+ Tensor, with the same type and shape as the `x`.
2919
+
2920
+ Raises:
2921
+ TypeError: If `x` is not a Tensor.
2922
+ TypeError: If dtype of `x` is neither float16 nor float32.
2923
+
2924
+ Supported Platforms:
2925
+ ``Ascend`` ``GPU`` ``CPU``
2926
+
2927
+ Examples:
2928
+ >>> import mindspore
2929
+ >>> import numpy as np
2930
+ >>> from mindspore import Tensor, ops
2931
+ >>> x = Tensor(np.array([0, -1, 2, 30, -30]), mindspore.float32)
2932
+ >>> output = ops.softsign(x)
2933
+ >>> print(output)
2934
+ [ 0. -0.5 0.6666667 0.9677419 -0.9677419]
2935
+ """
2936
+ return softsign_(x)
2937
+
2938
+
2939
+ def soft_margin_loss(input, target, reduction='mean'):
2940
+ r"""
2941
+ Calculate the soft margin loss of input and target.
2572
2942
 
2573
2943
  Creates a criterion that optimizes a two-class classification
2574
2944
  logistic loss between input tensor :math:`x` and target tensor :math:`y`
@@ -2584,7 +2954,7 @@ def soft_margin_loss(input, target, reduction='mean'):
2584
2954
 
2585
2955
  Args:
2586
2956
  input (Tensor): Predict data. Data type must be float16 or float32.
2587
- target (Tensor): Ground truth data, with the same type and shape as `logits`.
2957
+ target (Tensor): Ground truth data, with the same type and shape as `input`.
2588
2958
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
2589
2959
  ``'sum'`` . Default: ``'mean'`` .
2590
2960
 
@@ -2593,7 +2963,7 @@ def soft_margin_loss(input, target, reduction='mean'):
2593
2963
  - ``'sum'``: the output elements will be summed.
2594
2964
 
2595
2965
  Outputs:
2596
- Tensor or Scalar. If `reduction` is ``'none'``, its shape is the same as `logits`.
2966
+ Tensor or Scalar. If `reduction` is ``'none'``, its shape is the same as `input`.
2597
2967
  Otherwise, a scalar value will be returned.
2598
2968
 
2599
2969
  Raises:
@@ -2620,34 +2990,31 @@ def soft_margin_loss(input, target, reduction='mean'):
2620
2990
  return output
2621
2991
 
2622
2992
 
2623
- def softmax(x, axis=-1, *, dtype=None):
2993
+ def softmax(input, axis=-1, *, dtype=None):
2624
2994
  r"""
2625
2995
  Applies the Softmax operation to the input tensor on the specified axis.
2626
- Suppose a slice in the given axis :math:`x`, then for each element :math:`x_i`,
2996
+ Suppose a slice in the given axis :math:`axis`, then for each element :math:`input_i`,
2627
2997
  the Softmax function is shown as follows:
2628
2998
 
2629
2999
  .. math::
2630
- \text{output}(x_i) = \frac{\exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)},
3000
+ \text{output}(input_i) = \frac{\exp(input_i)}{\sum_{j = 0}^{N-1}\exp(input_j)},
2631
3001
 
2632
3002
  where :math:`N` is the length of the tensor.
2633
3003
 
2634
3004
  Args:
2635
- axis (Union[int, tuple[int]], optional): The axis to perform the Softmax operation. Default: ``-1`` .
2636
- x (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
3005
+ input (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
2637
3006
  additional dimensions, with float16 or float32 data type.
3007
+ axis (int, optional): The axis to perform the Softmax operation. Default: ``-1`` .
2638
3008
 
2639
3009
  Keyword Args:
2640
- dtype (:class:`mindspore.dtype`, optional): When set, `x` will be converted to the specified type,
3010
+ dtype (:class:`mindspore.dtype`, optional): When set, `input` will be converted to the specified type,
2641
3011
  `dtype`, before execution, and dtype of returned Tensor will also be `dtype`. Default: ``None`` .
2642
3012
 
2643
3013
  Returns:
2644
- Tensor, with the same type and shape as the logits.
3014
+ Tensor, with the same type and shape as the `input`.
2645
3015
 
2646
3016
  Raises:
2647
- TypeError: If `axis` is not an int or a tuple.
2648
- TypeError: If dtype of `x` is neither float16 nor float32.
2649
- ValueError: If `axis` is a tuple whose length is less than 1.
2650
- ValueError: If `axis` is a tuple whose elements are not all in range [-len(logits.shape), len(logits.shape))
3017
+ TypeError: If `axis` is not an int.
2651
3018
 
2652
3019
  Supported Platforms:
2653
3020
  ``Ascend`` ``GPU`` ``CPU``
@@ -2656,8 +3023,8 @@ def softmax(x, axis=-1, *, dtype=None):
2656
3023
  >>> import mindspore
2657
3024
  >>> import numpy as np
2658
3025
  >>> from mindspore import Tensor, ops
2659
- >>> x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
2660
- >>> output = ops.softmax(x)
3026
+ >>> input = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
3027
+ >>> output = ops.softmax(input)
2661
3028
  >>> print(output)
2662
3029
  [0.01165623 0.03168492 0.08612854 0.23412167 0.6364086 ]
2663
3030
  """
@@ -2666,9 +3033,57 @@ def softmax(x, axis=-1, *, dtype=None):
2666
3033
  type_axis = type(axis).__name__
2667
3034
  raise TypeError(f" the type of 'axis' must be 'int', but got '{axis}' with type '{type_axis}'.")
2668
3035
  if dtype is not None:
2669
- x = ops.cast(x, dtype)
2670
- softmax_ = _get_cache_prim(P.Softmax)(axis=axis)
2671
- return softmax_(x)
3036
+ input = ops.cast(input, dtype)
3037
+ softmax_ = _get_cache_prim(P.Softmax)(axis)
3038
+ return softmax_(input)
3039
+
3040
+
3041
+ def softmax_ext(input, dim=None, dtype=None):
3042
+ r"""
3043
+ Applies the Softmax operation to the input tensor on the specified axis.
3044
+ Suppose a slice in the given axis :math:`dim`, then for each element :math:`input_i`,
3045
+ the Softmax function is shown as follows:
3046
+
3047
+ .. math::
3048
+ \text{output}(input_i) = \frac{\exp(input_i)}{\sum_{j = 0}^{N-1}\exp(input_j)},
3049
+
3050
+ where :math:`N` is the length of the tensor.
3051
+
3052
+ Args:
3053
+ input (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
3054
+ additional dimensions.
3055
+ dim (int, optional): The dim to perform the Softmax operation. Default: ``None`` .
3056
+
3057
+ Keyword Args:
3058
+ dtype (:class:`mindspore.dtype`, optional): When set, `input` will be converted to the specified type,
3059
+ `dtype`, before execution, and dtype of returned Tensor will also be `dtype`. Default: ``None`` .
3060
+
3061
+ Returns:
3062
+ Tensor, with the same type and shape as the `input`.
3063
+
3064
+ Raises:
3065
+ TypeError: If `dim` is not an int.
3066
+
3067
+ Supported Platforms:
3068
+ ``Ascend`` ``GPU`` ``CPU``
3069
+
3070
+ Examples:
3071
+ >>> import mindspore
3072
+ >>> import numpy as np
3073
+ >>> from mindspore import Tensor, ops
3074
+ >>> input = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
3075
+ >>> output = ops.function.nn_func.softmax_ext(input)
3076
+ >>> print(output)
3077
+ [0.01165623 0.03168492 0.08612854 0.23412167 0.6364086 ]
3078
+ """
3079
+ dim = -1 if dim is None else dim
3080
+ if not isinstance(dim, int):
3081
+ type_dim = type(dim).__name__
3082
+ raise TypeError(f" the type of 'dim' must be 'int', but got '{dim}' with type '{type_dim}'.")
3083
+ if dtype is not None:
3084
+ input = ops.cast(input, dtype)
3085
+ softmax_ = _get_cache_prim(P.Softmax)(dim)
3086
+ return softmax_(input)
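A short usage sketch of `softmax_ext`, following the doctest above: `dim=None` falls back to the last axis, and `dtype` casts the input before the softmax is applied.

import numpy as np
import mindspore as ms
from mindspore import ops

logits = ms.Tensor(np.array([[1.0, 2.0, 3.0]]), ms.float16)
probs = ops.function.nn_func.softmax_ext(logits, dim=None, dtype=ms.float32)
# probs is float32 and each row sums to 1 along the last axis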
2672
3087
 
2673
3088
 
2674
3089
  def softmin(x, axis=-1, *, dtype=None):
@@ -2692,7 +3107,7 @@ def softmin(x, axis=-1, *, dtype=None):
2692
3107
  `dtype`, before execution, and dtype of returned Tensor will also be `dtype`. Default: ``None`` .
2693
3108
 
2694
3109
  Returns:
2695
- Tensor, with the same type and shape as the logits.
3110
+ Tensor, with the same type and shape as `x`.
2696
3111
 
2697
3112
  Raises:
2698
3113
  TypeError: If `axis` is not an int or a tuple.
@@ -2715,7 +3130,7 @@ def softmin(x, axis=-1, *, dtype=None):
2715
3130
 
2716
3131
  if dtype is not None:
2717
3132
  x = ops.cast(x, dtype)
2718
- softmax_ = _get_cache_prim(P.Softmax)(axis=axis)
3133
+ softmax_ = _get_cache_prim(P.Softmax)(axis)
2719
3134
  return softmax_(-1*x)
2720
3135
 
2721
3136
 
@@ -2731,6 +3146,11 @@ def softshrink(x, lambd=0.5):
2731
3146
  0, & \text{ otherwise }
2732
3147
  \end{cases}
2733
3148
 
3149
+ SoftShrink Activation Function Graph:
3150
+
3151
+ .. image:: ../images/Softshrink.png
3152
+ :align: center
3153
+
2734
3154
  Args:
2735
3155
  x (Tensor): The input of soft shrink with data type of float16 or float32.
2736
3156
  lambd (float): The :math:`\lambda` must be no less than zero. Default: ``0.5`` .
@@ -2739,15 +3159,16 @@ def softshrink(x, lambd=0.5):
2739
3159
  Tensor, has the same shape and data type as `x`.
2740
3160
 
2741
3161
  Raises:
2742
- TypeError: If lambd is not a float.
2743
- TypeError: If input_x is not a Tensor.
2744
- TypeError: If dtype of input_x is neither float16 nor float32.
2745
- ValueError: If lambd is less than 0.
3162
+ TypeError: If `lambd` is not a float.
3163
+ TypeError: If `x` is not a Tensor.
3164
+ TypeError: If dtype of `x` is neither float16 nor float32.
3165
+ ValueError: If `lambd` is less than 0.
2746
3166
 
2747
3167
  Supported Platforms:
2748
3168
  ``Ascend`` ``GPU`` ``CPU``
2749
3169
 
2750
3170
  Examples:
3171
+ >>> import mindspore
2751
3172
  >>> from mindspore import Tensor
2752
3173
  >>> from mindspore import ops
2753
3174
  >>> import numpy as np
@@ -2813,45 +3234,11 @@ def softplus(input, beta=1, threshold=20): # pylint:disable=redefined-outer-name
2813
3234
  >>> print(output)
2814
3235
  [0.7443967 0.79813886 30. 25.]
2815
3236
  """
2816
- softplus_op = _get_cache_prim(P.Softplus)()
2817
3237
  scaling_input = beta * input
2818
- op_output = (1 / beta) * softplus_op(scaling_input)
3238
+ op_output = (1 / beta) * softplus_(scaling_input)
2819
3239
  return ops.select(input * beta > threshold, input, op_output)
2820
3240
 
2821
3241
 
2822
- def silu(x):
2823
- r"""
2824
- Computes Sigmoid Linear Unit of input element-wise. The SiLU function is defined as:
2825
-
2826
- .. math::
2827
- \text{SiLU}(x) = x * \sigma(x),
2828
-
2829
- where the Logistic Sigmoid function is defined as:
2830
-
2831
- .. math::
2832
-
2833
- \text{sigma}(x_i) = \frac{1}{1 + \exp(-x_i)},
2834
-
2835
- where :math:`x_i` is an element of the x.
2836
-
2837
- For more details, please refer to :class:`mindspore.nn.SiLU`.
2838
-
2839
- Supported Platforms:
2840
- ``Ascend`` ``GPU`` ``CPU``
2841
-
2842
- Examples:
2843
- >>> import numpy as np
2844
- >>> import mindspore
2845
- >>> from mindspore import Tensor, ops
2846
- >>> x = Tensor(np.array([-1, 2, -3, 2, -1]), mindspore.float16)
2847
- >>> output = ops.silu(x)
2848
- >>> print(output)
2849
- [-0.269 1.762 -0.1423 1.762 -0.269]
2850
- """
2851
- silu_ = _get_cache_prim(SiLU)()
2852
- return silu_(x)
2853
-
2854
-
2855
3242
  def selu(input_x):
2856
3243
  r"""
2857
3244
  Activation function SeLU (Scaled exponential Linear Unit).
@@ -2871,14 +3258,20 @@ def selu(input_x):
2871
3258
 
2872
3259
  See more details in `Self-Normalizing Neural Networks <https://arxiv.org/abs/1706.02515>`_.
2873
3260
 
3261
+ SeLU Activation Function Graph:
3262
+
3263
+ .. image:: ../images/SeLU.png
3264
+ :align: center
3265
+
2874
3266
  Args:
2875
- input_x (Tensor): Tensor of any dimension, the data type is float16 or float32.
3267
+ input_x (Tensor): Tensor of any dimension,
3268
+ the data type is int8, int32, float16, float32, or float64 (CPU, GPU only).
2876
3269
 
2877
3270
  Returns:
2878
3271
  Tensor, with the same type and shape as the `input_x`.
2879
3272
 
2880
3273
  Raises:
2881
- TypeError: If dtype of `input_x` is neither float16 nor float32.
3274
+ TypeError: If dtype of `input_x` is not int8, int32, float16, float32, or float64.
2882
3275
 
2883
3276
  Supported Platforms:
2884
3277
  ``Ascend`` ``GPU`` ``CPU``
@@ -2896,41 +3289,6 @@ def selu(input_x):
2896
3289
  return selu_(input_x)
2897
3290
 
2898
3291
 
2899
- def sigmoid(input):
2900
- r"""
2901
- Computes Sigmoid of input element-wise. The Sigmoid function is defined as:
2902
-
2903
- .. math::
2904
-
2905
- \text{sigmoid}(input_i) = \frac{1}{1 + \exp(-input_i)}
2906
-
2907
- where :math:`input_i` is an element of the input.
2908
-
2909
- Args:
2910
- input (Tensor): Tensor of any dimension, the data type is float16, float32, float64, complex64 or complex128.
2911
-
2912
- Returns:
2913
- Tensor, with the same type and shape as the input.
2914
-
2915
- Raises:
2916
- TypeError: If dtype of `input` is not float16, float32, float64, complex64 or complex128.
2917
- TypeError: If `input` is not a Tensor.
2918
-
2919
- Supported Platforms:
2920
- ``Ascend`` ``GPU`` ``CPU``
2921
-
2922
- Examples:
2923
- >>> import mindspore
2924
- >>> import numpy as np
2925
- >>> from mindspore import Tensor, ops
2926
- >>> input = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
2927
- >>> output = ops.sigmoid(input)
2928
- >>> print(output)
2929
- [0.7310586 0.880797 0.95257413 0.98201376 0.9933072 ]
2930
- """
2931
- return _get_cache_prim(NN_OPS.Sigmoid)()(input)
2932
-
2933
-
2934
3292
  def logsigmoid(x):
2935
3293
  r"""
2936
3294
  Applies logsigmoid activation element-wise. The input is a Tensor with any valid shape.
@@ -2942,6 +3300,11 @@ def logsigmoid(x):
2942
3300
 
2943
3301
  where :math:`x_{i}` is the element of the input.
2944
3302
 
3303
+ LogSigmoid Activation Function Graph:
3304
+
3305
+ .. image:: ../images/LogSigmoid.png
3306
+ :align: center
3307
+
2945
3308
  Args:
2946
3309
  x (Tensor): The input of LogSigmoid with data type of float16 or float32.
2947
3310
  The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions.
@@ -2964,63 +3327,11 @@ def logsigmoid(x):
2964
3327
  >>> print(output)
2965
3328
  [-0.31326166 -0.12692806 -0.04858734]
2966
3329
  """
2967
- output = _get_cache_prim(P.Sigmoid)()(x)
2968
- ret = _get_cache_prim(P.Log)()(output)
3330
+ output = sigmoid_(x)
3331
+ ret = log_(output)
2969
3332
  return ret
2970
3333
 
2971
3334
 
2972
- def dense(input, weight, bias=None):
2973
- r"""
2974
- Applies the dense connected operation to the `input`. The dense function is defined as:
2975
-
2976
- .. math::
2977
- output = input * weight^{T} + bias
2978
-
2979
- .. warning::
2980
- This is an experimental API that is subject to change or deletion.
2981
-
2982
- Args:
2983
- input (Tensor): Input Tensor of shape :math:`(*, in\_channels)`,
2984
- where :math:`*` means any number of additional dimensions.
2985
- weight (Tensor): The weight applied to the input.
2986
- The shape is :math:`(out\_channels, in\_channels)` or :math:`(in\_channels)`.
2987
- bias (Tensor, optional): Additive biases to the output.
2988
- The shape is :math:`(out\_channels)` or :math:`()`. Defaults: ``None``, the `bias` is 0.
2989
-
2990
- Returns:
2991
- Output whose shape is determined by the shape of the input and the weight.
2992
-
2993
- Raises:
2994
- TypeError: If `input` is not Tensor.
2995
- TypeError: If `weight` is not Tensor.
2996
- TypeError: If `bias` is not Tensor.
2997
-
2998
- Supported Platforms:
2999
- ``Ascend`` ``GPU`` ``CPU``
3000
-
3001
- Examples:
3002
- >>> import numpy as np
3003
- >>> from mindspore import Tensor, ops
3004
- >>> input = mindspore.Tensor([[-1., 1., 2.], [-3., -3., 1.]], mindspore.float32)
3005
- >>> weight = mindspore.Tensor([[-2., -2., -2.], [0., -1., 0.]], mindspore.float32)
3006
- >>> bias = mindspore.Tensor([0., 1.], mindspore.float32)
3007
- >>> output = mindspore.ops.dense(input, weight, bias)
3008
- >>> print(output)
3009
- [[-4. 0.]
3010
- [10. 4.]]
3011
- """
3012
- _check_is_tensor("input", input, "dense")
3013
- _check_is_tensor("weight", weight, "dense")
3014
- _check_is_tensor("bias", bias, "dense")
3015
- weight = ops.t(weight)
3016
- input = ops.matmul(input, weight)
3017
- input_shape = input.shape
3018
- if bias is not None:
3019
- input = input + bias
3020
- _check_dense_add_bias_shape(input_shape, input.shape, bias.shape)
3021
- return input
3022
-
3023
-
3024
3335
  def _check_dense_add_bias_shape(input_shape, output_shape, bias_shape):
3025
3336
  """Check that the output has the correct shape after adding bias."""
3026
3337
  if input_shape != output_shape:
@@ -3111,14 +3422,15 @@ def bidense(input1, input2, weight, bias=None):
3111
3422
  input1 = input1.reshape((-1, input1_shape[-1]))
3112
3423
  input2 = input2.reshape((-1, input2_shape[-1]))
3113
3424
  batch_size = input1.shape[0]
3114
- matmul_ = P.MatMul()
3115
3425
  output = matmul_(input1, weight.transpose(1, 2, 0).view(input1_shape[-1], -1))
3116
3426
  output = output.view(batch_size, input2_shape[-1], weight.shape[0])
3117
3427
  output = output.transpose(2, 0, 1) * input2
3118
3428
  output = output.sum(2).swapaxes(0, 1)
3119
3429
  if bias is not None:
3120
- bias_add_ = P.BiasAdd()
3121
- output = bias_add_(output, bias)
3430
+ if input1.dtype != bias.dtype or input2.dtype != bias.dtype:
3431
+ raise TypeError(f"For 'bidense', the dtype of 'bias', 'input1' and 'input2' must be the same,"
3432
+ f" but got {bias.dtype}, {input1.dtype} and {input2.dtype}.")
3433
+ output = bias_add_(output.astype(bias.dtype), bias)
3122
3434
  if len(input1_shape) != 2:
3123
3435
  output_shape = input1_shape[:-1] + (-1,)
3124
3436
  output = output.reshape(output_shape)
@@ -3187,7 +3499,7 @@ def deformable_conv2d(x, weight, offsets, kernel_size, strides, padding, bias=No
3187
3499
  TypeError: If `strides`, `padding`, `kernel_size` or `dilations` is not a tuple with integer elements.
3188
3500
  TypeError: If `modulated` is not a bool.
3189
3501
  ValueError: If the tuple size of `strides`, `padding`, `kernel_size` or `dilations` is not expected.
3190
- ValueError: The N or C dimensions of 'strides' or `dilations` is not set to 1.
3502
+ ValueError: If the N or C dimensions of `strides` or `dilations` are not set to 1.
3191
3503
  ValueError: If `modulated` is not set to True.
3192
3504
 
3193
3505
  .. warning::
@@ -3212,13 +3524,10 @@ def deformable_conv2d(x, weight, offsets, kernel_size, strides, padding, bias=No
3212
3524
  deformable_groups,
3213
3525
  modulated)
3214
3526
  fm_offset = deformable_offsets(x, offsets)
3215
-
3216
3527
  weight_shape = weight.shape
3217
3528
  out_channel = weight_shape[0]
3218
3529
  strides_conv = (kernel_size[0], kernel_size[1])
3219
3530
  conv = _get_cache_prim(P.Conv2D)(out_channel, kernel_size, 1, "valid", 0, strides_conv, 1, groups)
3220
- bias_add_ = _get_cache_prim(P.BiasAdd)()
3221
-
3222
3531
  output = conv(fm_offset, weight)
3223
3532
  if bias is not None:
3224
3533
  output = bias_add_(output, bias)
@@ -3229,9 +3538,7 @@ def pdist(input, p=2.0):
3229
3538
  r"""
3230
3539
  Calculates the distance between every pair of row vectors in
3231
3540
  the input using the p-norm. If the input `input` is a 2D Tensor with shape :math:`(N, M)`,
3232
- the `output` must be a 1D Tensor with shape :math:`(N * (N - 1) / 2,)`. If `input` has batch
3233
- dimension with shape :math:`(*B, N, M)`, then the `output` must be a Tensor with
3234
- shape :math:`(*B, N * (N - 1) / 2)`.
3541
+ the `output` must be a 1D Tensor with shape :math:`(N * (N - 1) / 2,)`.
3235
3542
 
3236
3543
  .. math::
3237
3544
  y[n] = \sqrt[p]{{\mid x_{i} - x_{j} \mid}^p}
@@ -3239,8 +3546,7 @@ def pdist(input, p=2.0):
3239
3546
  where :math:`x_{i}, x_{j}` are two different row vectors in the input.
3240
3547
 
3241
3548
  Args:
3242
- input (Tensor): Input tensor of shape :math:`(*B, N, M)`. :math:`*B` is batch size, one-dim or multi-dim.
3243
- dtype: float16, float32 or float64.
3549
+ input (Tensor): Input tensor. dtype: float16, float32 or float64.
3244
3550
  p (float): The order of norm distance, :math:`p∈[0, ∞)`. Default: ``2.0`` .
3245
3551
 
3246
3552
  Returns:
@@ -3268,7 +3574,144 @@ def pdist(input, p=2.0):
3268
3574
  return pdist_(input)
3269
3575
 
3270
3576
 
3271
- @_primexpr
3577
+ def _circular_pad(input_x, padding):
3578
+ """circular pad"""
3579
+ if isinstance(padding, tuple):
3580
+ padding = tuple_to_tensor_(padding, mstype.int64)
3581
+ elif isinstance(padding, list):
3582
+ padding = list_to_tensor_(padding, mstype.int64)
3583
+ is_expand = False
3584
+ if padding.shape[0] // 2 + 1 == input_x.ndim:
3585
+ input_x = input_x.expand_dims(0)
3586
+ is_expand = True
3587
+ out = PadV3(mode="circular", paddings_contiguous=True)(input_x, padding, None)
3588
+ if is_expand:
3589
+ out = out.squeeze(0)
3590
+ return out
3591
+
3592
+
3593
+ def _reflection_pad(input, pad):
3594
+ """reflection pad"""
3595
+ out = input
3596
+ if len(pad) == 2:
3597
+ out = reflection_pad_1d_op(input, pad)
3598
+ elif len(pad) == 4:
3599
+ out = reflection_pad_2d_op(input, pad)
3600
+ else:
3601
+ out = reflection_pad_3d_op(input, pad)
3602
+ return out
3603
+
3604
+
3605
+ def _replication_pad(input, pad):
3606
+ """replication pad"""
3607
+ out = input
3608
+ if len(pad) == 2:
3609
+ out = replication_pad_1d_op(input, pad)
3610
+ elif len(pad) == 4:
3611
+ out = replication_pad_2d_op(input, pad)
3612
+ else:
3613
+ out = replication_pad_3d_op(input, pad)
3614
+ return out
3615
+
3616
+
3617
+ def pad_ext(input, pad, mode='constant', value=0.0):
3618
+ r"""
3619
+ Pads the input tensor according to the pad.
3620
+
3621
+ .. warning::
3622
+ `circular` mode has poor performance and is not recommended.
3623
+
3624
+ Args:
3625
+ input (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of additional dimensions.
3626
+ pad (Union[tuple[int], list[int], Tensor]): Filling position of pad.
3627
+ :math:`\left\lfloor\frac{\text{len(pad)}}{2}\right\rfloor` dimensions
3628
+ of `input` will be padded.
3629
+
3630
+ Example: to pad only the last dimension of the input tensor, then
3631
+ :attr:`pad` has the form
3632
+ :math:`(\text{padding_left}, \text{padding_right})`;
3633
+
3634
+ Example: to pad the last 2 dimensions of the input tensor, then use
3635
+ :math:`(\text{padding_left}, \text{padding_right}, \text{padding_top}, \text{padding_bottom})`;
3636
+
3637
+ Example: to pad the last 3 dimensions, use
3638
+ :math:`(\text{padding_left}, \text{padding_right}, \text{padding_top}, \text{padding_bottom},
3639
+ \text{padding_front}, \text{padding_back})` and so on.
3640
+
3641
+ mode (str, optional): Pad filling mode, ``'constant'`` , ``'reflect'`` , ``'replicate'`` or ``'circular'`` .
3642
+ Default: ``'constant'`` .
3643
+
3644
+ For ``'constant'`` mode, please refer to :class:`mindspore.nn.ConstantPad1d` as an example to understand
3645
+ this filling pattern and extend the padding pattern to n dimensions.
3646
+
3647
+ For ``'reflect'`` mode, please refer to :class:`mindspore.nn.ReflectionPad1d` as an example to understand
3648
+ this filling pattern.
3649
+ The reflect mode is used to pad the last three dimensions of 4D or 5D input, the last two dimensions of 3D
3650
+ or 4D input, or the last dimension of 2D or 3D input.
3651
+
3652
+ For ``'replicate'`` mode, please refer to :class:`mindspore.nn.ReplicationPad1d` as an example to understand
3653
+ this filling pattern.
3654
+ The replicate mode is used to pad the last three dimensions of 4D or 5D input, the last two dimensions of 3D
3655
+ or 4D input, or the last dimension of 2D or 3D input.
3656
+
3657
+ For ``'circular'`` mode, the pixels from one edge of the image are wrapped around to the opposite edge,
3658
+ such that the pixel on the right edge of the image is replaced with the pixel on the left edge,
3659
+ and the pixel on the bottom edge is replaced with the pixel on the top edge.
3660
+ The circular mode is used to pad the last three dimensions of 4D or 5D input, the last two dimensions of 3D
3661
+ or 4D input, or the last dimension of 2D or 3D input.
3662
+
3663
+ value (Union[int, float, None], optional): Valid only in ``'constant'`` mode.
3664
+ Set the padding value in ``'constant'`` mode. If the value is None, 0 is used as the default padding value.
3665
+ Default: ``0.0`` .
3666
+
3667
+ Returns:
3668
+ Tensor, the tensor after padding.
3669
+
3670
+ Raises:
3671
+ TypeError: If `pad` is not a tuple of int or a list of int.
3672
+ TypeError: If `input` is not a Tensor.
3673
+ ValueError: If length of `pad` is not even.
3674
+ ValueError: If length of `pad` is greater than 6.
3675
+ ValueError: If `mode` is not ``'constant'`` and `value` is not ``None``.
3676
+
3677
+ Supported Platforms:
3678
+ ``Ascend``
3679
+
3680
+ Examples:
3681
+ >>> import mindspore as ms
+ >>> from mindspore import ops
3682
+ >>> import numpy as np
3683
+ >>> x = ms.Tensor(np.arange(1 * 2 * 2 * 2).reshape((1, 2, 2, 2)), dtype=ms.float64)
3684
+ >>> output = ops.function.nn_func.pad_ext(x, [1, 0, 0, 1], mode='constant', value=6.0)
3685
+ >>> print(output)
3686
+ [[[[6. 0. 1.]
3687
+ [6. 2. 3.]
3688
+ [6. 6. 6.]]
3689
+ [[6. 4. 5.]
3690
+ [6. 6. 7.]
3691
+ [6. 6. 6.]]]]
3692
+ """
3693
+ if not isinstance(input, Tensor):
3694
+ raise TypeError(f"For 'pad', the type of 'input' must be Tensor, but got {type(input)}.")
3695
+ out = input
3696
+ if (isinstance(pad, tuple) and not pad):
3697
+ return out
3698
+ if mode == "constant":
3699
+ value = 0 if value is None else value
3700
+ out = constant_pad_nd_op(input, pad, value)
3701
+ else:
3702
+ if value != 0.0:
3703
+ raise ValueError(f"Padding mode {mode} doesn\'t take in value argument.")
3704
+ if mode == "circular":
3705
+ out = _circular_pad(input, pad)
3706
+ elif mode == "reflect":
3707
+ out = _reflection_pad(input, pad)
3708
+ elif mode == "replicate":
3709
+ out = _replication_pad(input, pad)
3710
+ else:
3711
+ raise ValueError(f"Pad filling mode must be 'constant' 'circular' 'reflect' or 'replicate'.")
3712
+ return out
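A hedged usage sketch of `pad_ext` (constant mode only; expected shapes noted in comments): `pad` is read in (left, right) pairs starting from the last dimension.

import numpy as np
import mindspore as ms
from mindspore import ops

x = ms.Tensor(np.ones((1, 2, 3)), ms.float32)             # shape (1, 2, 3)
y = ops.function.nn_func.pad_ext(x, (1, 2))               # pads only the last dim: 1 left, 2 right
# expected: y.shape == (1, 2, 6), new border entries filled with the default value 0.0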
3713
+
3714
+
3272
3715
  def _check_pad_inputs(padding):
3273
3716
  """check the input of pad"""
3274
3717
  if len(padding) % 2 != 0:
@@ -3286,8 +3729,10 @@ def pad(input_x, padding, mode='constant', value=None):
3286
3729
  Pads the input tensor according to the padding.
3287
3730
 
3288
3731
  Args:
3289
- input_x (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of additional dimensions.
3290
- padding (Union[tuple[int], list[int], Tensor]): Filling position of pad.
3732
+ input_x (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of additional dimensions.
3733
+ The rank of `input_x` is required to be no more than 5 when running on Ascend.
3734
+ padding (Union[tuple[int], list[int], Tensor]): Filling position of pad. Negative values are not
3735
+ supported while running in Ascend.
3291
3736
  :math:`\left\lfloor\frac{\text{len(padding)}}{2}\right\rfloor` dimensions
3292
3737
  of `input_x` will be padded.
3293
3738
 
@@ -3296,56 +3741,56 @@ def pad(input_x, padding, mode='constant', value=None):
3296
3741
  :math:`(\text{padding_left}, \text{padding_right})`;
3297
3742
 
3298
3743
  Example: to pad the last 2 dimensions of the input tensor, then use
3299
- :math:`(\text{padding_left}, \text{padding_right}`,
3300
- :math:`\text{padding_top}, \text{padding_bottom})`;
3744
+ :math:`(\text{padding_left}, \text{padding_right}, \text{padding_top}, \text{padding_bottom})`;
3301
3745
 
3302
3746
  Example: to pad the last 3 dimensions, use
3303
- :math:`(\text{padding_left}, \text{padding_right}`,
3304
- :math:`\text{padding_top}, \text{padding_bottom}`,
3305
- :math:`\text{padding_front}, \text{padding_back})` and so on.
3747
+ :math:`(\text{padding_left}, \text{padding_right}, \text{padding_top}, \text{padding_bottom},
3748
+ \text{padding_front}, \text{padding_back})` and so on.
3306
3749
 
3307
- mode (str, optional): Pad filling mode, ``"constant"`` , ``"reflect"`` , ``"replicate"`` or ``"circular"`` .
3750
+ mode (str, optional): Pad filling mode, ``'constant'`` , ``'reflect'`` , ``'replicate'`` or ``'circular'`` .
3308
3751
  Default: ``'constant'`` .
3309
3752
 
3310
- For "constant" mode, please refer to :class:`mindspore.nn.ConstantPad1d` as an example to understand
3753
+ For ``'constant'`` mode, please refer to :class:`mindspore.nn.ConstantPad1d` as an example to understand
3311
3754
  this filling pattern and extend the padding pattern to n dimensions.
3312
3755
 
3313
- For "reflect" mode, please refer to :class:`mindspore.nn.ReflectionPad1d` as an example to understand
3756
+ For ``'reflect'`` mode, please refer to :class:`mindspore.nn.ReflectionPad1d` as an example to understand
3314
3757
  this filling pattern.
3315
3758
  The reflect mode is used to pad the last two dimensions of 3D or 4D input, or the last dimension of 2D or
3316
3759
  3D input.
3317
3760
 
3318
- For "replicate" mode, please refer to :class:`mindspore.nn.ReplicationPad1d` as an example to understand
3761
+ For ``'replicate'`` mode, please refer to :class:`mindspore.nn.ReplicationPad1d` as an example to understand
3319
3762
  this filling pattern.
3320
3763
  The replicate mode is used to pad the last three dimensions of 4D or 5D input, the last two dimensions of 3D
3321
3764
  or 4D input, or the last dimension of 2D or 3D input.
3322
3765
 
3323
- For "circular" mode, the pixels from one edge of the image are wrapped around to the opposite edge,
3766
+ For ``'circular'`` mode, the pixels from one edge of the image are wrapped around to the opposite edge,
3324
3767
  such that the pixel on the right edge of the image is replaced with the pixel on the left edge,
3325
3768
  and the pixel on the bottom edge is replaced with the pixel on the top edge.
3326
3769
  The circular mode is used to pad the last three dimensions of 4D or 5D input, the last two dimensions of 3D
3327
3770
  or 4D input, or the last dimension of 2D or 3D input.
3328
3771
 
3329
- value (Union[int, float, None], optional): Valid only in "constant" mode.
3330
- Set the padding value in "constant" mode. If the value is None, 0 is used as the default padding value.
3772
+ value (Union[int, float, None], optional): Valid only in ``'constant'`` mode.
3773
+ Set the padding value in ``'constant'`` mode. If the value is None, 0 is used as the default padding value.
3331
3774
  Default: ``None`` .
3332
3775
 
3333
3776
  Returns:
3334
3777
  Tensor, the tensor after padding.
3335
3778
 
3336
3779
  Raises:
3337
- TypeError: If `paddings` is not an int of tuple or int of list.
3780
+ TypeError: If `padding` is not a tuple of int or a list of int.
3338
3781
  TypeError: If `input_x` is not a Tensor.
3339
3782
  ValueError: If length of `padding` is not even.
3340
3783
  ValueError: If length of `padding` is greater than 6.
3341
- ValueError: If mode is not "constant" and value not None.
3784
+ ValueError: If `mode` is not ``'constant'`` and `value` is not ``None``.
3785
+ ValueError: If rank of `input_x` is more than 5 while running in Ascend.
3786
+ ValueError: If `padding` contains a negative value while running in Ascend.
3342
3787
 
3343
3788
  Supported Platforms:
3344
3789
  ``Ascend`` ``GPU`` ``CPU``
3345
3790
 
3346
3791
  Examples:
3347
3792
  >>> import mindspore as ms
3348
- >>> import mindspore.ops as ops
3793
+ >>> from mindspore import ops
3349
3794
  >>> import numpy as np
3350
3795
  >>> x = ms.Tensor(np.arange(1 * 2 * 2 * 2).reshape((1, 2, 2, 2)), dtype=ms.float64)
3351
3796
  >>> output = ops.pad(x, [1, 0, 0, 1], mode='constant', value=6.0)
@@ -3395,7 +3840,7 @@ def pad(input_x, padding, mode='constant', value=None):
3395
3840
  return input_x
3396
3841
  if not isinstance(padding, Tensor):
3397
3842
  _check_pad_inputs(padding)
3398
- padding = Tensor(padding)
3843
+ padding = tuple(padding)
3399
3844
  is_expand = False
3400
3845
  if mode == "constant":
3401
3846
  value = 0 if value is None else value
@@ -3408,7 +3853,7 @@ def pad(input_x, padding, mode='constant', value=None):
3408
3853
  raise ValueError(f"For 'pad', the padding mode '{mode}' can not set value, but got value {value}.")
3409
3854
  if mode == "replicate":
3410
3855
  mode = "edge"
3411
- if padding.shape[0] // 2 + 1 == input_x.ndim:
3856
+ if len(padding) // 2 + 1 == input_x.ndim:
3412
3857
  input_x = input_x.expand_dims(0)
3413
3858
  is_expand = True
3414
3859
  out = PadV3(mode=mode, paddings_contiguous=True)(input_x, padding, value)
@@ -3417,142 +3862,6 @@ def pad(input_x, padding, mode='constant', value=None):
3417
3862
  return out
3418
3863
 
3419
3864
 
3420
- def relu(input):
3421
- r"""
3422
- Computes ReLU (Rectified Linear Unit activation function) of input tensors element-wise.
3423
-
3424
- It returns :math:`\max(input,\ 0)` element-wise. Specially, the neurons with the negative output
3425
- will be suppressed and the active neurons will stay the same.
3426
-
3427
- .. math::
3428
-
3429
- ReLU(input) = (input)^+ = \max(0, input)
3430
-
3431
- Note:
3432
- In general, this operator is more commonly used. The difference from `ReLuV2` is that the `ReLuV2` will
3433
- output one more Mask.
3434
-
3435
- Args:
3436
- input (Tensor): Input Tensor of numeric types.
3437
-
3438
- Returns:
3439
- Tensor, has the same dtype and shape as `input_x`.
3440
-
3441
- Raises:
3442
- TypeError: If dtype of `input` is not a number.
3443
- TypeError: If `input` is not a Tensor.
3444
-
3445
- Supported Platforms:
3446
- ``Ascend`` ``GPU`` ``CPU``
3447
-
3448
- Examples:
3449
- >>> import mindspore
3450
- >>> import numpy as np
3451
- >>> from mindspore import Tensor, ops
3452
- >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
3453
- >>> output = ops.relu(input_x)
3454
- >>> print(output)
3455
- [[0. 4. 0.]
3456
- [2. 0. 9.]]
3457
- """
3458
- relu_ = _get_cache_prim(NN_OPS.ReLU)()
3459
- return relu_(input)
3460
-
3461
-
3462
- def relu6(x):
3463
- r"""
3464
- Computes ReLU (Rectified Linear Unit) upper bounded by 6 of input tensors element-wise.
3465
-
3466
- .. math::
3467
-
3468
- \text{ReLU6}(x) = \min(\max(0,x), 6)
3469
-
3470
- It returns :math:`\min(\max(0,x), 6)` element-wise.
3471
-
3472
- Args:
3473
- x (Tensor): Tensor of shape :math:`(N, *)`,
3474
- where :math:`*` means any number of additional dimensions.
3475
- Data type must be float16, float32.
3476
-
3477
- Returns:
3478
- Tensor, with the same dtype and shape as the `x`.
3479
-
3480
- Raises:
3481
- TypeError: If dtype of `x` is neither float16 nor float32.
3482
- TypeError: If `x` is not a Tensor.
3483
-
3484
- Supported Platforms:
3485
- ``Ascend`` ``GPU`` ``CPU``
3486
-
3487
- Examples:
3488
- >>> import mindspore
3489
- >>> import numpy as np
3490
- >>> from mindspore import Tensor, ops
3491
- >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
3492
- >>> result = ops.relu6(input_x)
3493
- >>> print(result)
3494
- [[0. 4. 0.]
3495
- [2. 0. 6.]]
3496
- """
3497
- relu6_ = _get_cache_prim(NN_OPS.ReLU6)()
3498
- return relu6_(x)
3499
-
3500
-
3501
- def prelu(x, weight):
3502
- r"""
3503
- Parametric Rectified Linear Unit activation function.
3504
-
3505
- PReLU is described in the paper `Delving Deep into Rectifiers: Surpassing Human-Level Performance on
3506
- ImageNet Classification <https://arxiv.org/abs/1502.01852>`_. Defined as follows:
3507
-
3508
- .. math::
3509
- prelu(x_i)= \max(0, x_i) + \min(0, w * x_i),
3510
-
3511
- where :math:`x_i` is an element of a channel of the input, `w` is the weight of the channel.
3512
-
3513
- Note:
3514
- Scalar or 1-D Tensor is not supported on Ascend.
3515
-
3516
- Args:
3517
- x (Tensor): The input Tensor of the activation function. The data type is float16 or float32.
3518
- The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
3519
- weight (Tensor): Weight Tensor. The data type is float16 or float32.
3520
- The weight can only be a Tensor, and the length is the same as the number of channels C of the `input_x`.
3521
- On GPU devices, when the input is a scalar, the shape is :math:`(1,)` .
3522
-
3523
- Returns:
3524
- Tensor, with the same shape and dtype as `x`.
3525
-
3526
- For detailed information, please refer to :class:`mindspore.nn.PReLU`.
3527
-
3528
- Raises:
3529
- TypeError: If dtype of `x` or `weight` is neither float16 nor float32.
3530
- TypeError: If the `x` or the `weight` is not a Tensor.
3531
- ValueError: If the `x` is a 0-D or 1-D Tensor on Ascend.
3532
- ValueError: If the `weight` is not a 1-D Tensor.
3533
-
3534
- Supported Platforms:
3535
- ``Ascend`` ``GPU`` ``CPU``
3536
-
3537
- Examples:
3538
- >>> import mindspore
3539
- >>> import numpy as np
3540
- >>> from mindspore import Tensor, ops
3541
- >>> x = Tensor(np.arange(-6, 6).reshape((2, 3, 2)), mindspore.float32)
3542
- >>> weight = Tensor(np.array([0.1, 0.6, -0.3]), mindspore.float32)
3543
- >>> output = ops.prelu(x, weight)
3544
- >>> print(output)
3545
- [[[-0.60 -0.50]
3546
- [-2.40 -1.80]
3547
- [ 0.60 0.30]]
3548
- [[ 0.00 1.00]
3549
- [ 2.00 3.00]
3550
- [ 4.0 5.00]]]
3551
- """
3552
- prelu_ = _get_cache_prim(NN_OPS.PReLU)()
3553
- return prelu_(x, weight)
3554
-
3555
-
3556
3865
  def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
3557
3866
  r"""
3558
3867
 
@@ -3581,7 +3890,7 @@ def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
3581
3890
  TypeError: If `lower` is not a float or an int.
3582
3891
  TypeError: If `upper` is not a float or an int.
3583
3892
  TypeError: If `input` is not a Tensor.
3584
- TypeError: If `input` is not a Tensor of mindspore.float16 or mindpore.float32.
3893
+ TypeError: If `input` is not a Tensor of mindspore.float16 or mindspore.float32.
3585
3894
  ValueError: If `lower` is greater than upper.
3586
3895
 
3587
3896
  Supported Platforms:
@@ -3610,13 +3919,12 @@ def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
3610
3919
  _upper = Tensor(upper, mstype.float32)
3611
3920
  _size = input.shape
3612
3921
  if ops.is_sequence_value_unknown(_size):
3613
- dyn_shape = _get_cache_prim(P.TensorShape)()
3614
- _size = dyn_shape(input)
3615
- sign_matrix = _get_cache_prim(P.Sign)()(input)
3922
+ _size = tensor_shape_(input)
3923
+ sign_matrix = sign_(input)
3616
3924
  negative_filter = sign_matrix.clip(None, 0)
3617
3925
  positive_filter = sign_matrix.clip(0, None)
3618
- _dtype = _get_cache_prim(P.DType)()(input)
3619
- mask = ops.uniform(_size, _lower, _upper).astype(_dtype)
3926
+ input_dtype = dtype_(input)
3927
+ mask = ops.uniform(_size, _lower, _upper).astype(input_dtype)
3620
3928
  negative_mask = negative_filter * mask * -1
3621
3929
  total_mask = negative_mask + positive_filter
3622
3930
  out = total_mask * input
@@ -3684,6 +3992,21 @@ def _innner_log_softmax(inputs, axis):
3684
3992
  return inputs - logsumexp(inputs, axis, True)
3685
3993
 
3686
3994
 
3995
+ def _check_cross_entropy_inputs(input, target, weight, ignore_index, reduction, label_smoothing):
3996
+ """
3997
+ Check inputs for cross_entropy().
3998
+ """
3999
+ _check_is_tensor('input', input, "cross_entropy_loss")
4000
+ _check_is_tensor('target', target, "cross_entropy_loss")
4001
+ _check_is_tensor('weight', weight, "cross_entropy_loss")
4002
+ check_int_const(ignore_index, 'ignore_index', "cross_entropy_loss")
4003
+ check_non_negative_float_const(label_smoothing, 'label_smoothing', "cross_entropy_loss")
4004
+ check_string_const(reduction, ['none', 'mean', 'sum'], 'reduction', "cross_entropy_loss")
4005
+ if input.dtype not in [mstype.float64, mstype.float32, mstype.float16]:
4006
+ raise TypeError(f'For cross_entropy, the input dtype should be mstype.float64, mstype.float32 or '
4007
+ f'mstype.float16, but got dtype:{input.dtype}.')
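A small sketch of what the new dtype guard rejects, assuming the public `mindspore.ops.cross_entropy` entry point shown in the docstring below: floating-point logits pass, anything else raises the TypeError above.

import numpy as np
import mindspore as ms

logits = ms.Tensor(np.random.randn(3, 5).astype(np.float32))
labels = ms.Tensor(np.array([0, 2, 4], dtype=np.int32))        # class indices
loss = ms.ops.cross_entropy(logits, labels)                    # float32 logits: accepted
# ms.ops.cross_entropy(logits.astype(ms.int32), labels)        # int32 logits: TypeError from the check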
4008
+
4009
+
3687
4010
  def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean', label_smoothing=0.0):
3688
4011
  r"""
3689
4012
  The cross entropy loss between input and target.
@@ -3741,7 +4064,7 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3741
4064
  `input` is expected to be log-probabilities, data type must be float16 or float32.
3742
4065
  target (Tensor): For class indices, tensor of shape :math:`()`, :math:`(N)` or
3743
4066
  :math:`(N, d_1, d_2, ..., d_K)` , data type must be int32. For probabilities, tensor of shape :math:`(C,)` ,
3744
- :math:`(N, C)` or :math:`(N, C, d_1, d_2, ..., d_K)` , data type must be float16 or float32.
4067
+ :math:`(N, C)` or :math:`(N, C, d_1, d_2, ..., d_K)` , data type must be float16 or float32 or float64.
3745
4068
  weight (Tensor): A rescaling weight applied to the loss of each batch element.
3746
4069
  If not None, the shape is :math:`(C,)`, data type must be float16 or float32. Default: ``None`` .
3747
4070
  ignore_index (int): Specifies a target value that is ignored
@@ -3774,12 +4097,7 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3774
4097
  >>> target = ms.Tensor(np.random.randn(3, 5), ms.float32)
3775
4098
  >>> output = ms.ops.cross_entropy(inputs, target)
3776
4099
  """
3777
- _check_is_tensor('input', input, "cross_entropy_loss")
3778
- _check_is_tensor('target', target, "cross_entropy_loss")
3779
- _check_is_tensor('weight', weight, "cross_entropy_loss")
3780
- check_int_const(ignore_index, 'ignore_index', "cross_entropy_loss")
3781
- check_non_negative_float_const(label_smoothing, 'label_smoothing', "cross_entropy_loss")
3782
- check_string_const(reduction, ['none', 'mean', 'sum'], 'reduction', "cross_entropy_loss")
4100
+ _check_cross_entropy_inputs(input, target, weight, ignore_index, reduction, label_smoothing)
3783
4101
  class_dim = 0 if input.ndim == 1 else 1
3784
4102
  if target.dtype in [mstype.float32, mstype.float16]:
3785
4103
  return _cross_entropy(input, target, class_dim, weight, reduction, label_smoothing)
@@ -3788,8 +4106,6 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3788
4106
 
3789
4107
  def _cross_entropy(inputs, target, target_dim, weight=None, reduction='mean', label_smoothing=0.0):
3790
4108
  """cross entropy inner function"""
3791
- _ones_like = _get_cache_prim(P.OnesLike)()
3792
-
3793
4109
  class_dim = 0 if inputs.ndim == 1 else 1
3794
4110
  n_classes = inputs.shape[class_dim]
3795
4111
  inputs = _innner_log_softmax(inputs, class_dim)
@@ -3797,7 +4113,7 @@ def _cross_entropy(inputs, target, target_dim, weight=None, reduction='mean', la
3797
4113
  target = target * (1 - label_smoothing) + label_smoothing / n_classes
3798
4114
 
3799
4115
  if weight is None:
3800
- weight = _ones_like(inputs)
4116
+ weight = ones_like_(inputs)
3801
4117
  elif inputs.ndim != 1:
3802
4118
  broadcast_shape = [1 for _ in range(inputs.ndim)]
3803
4119
  broadcast_shape[1] = weight.shape[0]
@@ -3827,7 +4143,7 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
3827
4143
  N is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of
3828
4144
  classes.
3829
4145
 
3830
- If `reduction` is not ``None`` (default 'mean'), then
4146
+ If `reduction` is not ``None`` (default ``'mean'``), then
3831
4147
 
3832
4148
  .. math::
3833
4149
 
@@ -3895,37 +4211,31 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
3895
4211
 
3896
4212
  def _nll_loss(inputs, target, target_dim=-1, weight=None, ignore_index=None, reduction='none', label_smoothing=0.0):
3897
4213
  """nll loss inner function"""
3898
- _neg = _get_cache_prim(P.Neg)()
3899
- _gather_d = _get_cache_prim(P.GatherD)()
3900
- _gather = _get_cache_prim(P.Gather)()
3901
- _ones_like = _get_cache_prim(P.OnesLike)()
3902
- _equal = _get_cache_prim(P.Equal)()
3903
-
3904
4214
  if target.ndim == inputs.ndim - 1:
3905
4215
  target = target.expand_dims(target_dim)
3906
4216
  if ignore_index is not None:
3907
- non_pad_mask = _equal(target, ignore_index)
3908
- target = target.masked_fill(non_pad_mask, 0)
4217
+ non_pad_mask = equal_(target, ignore_index)
4218
+ target = target.masked_fill(non_pad_mask, ops.cast(0, target.dtype))
3909
4219
  else:
3910
4220
  non_pad_mask = target
3911
4221
  if weight is not None:
3912
- loss_weights = _gather(weight, target, 0)
4222
+ loss_weights = gather_(weight, target, 0)
3913
4223
  orig_shape = inputs.shape
3914
4224
  if inputs.ndim != 2:
3915
4225
  inputs = inputs.view(orig_shape[:2] + (-1,))
3916
4226
  weight = weight.view(weight.shape + (1,))
3917
4227
  weighted_inputs = inputs * weight
3918
4228
  weighted_inputs = weighted_inputs.view(orig_shape)
3919
- loss = _neg(_gather_d(weighted_inputs, target_dim, target))
3920
- smooth_loss = _neg(weighted_inputs.sum(axis=target_dim, keepdims=True))
4229
+ loss = neg_(gather_d_(weighted_inputs, target_dim, target))
4230
+ smooth_loss = neg_(weighted_inputs.sum(axis=target_dim, keepdims=True))
3921
4231
  else:
3922
- loss = _neg(_gather_d(inputs, target_dim, target))
3923
- smooth_loss = _neg(inputs.sum(axis=target_dim, keepdims=True))
3924
- loss_weights = _ones_like(loss)
4232
+ loss = neg_(gather_d_(inputs, target_dim, target))
4233
+ smooth_loss = neg_(inputs.sum(axis=target_dim, keepdims=True))
4234
+ loss_weights = ones_like_(loss)
3925
4235
  if ignore_index is not None:
3926
- loss = loss.masked_fill(non_pad_mask, 0.)
3927
- loss_weights = loss_weights.masked_fill(non_pad_mask, 0.)
3928
- smooth_loss = smooth_loss.masked_fill(non_pad_mask, 0.)
4236
+ loss = loss.masked_fill(non_pad_mask, ops.cast(0, loss.dtype))
4237
+ loss_weights = loss_weights.masked_fill(non_pad_mask, ops.cast(0, loss_weights.dtype))
4238
+ smooth_loss = smooth_loss.masked_fill(non_pad_mask, ops.cast(0, smooth_loss.dtype))
3929
4239
 
3930
4240
  loss = loss.squeeze(target_dim)
3931
4241
  smooth_loss = smooth_loss.squeeze(target_dim)
@@ -3947,8 +4257,9 @@ def l1_loss(input, target, reduction='mean'):
3947
4257
  r"""
3948
4258
  Calculate the mean absolute error between the `input` value and the `target` value.
3949
4259
 
3950
- Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, `reduction` is set to ``"none"``,
3951
- then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.
4260
+ Assuming that the :math:`x` and :math:`y` (predicted and target value) are 1-D Tensor,
4261
+ length :math:`N`, `reduction` is set to ``'none'``, then calculate the loss of
4262
+ :math:`x` and :math:`y` without dimensionality reduction.
3952
4263
 
3953
4264
  The formula is as follows:
3954
4265
 
@@ -3957,7 +4268,7 @@ def l1_loss(input, target, reduction='mean'):
3957
4268
 
3958
4269
  where :math:`N` is the batch size.
3959
4270
 
3960
- If `reduction` is ``"mean"`` or ``"sum"`` , then:
4271
+ If `reduction` is ``'mean'`` or ``'sum'`` , then:
3961
4272
 
3962
4273
  .. math::
3963
4274
  \ell(x, y) =
@@ -3978,13 +4289,13 @@ def l1_loss(input, target, reduction='mean'):
3978
4289
  - ``'sum'``: the output elements will be summed.
3979
4290
 
3980
4291
  Returns:
3981
- Tensor or Scalar, if `reduction` is ``"none"``, return a Tensor with same shape and dtype as `input`.
4292
+ Tensor or Scalar, if `reduction` is ``'none'``, return a Tensor with same shape and dtype as `input`.
3982
4293
  Otherwise, a scalar value will be returned.
3983
4294
 
3984
4295
  Raises:
3985
4296
  TypeError: If `input` is not a Tensor.
3986
4297
  TypeError: If `target` is not a Tensor.
3987
- ValueError: If `reduction` is not one of ``"none"``, ``"mean"`` or ``"sum"``.
4298
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
3988
4299
 
3989
4300
  Supported Platforms:
3990
4301
  ``Ascend`` ``GPU`` ``CPU``
@@ -3992,8 +4303,8 @@ def l1_loss(input, target, reduction='mean'):
3992
4303
  Examples:
3993
4304
  >>> from mindspore import Tensor, ops
3994
4305
  >>> from mindspore import dtype as mstype
3995
- >>> x = ms.Tensor([[1, 2, 3], [4, 5, 6]], mstype.float32)
3996
- >>> target = ms.Tensor([[6, 5, 4], [3, 2, 1]], mstype.float32)
4306
+ >>> x = Tensor([[1, 2, 3], [4, 5, 6]], mstype.float32)
4307
+ >>> target = Tensor([[6, 5, 4], [3, 2, 1]], mstype.float32)
3997
4308
  >>> output = ops.l1_loss(x, target, reduction="mean")
3998
4309
  >>> print(output)
3999
4310
  3.0
@@ -4002,7 +4313,7 @@ def l1_loss(input, target, reduction='mean'):
4002
4313
  _check_is_tensor('target', target, "l1_loss")
4003
4314
  if reduction not in ('mean', 'sum', 'none'):
4004
4315
  raise ValueError(f"For l1_loss, the 'reduction' must be in ['mean', 'sum', 'none'], but got {reduction}.")
4005
- loss = _get_cache_prim(P.Abs)()(input - target)
4316
+ loss = abs_(input - target)
4006
4317
  return _get_loss(loss, reduction, "l1_loss")
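The documented example can be checked by hand: with reduction="mean" the result is just the mean absolute difference. A small NumPy check (illustration only, not the MindSpore kernel):

    import numpy as np

    x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
    target = np.array([[6, 5, 4], [3, 2, 1]], dtype=np.float32)

    # |x - target| is [[5, 3, 1], [1, 3, 5]]; its mean is 18 / 6 = 3.0,
    # matching the printed output of ops.l1_loss(x, target, reduction="mean")
    print(np.abs(x - target).mean())   # 3.0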
4007
4318
 
4008
4319
 
@@ -4037,6 +4348,7 @@ def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
4037
4348
 
4038
4349
  Args:
4039
4350
  input (Tensor): Tensor of shape :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
4351
+ Data type is float16, float32 or float64.
4040
4352
  target (Tensor): Ground truth data, tensor of shape :math:`(N, *)`, same shape and dtype as the `input`.
4041
4353
  beta (float): A parameter used to control the point where the function will change between
4042
4354
  L1 to L2 loss. The value should be greater than zero. Default: ``1.0`` .
@@ -4115,13 +4427,13 @@ def threshold(input, thr, value):
4115
4427
  _check_is_tensor('input', input, "threshold")
4116
4428
  _check_value_type("thr", thr, [float, int], "threshold")
4117
4429
  _check_value_type("value", value, [float, int], "threshold")
4118
- cond = _get_cache_prim(P.Greater)()(input, thr)
4430
+ cond = greater_(input, thr)
4119
4431
  input_type = input.dtype
4120
4432
  value = Tensor(value, input_type)
4121
4433
  input_shape = input.shape
4122
- shape_tensor = _get_cache_prim(TupleToTensor)()(input_shape, mstype.int64)
4123
- value = _get_cache_prim(P.FillV2)()(shape_tensor, value)
4124
- return _get_cache_prim(P.Select)()(cond, input, value)
4434
+ shape_tensor = tuple_to_tensor_(input_shape, mstype.int64)
4435
+ value = fillv2_(shape_tensor, value)
4436
+ return select_(cond, input, value)
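The rewritten body is still an elementwise select: values strictly greater than `thr` are kept, everything else is replaced by `value`. Equivalent NumPy sketch (illustration only):

    import numpy as np

    def threshold_sketch(x, thr, value):
        # keep x where x > thr, otherwise fill with `value`
        return np.where(x > thr, x, value)

    print(threshold_sketch(np.array([-1.0, 0.5, 2.0]), thr=1.0, value=20.0))
    # [20. 20.  2.]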
4125
4437
 
4126
4438
 
4127
4439
  def leaky_relu(input, alpha=0.2):
@@ -4139,6 +4451,11 @@ def leaky_relu(input, alpha=0.2):
4139
4451
  For more details, see `Rectifier Nonlinearities Improve Neural Network Acoustic Models
4140
4452
  <https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf>`_.
4141
4453
 
4454
+ LeakyReLU Activation Function Graph:
4455
+
4456
+ .. image:: ../images/LeakyReLU.png
4457
+ :align: center
4458
+
4142
4459
  Args:
4143
4460
  input (Tensor): The input of leaky_relu is a Tensor of any dimension.
4144
4461
  alpha (Union[int, float]): Slope of the activation function when the element of `input` is less than 0.
@@ -4165,10 +4482,10 @@ def leaky_relu(input, alpha=0.2):
4165
4482
  """
4166
4483
  _check_is_tensor('input', input, "leaky_relu")
4167
4484
  _check_value_type("alpha", alpha, [float, int], "leaky_relu")
4168
- select_op = _get_cache_prim(P.Maximum)()
4485
+ select_op = maximum_
4169
4486
  if alpha > 1:
4170
- select_op = _get_cache_prim(P.Minimum)()
4171
- alpha = _get_cache_prim(P.Cast)()(F.scalar_to_tensor(alpha), input.dtype)
4487
+ select_op = minimum_
4488
+ alpha = cast_(F.scalar_to_tensor(alpha), input.dtype)
4172
4489
  return select_op(alpha * input, input)
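The operator swap above is the interesting detail: for `alpha <= 1`, `maximum(alpha * x, x)` already equals leaky ReLU, but for `alpha > 1` the same expression would be wrong, so the code switches to `minimum`. A NumPy sketch of the identity (illustration only):

    import numpy as np

    x = np.array([-2.0, -0.5, 0.0, 3.0])
    leaky = lambda a: np.where(x < 0, a * x, x)

    assert np.allclose(np.maximum(0.2 * x, x), leaky(0.2))   # alpha <= 1: maximum works
    assert np.allclose(np.minimum(2.0 * x, x), leaky(2.0))   # alpha > 1: minimum is needed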
4173
4490
 
4174
4491
 
@@ -4209,48 +4526,6 @@ def intopk(x1, x2, k):
4209
4526
  _in_topk = _get_cache_prim(P.InTopK)(k)
4210
4527
  return _in_topk(x1, x2)
4211
4528
 
4212
-
4213
- def log_softmax(logits, axis=-1):
4214
- r"""
4215
- Applies the Log Softmax function to the input tensor on the specified axis.
4216
- Supposes a slice in the given axis, :math:`x` for each element :math:`x_i`,
4217
- the Log Softmax function is shown as follows:
4218
-
4219
- .. math::
4220
- \text{output}(x_i) = \log \left(\frac{\exp(x_i)} {\sum_{j = 0}^{N-1}\exp(x_j)}\right),
4221
-
4222
- where :math:`N` is the length of the Tensor.
4223
-
4224
- Args:
4225
- logits (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
4226
- additional dimensions, with float16 or float32 data type.
4227
- axis (int): The axis to perform the Log softmax operation. Default: ``-1`` .
4228
-
4229
- Returns:
4230
- Tensor, with the same type and shape as the logits.
4231
-
4232
- Raises:
4233
- TypeError: If `axis` is not an int.
4234
- TypeError: If dtype of `logits` is neither float16 nor float32.
4235
- ValueError: If `axis` is not in range [-len(logits.shape), len(logits.shape)).
4236
- ValueError: If dimension of `logits` is less than 1.
4237
-
4238
- Supported Platforms:
4239
- ``Ascend`` ``GPU`` ``CPU``
4240
-
4241
- Examples:
4242
- >>> import mindspore
4243
- >>> import numpy as np
4244
- >>> from mindspore import Tensor, ops
4245
- >>> logits = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
4246
- >>> output = ops.log_softmax(logits)
4247
- >>> print(output)
4248
- [-4.4519143 -3.4519143 -2.4519143 -1.4519144 -0.4519144]
4249
- """
4250
- _log_softmax = _get_cache_prim(P.LogSoftmax)(axis)
4251
- return _log_softmax(logits)
4252
-
4253
-
4254
4529
  def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CHANNELS"):
4255
4530
  r"""
4256
4531
  Local Response Normalization.
@@ -4319,6 +4594,11 @@ def mish(x):
4319
4594
  See more details in `A Self Regularized Non-Monotonic Neural Activation Function
4320
4595
  <https://arxiv.org/abs/1908.08681>`_.
4321
4596
 
4597
+ Mish Activation Function Graph:
4598
+
4599
+ .. image:: ../images/Mish.png
4600
+ :align: center
4601
+
4322
4602
  Args:
4323
4603
  x (Tensor): The input Tensor.
4324
4604
  Supported dtypes:
@@ -4385,21 +4665,15 @@ def _get_loss(x, reduction, cls_name, weights=1.0):
4385
4665
  if reduction not in ('mean', 'sum', 'none'):
4386
4666
  raise ValueError(f"For '{cls_name}', the 'reduction' must be in ['mean', 'sum', 'none'], "
4387
4667
  f"but got {reduction}.")
4388
-
4389
- reduce_mean = P.ReduceMean()
4390
- reduce_sum = P.ReduceSum()
4391
- mul = P.Mul()
4392
- cast = P.Cast()
4393
-
4394
4668
  input_dtype = x.dtype
4395
- x = cast(x, mstype.float32)
4396
- weights = cast(weights, mstype.float32)
4397
- x = mul(weights, x)
4669
+ x = cast_(x, mstype.float32)
4670
+ weights = cast_(weights, mstype.float32)
4671
+ x = mul_(weights, x)
4398
4672
  if reduction == 'mean':
4399
- x = reduce_mean(x, _get_axis(x))
4673
+ x = reduce_mean_(x, _get_axis(x))
4400
4674
  if reduction == 'sum':
4401
- x = reduce_sum(x, _get_axis(x))
4402
- x = cast(x, input_dtype)
4675
+ x = reduce_sum_(x, _get_axis(x))
4676
+ x = cast_(x, input_dtype)
4403
4677
  return x
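`_get_loss` is the small reduction helper shared by the loss functions in this file: it multiplies the loss by `weights`, reduces in float32, then casts back to the input dtype. A behavioural sketch in NumPy (illustration only, scalar `weights` assumed):

    import numpy as np

    def get_loss_sketch(x, reduction='mean', weights=1.0):
        dtype = x.dtype
        x = np.float32(weights) * x.astype(np.float32)   # weight and reduce in float32
        if reduction == 'mean':
            x = x.mean()
        elif reduction == 'sum':
            x = x.sum()
        return np.asarray(x, dtype=dtype)                # cast back to the input dtype

    print(get_loss_sketch(np.array([1.0, 2.0, 3.0], np.float16), 'sum'))   # 6.0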
4404
4678
 
4405
4679
 
@@ -4410,20 +4684,6 @@ def check_input_dtype(param_name1, input_data1, param_name2, input_data2, cls_na
4410
4684
  f'but got {param_name1} dtype:{input_data1.dtype}, {param_name2} dtype:{input_data2.dtype}.')
4411
4685
 
4412
4686
 
4413
- def check_input_shape(param_name1, input_data1, param_name2, input_data2, cls_name):
4414
- """Check the shape of input1 and input2."""
4415
- if input_data1.shape != input_data2.shape:
4416
- raise ValueError(f'For {cls_name}, the {param_name1} shape should be equal to {param_name2} shape, '
4417
- f'but got {param_name1} shape:{input_data1.shape}, {param_name2} shape:{input_data2.shape}.')
4418
-
4419
-
4420
- def _check_type_and_shape_same(param_name1, input_data1, param_name2, input_data2, cls_name):
4421
- """check input1 and input2 type and shape same"""
4422
- check_input_dtype(param_name1, input_data1, param_name2, input_data2, cls_name)
4423
- check_input_shape(param_name1, input_data1, param_name2, input_data2, cls_name)
4424
- return 0
4425
-
4426
-
4427
4687
  def margin_ranking_loss(input1, input2, target, margin=0.0, reduction='mean'):
4428
4688
  r"""
4429
4689
  MarginRankingLoss creates a criterion that measures the loss.
@@ -4448,7 +4708,7 @@ def margin_ranking_loss(input1, input2, target, margin=0.0, reduction='mean'):
4448
4708
  - ``'sum'``: the output elements will be summed.
4449
4709
 
4450
4710
  Returns:
4451
- Tensor or Scalar. if `reduction` is ``"none"``, its shape is the same as `labels`.
4711
+ Tensor or Scalar. if `reduction` is ``'none'``, its shape is the same as `input1`.
4452
4712
  Otherwise, a scalar value will be returned.
4453
4713
 
4454
4714
  Raises:
@@ -4478,10 +4738,9 @@ def margin_ranking_loss(input1, input2, target, margin=0.0, reduction='mean'):
4478
4738
  _check_is_tensor('input1', input1, "margin_ranking_loss")
4479
4739
  _check_is_tensor('input2', input2, "margin_ranking_loss")
4480
4740
  _check_is_tensor('target', target, "margin_ranking_loss")
4481
- maximum = P.Maximum()
4482
- _check_type_and_shape_same('input1', input1, 'input2', input2, 'margin_ranking_loss')
4483
- _check_type_and_shape_same('target', target, 'input1', input1, 'margin_ranking_loss')
4484
- x = maximum(-target * (input1 - input2) + margin, 0)
4741
+ check_input_dtype('input1', input1, 'input2', input2, 'margin_ranking_loss')
4742
+ check_input_dtype('target', target, 'input1', input1, 'margin_ranking_loss')
4743
+ x = maximum_(-target * (input1 - input2) + margin, 0)
4485
4744
  return _get_loss(x, reduction, "margin_ranking_loss")
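The hunk replaces the strict type-and-shape check with a dtype-only check; the loss itself stays the usual margin ranking form, max(0, -y * (x1 - x2) + margin), handed to `_get_loss` for reduction. NumPy sketch of the unreduced loss (illustration only):

    import numpy as np

    def margin_ranking_sketch(x1, x2, y, margin=0.0):
        # y is +1 when x1 should rank higher than x2, and -1 otherwise
        return np.maximum(-y * (x1 - x2) + margin, 0.0)

    print(margin_ranking_sketch(np.array([0.9, 0.2]), np.array([0.1, 0.8]),
                                np.array([1.0, 1.0]), margin=0.5))   # [0.  1.1]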
4486
4745
 
4487
4746
 
@@ -4509,7 +4768,7 @@ def cosine_embedding_loss(input1, input2, target, margin=0.0, reduction="mean"):
4509
4768
  input2 (Tensor): Tensor of shape :math:`(N, *)`, same shape and dtype as `input1`.
4510
4769
  target (Tensor): Contains value 1 or -1. Suppose the shape of `input1` is
4511
4770
  :math:`(x_1, x_2, x_3, ..., x_R)`, then the shape of `target` must be :math:`(x_1, x_3, x_4, ..., x_R)`.
4512
- margin (float, optional): Should be in [-1.0, 1.0]. Default: 0.0.
4771
+ margin (float, optional): Should be in [-1.0, 1.0]. Default: ``0.0``.
4513
4772
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4514
4773
  ``'sum'`` . Default: ``'mean'`` .
4515
4774
 
@@ -4524,7 +4783,7 @@ def cosine_embedding_loss(input1, input2, target, margin=0.0, reduction="mean"):
4524
4783
  Raises:
4525
4784
  TypeError: If `margin` is not a float.
4526
4785
  ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
4527
- ValueError: If `margin` is not in range [-1, 1].
4786
+ ValueError: If `margin` is not in range [-1.0, 1.0].
4528
4787
 
4529
4788
  Supported Platforms:
4530
4789
  ``Ascend`` ``GPU`` ``CPU``
@@ -4544,7 +4803,7 @@ def cosine_embedding_loss(input1, input2, target, margin=0.0, reduction="mean"):
4544
4803
  _check_is_tensor('input1', input1, "ops.cosine_embedding_loss")
4545
4804
  _check_is_tensor('input2', input2, "ops.cosine_embedding_loss")
4546
4805
  _check_is_tensor('target', target, "ops.cosine_embedding_loss")
4547
- _check_type_and_shape_same('input1', input1, 'input2', input2, 'ops.cosine_embedding_loss')
4806
+ check_input_dtype('input1', input1, 'input2', input2, 'ops.cosine_embedding_loss')
4548
4807
  _check_reduced_shape_valid(ops.shape(input1), ops.shape(target), (1,),
4549
4808
  "ops.cosine_embedding_loss", "input1", "target")
4550
4809
  if input1.dtype in (mstype.int32, mstype.int64):
@@ -4558,14 +4817,14 @@ def cosine_embedding_loss(input1, input2, target, margin=0.0, reduction="mean"):
4558
4817
  if margin_f > 1.0 or margin_f < -1.0:
4559
4818
  raise ValueError(f"For ops.cosine_embedding_loss, the value of 'margin' should be in [-1, 1],"
4560
4819
  f"but got {margin_f}.")
4561
- prod_sum = _get_cache_prim(P.ReduceSum)()(input1 * input2, (1,))
4562
- square1 = _get_cache_prim(P.ReduceSum)()(ops.square(input1), (1,))
4563
- square2 = _get_cache_prim(P.ReduceSum)()(ops.square(input2), (1,))
4820
+ prod_sum = reduce_sum_(input1 * input2, (1,))
4821
+ square1 = reduce_sum_(ops.square(input1), (1,))
4822
+ square2 = reduce_sum_(ops.square(input2), (1,))
4564
4823
  denom = ops.sqrt(square1) * ops.sqrt(square2)
4565
4824
  cosine = prod_sum / denom
4566
4825
 
4567
4826
  pos_value = 1.0 - cosine
4568
- neg_value = _get_cache_prim(P.Maximum)()(cosine - margin_f, 0.0)
4827
+ neg_value = maximum_(cosine - margin_f, 0.0)
4569
4828
  zeros = ops.zeros_like(cosine)
4570
4829
  pos_part = ops.select(target == 1, pos_value, zeros)
4571
4830
  neg_part = ops.select(target == -1, neg_value, zeros)
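The replaced primitives above still compute the usual cosine embedding loss: cosine similarity along axis 1, then 1 - cos for pairs labelled 1 and max(0, cos - margin) for pairs labelled -1. NumPy sketch of the per-pair loss (illustration only; assumes non-zero rows):

    import numpy as np

    def cosine_embedding_sketch(x1, x2, y, margin=0.0):
        cos = (x1 * x2).sum(1) / (np.linalg.norm(x1, axis=1) * np.linalg.norm(x2, axis=1))
        pos = 1.0 - cos                        # used where y == 1
        neg = np.maximum(cos - margin, 0.0)    # used where y == -1
        return np.where(y == 1, pos, np.where(y == -1, neg, 0.0))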
@@ -4622,7 +4881,7 @@ def max_pool3d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal
4622
4881
  W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - \text{dilation}[2] \times
4623
4882
  (\text{kernel_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor
4624
4883
 
4625
- - **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int64. It will be return
4884
+ - **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int64. It will be returned
4626
4885
  only when `return_indices` is ``True`` .
4627
4886
 
4628
4887
  Raises:
@@ -4700,9 +4959,9 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corner
4700
4959
 
4701
4960
  padding_mode (str): An optional string specifying the pad method. The optional values are "zeros", "border" or
4702
4961
  "reflection". Default: ``'zeros'`` .
4703
- align_corners (bool): An optional bool. If set to `True`, the extrema (-1 and 1) are considered as referring to
4704
- the center points of the inputs corner pixels. If set to `False`, they are instead considered as referring
4705
- to the corner points of the inputs corner pixels, making the sampling more resolution agnostic. Default:
4962
+ align_corners (bool): If set to `True`, the extrema (-1 and 1) are considered as referring to
4963
+ the center points of the input's corner pixels. If set to `False`, they are instead considered as referring
4964
+ to the corner points of the input's corner pixels, making the sampling more resolution agnostic. Default:
4706
4965
  ``False`` .
4707
4966
 
4708
4967
  Returns:
@@ -4775,8 +5034,8 @@ def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reducti
4775
5034
  log_probs (Tensor): A tensor of shape :math:`(T, N, C)`, where T is input length, N is batch size and C is
4776
5035
  number of classes (including blank).
4777
5036
  targets (Tensor): Target sequences. A tensor of shape :math:`(N, S)`, where S is max target length.
4778
- input_lengths (Union(tuple, Tensor)): Lengths of the input. A tuple or Tensor of shape(N).
4779
- target_lengths (Union(tuple, Tensor)): Lengths of the target. A tuple or Tensor of shape(N).
5037
+ input_lengths (Union(tuple, Tensor)): Lengths of the input. A tuple or Tensor of shape :math:`(N)`.
5038
+ target_lengths (Union(tuple, Tensor)): Lengths of the target. A tuple or Tensor of shape :math:`(N)`.
4780
5039
  blank (int, optional): The blank label. Default: ``0`` .
4781
5040
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4782
5041
  ``'sum'`` . Default: ``'mean'`` .
@@ -4923,30 +5182,17 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
4923
5182
  if not x.shape == var.shape:
4924
5183
  if x.shape[:-1] == var.shape:
4925
5184
  var = var.unsqueeze(dim=-1)
4926
- # Heterosclerotic case
4927
- elif x.shape[:-1] == var.shape[:-1] and var.shape[-1] == 1:
4928
- pass
4929
- else:
4930
- raise ValueError(f"For 'gaussian_nll_loss', 'var' must be able to correctly broadcast to 'x' and 'target'.")
4931
- max_op = P.Maximum()
4932
- log_op = P.Log()
4933
- square_op = P.Square()
4934
- maxima = max_op(var, eps)
4935
- logarithm = log_op(maxima)
4936
- squared_loss = square_op(x - target)
5185
+
5186
+ maxima = maximum_(var, eps)
5187
+ logarithm = log_(maxima)
5188
+ squared_loss = square_(x - target)
4937
5189
  c = 0 if not full else 0.5 * log(2 * pi)
4938
5190
  loss = 0.5 * (logarithm + squared_loss / maxima) + c
4939
5191
  if reduction == 'mean':
4940
5192
  loss = loss.mean()
4941
5193
  elif reduction == 'sum':
4942
5194
  loss = loss.sum()
4943
- return loss
4944
-
4945
-
4946
- @_primexpr
4947
- def _check_hinge_embedding_loss(shape, shape2):
4948
- if shape2 != shape:
4949
- raise ValueError(f"For 'HingeEmbeddingLoss' the input tensor and the labels must have the same shape.")
5195
+ return loss
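After the hunk the core computation is unchanged: the variance is clamped from below by `eps`, then the loss is 0.5 * (log(var) + (x - target)^2 / var), plus 0.5 * log(2*pi) when `full=True`. NumPy sketch of the unreduced loss (illustration only):

    import numpy as np

    def gaussian_nll_sketch(x, target, var, full=False, eps=1e-6):
        var = np.maximum(var, eps)                            # numerical floor, as in the diff
        loss = 0.5 * (np.log(var) + (x - target) ** 2 / var)
        if full:
            loss = loss + 0.5 * np.log(2 * np.pi)
        return loss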
4950
5196
 
4951
5197
 
4952
5198
  @_primexpr
@@ -4996,7 +5242,7 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
4996
5242
  inputs (Tensor): Predicted values, represented as :math:`x` in the formula.
4997
5243
  targets (Tensor): Label values, represented as :math:`y` in the formula.
4998
5244
  Has the same shape as `inputs`, contains -1 or 1.
4999
- margin (float, int): Threshold defined by Hinge Embedding Loss :math:`margin`.
5245
+ margin (float, int): Threshold defined by Hinge Embedding Loss `margin`.
5000
5246
  Represented as :math:`\Delta` in the formula. Default: ``1.0`` .
5001
5247
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
5002
5248
  ``'sum'`` . Default: ``'mean'`` .
@@ -5006,7 +5252,7 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
5006
5252
  - ``'sum'``: the output elements will be summed.
5007
5253
 
5008
5254
  Returns:
5009
- Tensor or Tensor scalar, the computed loss depending on :math:`reduction`.
5255
+ Tensor or Tensor scalar, the computed loss depending on `reduction`.
5010
5256
 
5011
5257
  Raises:
5012
5258
  TypeError: If `inputs` is not a Tensor.
@@ -5021,7 +5267,7 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
5021
5267
  Examples:
5022
5268
  >>> import numpy as np
5023
5269
  >>> import mindspore.common.dtype as mstype
5024
- >>> import mindspore.ops as ops
5270
+ >>> from mindspore import ops
5025
5271
  >>> from mindspore import Tensor
5026
5272
  >>> arr1 = np.array([0.9, -1.2, 2, 0.8, 3.9, 2, 1, 0, -1]).reshape((3, 3))
5027
5273
  >>> arr2 = np.array([1, 1, -1, 1, -1, 1, -1, 1, 1]).reshape((3, 3))
@@ -5034,9 +5280,6 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
5034
5280
  inputs_dtype = inputs.dtype
5035
5281
  targets_dtype = targets.dtype
5036
5282
  _check_hinge_embedding_loss_type(inputs_dtype, targets_dtype, inputs, targets, margin, reduction)
5037
- _shape = inputs.shape
5038
- _t_shape = targets.shape
5039
- _check_hinge_embedding_loss(_shape, _t_shape)
5040
5283
 
5041
5284
  min_val = Tensor(0, inputs_dtype)
5042
5285
  pos_index = targets > 0
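With the explicit shape check removed, the function now relies on the dtype check plus broadcasting. The rest of the body (outside this hunk) keeps the standard hinge embedding form, stated here as an assumption: the element-wise loss is x where the label is 1 and max(0, margin - x) where the label is -1. NumPy sketch (illustration only):

    import numpy as np

    def hinge_embedding_sketch(x, y, margin=1.0):
        # y holds +1 / -1 labels
        return np.where(y == 1, x, np.maximum(0.0, margin - x))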
@@ -5257,21 +5500,25 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5257
5500
  , :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5258
5501
 
5259
5502
  Here are the indices' meanings:
5260
- - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
5261
5503
 
5262
- - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
5504
+ - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`,
5505
+ where :math:`N` is the batch size of the input.
5506
+
5507
+ - :math:`j` corresponds to the output channel, ranging from :math:`[0, C_{out}-1]`,
5508
+ where :math:`C_{out}` is the number of
5263
5509
  output channels, which is also equal to the number of kernels.
5264
5510
 
5265
- - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
5511
+ - :math:`k` corresponds to the input channel, ranging from :math:`[0, C_{in}-1]`,
5512
+ where :math:`C_{in}` is the number of
5266
5513
  input channels, which is also equal to the number of channels in the convolutional kernels.
5267
5514
 
5268
- Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
5269
- output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
5515
+ Therefore, in the above formula, :math:`{bias}(C_{\text{out}_j})` represents the bias of the :math:`j`-th
5516
+ output channel, :math:`{weight}(C_{\text{out}_j}, k)` represents the slice of the :math:`j`-th convolutional
5270
5517
  kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
5271
5518
  channel in the :math:`i`-th batch of the input feature map.
5272
5519
 
5273
- The shape of the convolutional kernel is given by :math:`(kernel\_size)`,
5274
- where :math:`kernel\_size` is the width of the kernel.
5520
+ The shape of the convolutional kernel is given by :math:`(\text{kernel_size})`,
5521
+ where :math:`\text{kernel_size}` is the width of the kernel.
5275
5522
  If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5276
5523
  will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`,
5277
5524
  where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
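The kernel-shape statement above can be checked against the example further down: with C_in = 4, groups = 2, C_out = 2 and kernel_size = 2, the complete weight shape (C_out, C_in/groups, kernel_size) is (2, 2, 2), exactly the weight used there. A small bookkeeping sketch (illustrative; the 'pad'-mode output-length formula below is the standard one and is an assumption here, not quoted from this hunk):

    c_in, c_out, groups, kernel_size = 4, 2, 2, 2
    weight_shape = (c_out, c_in // groups, kernel_size)            # (2, 2, 2)

    l_in, padding, dilation, stride = 4, 1, 1, 1
    l_out = (l_in + 2 * padding - dilation * (kernel_size - 1) - 1) // stride + 1
    print(weight_shape, l_out)                                     # (2, 2, 2) 5 -> output (4, 2, 5)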
@@ -5282,7 +5529,7 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5282
5529
 
5283
5530
  Note:
5284
5531
  On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
5285
- That is, when `groups>1`, condition `C_{in}` = `C_{out}` = `groups` must be satisfied.
5532
+ That is, when `groups>1`, condition :math:`C_{in}` = :math:`C_{out}` = `groups` must be satisfied.
5286
5533
 
5287
5534
  Args:
5288
5535
  input (Tensor): Input Tensor of shape :math:`(N, C_{in}, L_{in})`.
@@ -5342,7 +5589,7 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5342
5589
  >>> from mindspore import Tensor, ops
5343
5590
  >>> x = Tensor(np.arange(64).reshape((4, 4, 4)), mindspore.float32)
5344
5591
  >>> weight = Tensor(np.arange(8).reshape((2, 2, 2)), mindspore.float32)
5345
- >>> bias = Tensor([-0.12345, 2.7683], ms.float32)
5592
+ >>> bias = Tensor([-0.12345, 2.7683], mindspore.float32)
5346
5593
  >>> output = ops.conv1d(x, weight, pad_mode='pad', padding=(1,), bias=bias, groups=2)
5347
5594
  >>> print(output.shape)
5348
5595
  (4, 2, 5)
@@ -5351,13 +5598,12 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5351
5598
  raise ValueError(f"For 'conv1d', the input must be a 3D Tensor, but got input of {input.ndim}D.")
5352
5599
  if weight.ndim != 3:
5353
5600
  raise ValueError(f"For 'conv1d', the weight must be a 3D Tensor, but got input of {weight.ndim}D.")
5354
- _expand = _get_cache_prim(P.ExpandDims)()
5355
- expanded_input = _expand(input, 2)
5601
+ expanded_input = expand_dims_(input, 2)
5356
5602
  sqz = _get_cache_prim(P.Squeeze)(2)
5357
5603
  weight_shape = weight.shape
5358
5604
  out_channel = weight_shape[0]
5359
5605
  kernel_size = (1, weight_shape[2])
5360
- expanded_weight = _expand(weight, 2)
5606
+ expanded_weight = expand_dims_(weight, 2)
5361
5607
  if isinstance(padding, int):
5362
5608
  padding = (0, 0, padding, padding)
5363
5609
  elif isinstance(padding, (tuple, list)):
@@ -5406,12 +5652,15 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5406
5652
  , :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5407
5653
 
5408
5654
  Here are the indices' meanings:
5409
- - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
5410
5655
 
5411
- - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
5412
- output channels, which is also equal to the number of kernels.
5656
+ - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`,
5657
+ where :math:`N` is the batch size of the input.
5658
+
5659
+ - :math:`j` corresponds to the output channel, the range is :math:`[0, C_{out}-1]`,
5660
+ where :math:`C_{out}` is the number of output channels, which is also equal to the number of kernels.
5413
5661
 
5414
- - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
5662
+ - :math:`k` corresponds to the input channel, the range is :math:`[0, C_{in}-1]`,
5663
+ where :math:`C_{in}` is the number of
5415
5664
  input channels, which is also equal to the number of channels in the convolutional kernels.
5416
5665
 
5417
5666
  Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
@@ -5419,8 +5668,9 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5419
5668
  kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
5420
5669
  channel in the :math:`i`-th batch of the input feature map.
5421
5670
 
5422
- The shape of the convolutional kernel is given by :math:`(kernel\_size[0], kernel\_size[1])`,
5423
- where :math:`kernel\_size[0]` and :math:`kernel\_size[1]` are the height and width of the kernel, respectively.
5671
+ The shape of the convolutional kernel is given by :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`,
5672
+ where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the kernel,
5673
+ respectively.
5424
5674
  If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5425
5675
  will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
5426
5676
  where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
@@ -5431,7 +5681,7 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5431
5681
 
5432
5682
  Note:
5433
5683
  On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
5434
- That is, when `groups>1`, condition `C_{in}` = `C_{out}` = `groups` must be satisfied.
5684
+ That is, when `groups>1`, condition :math:`C_{in}` = :math:`C_{out}` = `groups` must be satisfied.
5435
5685
 
5436
5686
  Args:
5437
5687
  input (Tensor): Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
@@ -5476,7 +5726,7 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5476
5726
  TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
5477
5727
  TypeError: `groups` is not an int.
5478
5728
  TypeError: If `bias` is not a Tensor.
5479
- ValueError: If the shape of `bias` is not :math:`C_{out}` .
5729
+ ValueError: If the shape of `bias` is not :math:`(C_{out})` .
5480
5730
  ValueError: If `stride` or `dilation` is less than 1.
5481
5731
  ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
5482
5732
  ValueError: If `padding` is a tuple/list whose length is not equal to 2.
@@ -5523,6 +5773,75 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5523
5773
  return output
5524
5774
 
5525
5775
 
5776
+ def conv_transpose2d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1):
5777
+ r"""
5778
+ Calculates a 2D transposed convolution, which can be regarded as Conv2d for the gradient of the input,
5779
+ also called deconvolution (although it is not an actual deconvolution).
5780
+
5781
+ The input is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
5782
+ where :math:`N` is batch size, :math:`C_{in}` is space dimension,
5783
+ :math:`H_{in}, W_{in}` are the height and width of the feature layer respectively.
5784
+
5785
+ When Conv2d and Conv2dTranspose are initialized with the same parameters, and `pad_mode` is set to 'pad',
5786
+ :math:`dilation * (kernel\_size - 1) - padding` amount of zero will be paded to the height and width
5787
+ directions of the input, they are inverses of each other in regard to the input and output shapes in this case.
5788
+ However, when `stride` > 1, Conv2d maps multiple input shapes to the same output shape. Deconvolutional network
5789
+ can refer to `Deconvolutional Networks <https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf>`_.
5790
+
5791
+ Args:
5792
+ input (Tensor): Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
5793
+ weight (Tensor): Tensor of shape
5794
+ :math:`(N, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`, then the size of kernel
5795
+ is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`.
5796
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
5797
+ When bias is ``None`` , zeros will be used. Default: ``None`` .
5798
+ stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents
5799
+ the height and width of movement are both strides, or a tuple of two int numbers that
5800
+ represent height and width of movement respectively. Default: ``1`` .
5801
+ padding (Union(int, tuple[int], list[int]), optional): Implicit paddings on both sides of the input `x`.
5802
+ Can be an integer or a tuple/list with 2 integers.
5803
+ output_padding (Union[int, tuple[int]]): The number of padding on the height and width directions of the output.
5804
+ The data type is an integer or a tuple of two integers. If `output_padding` is an integer,
5805
+ then the bottom and right padding are all equal to `output_padding`. If `output_padding` is a tuple of
5806
+ 2 integers, then the bottom and right padding is equal to `output_padding[0]`, `output_padding[1]`
5807
+ respectively.
5808
+ groups (int, optional): Splits `input` into groups. Default: ``1`` .
5809
+ dilation (Union(int, tuple[int]), optional): Gaps between kernel elements.The data type is int or a tuple of
5810
+ 2 integers. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
5811
+ there will be :math:`k - 1` pixels skipped for each sampling location. Its value must
5812
+ be greater than or equal to 1 and bounded by the height and width of the input `x`. Default: ``1`` .
5813
+
5814
+ Returns:
5815
+ Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`.
5816
+ To see how different pad modes affect the output shape, please refer to
5817
+ :class:`mindspore.nn.Conv2dTranspose` for more details.
5818
+
5819
+
5820
+ Raises:
5821
+ TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
5822
+ TypeError: `groups` is not an int.
5823
+ TypeError: If `bias` is not a Tensor.
5824
+ ValueError: If the shape of `bias` is not :math:`(C_{out})` .
5825
+ ValueError: If `stride` or `dilation` is less than 1.
5826
+ ValueError: If `padding` is a tuple/list whose length is not equal to 2.
5827
+
5828
+ Supported Platforms:
5829
+ ``Ascend``
5830
+
5831
+ Examples:
5832
+ >>> import mindspore
5833
+ >>> import numpy as np
5834
+ >>> from mindspore import Tensor, ops
5835
+ >>> x = Tensor(np.ones([1, 6, 32, 32]), mindspore.float32)
5836
+ >>> weight = Tensor(np.ones([6, 3, 5, 5]), mindspore.float32)
5837
+ >>> output = ops.conv_transpose2d(x, weight)
5838
+ >>> print(output.shape)
5839
+ (1, 3, 36, 36)
5840
+ """
5841
+ conv = _get_cache_prim(Convolution)(stride, padding, dilation, True, output_padding, groups)
5842
+ return conv(input, weight, bias)
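The example's output shape can be reproduced with the standard transposed-convolution size formula (stated here as an assumption; the docstring points to mindspore.nn.Conv2dTranspose for the authoritative shape rules):

    # H_out = (H_in - 1)*stride - 2*padding + dilation*(kernel_size - 1) + output_padding + 1
    h_in, stride, padding, dilation, kernel_size, output_padding = 32, 1, 0, 1, 5, 0
    h_out = (h_in - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + output_padding + 1
    print(h_out)   # 36, matching the (1, 3, 36, 36) shape printed in the example above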
5843
+
5844
+
5526
5845
  def hardsigmoid(input):
5527
5846
  r"""
5528
5847
  Hard sigmoid activation function.
@@ -5537,6 +5856,11 @@ def hardsigmoid(input):
5537
5856
 
5538
5857
  where :math:`x_i` is an element of the input Tensor.
5539
5858
 
5859
+ HSigmoid Activation Function Graph:
5860
+
5861
+ .. image:: ../images/HSigmoid.png
5862
+ :align: center
5863
+
5540
5864
  Args:
5541
5865
  input (Tensor): The input Tensor.
5542
5866
 
@@ -5576,10 +5900,15 @@ def hardtanh(input, min_val=-1.0, max_val=1.0):
5576
5900
 
5577
5901
  Linear region range :math:`[min\_val, max\_val]` can be adjusted using `min_val` and `max_val`.
5578
5902
 
5903
+ Hardtanh Activation Function Graph:
5904
+
5905
+ .. image:: ../images/Hardtanh.png
5906
+ :align: center
5907
+
5579
5908
  Args:
5580
5909
  input (Tensor): Input Tensor.
5581
- min_val (Union[int, float]): Minimum value of the linear region range. Default: ``-1.0`` .
5582
- max_val (Union[int, float]): Maximum value of the linear region range. Default: ``1.0`` .
5910
+ min_val (Union[int, float], optional): Minimum value of the linear region range. Default: ``-1.0`` .
5911
+ max_val (Union[int, float], optional): Maximum value of the linear region range. Default: ``1.0`` .
5583
5912
 
5584
5913
  Returns:
5585
5914
  Tensor, with the same dtype and shape as `input`.
@@ -5604,18 +5933,18 @@ def hardtanh(input, min_val=-1.0, max_val=1.0):
5604
5933
  _check_value_type("min_val", min_val, [int, float], "hardtanh")
5605
5934
  _check_value_type("max_val", max_val, [int, float], "hardtanh")
5606
5935
  input_dtype = input.dtype
5607
- input = _get_cache_prim(P.Maximum)()(input, min_val)
5608
- input = _get_cache_prim(P.Minimum)()(input, max_val)
5936
+ input = maximum_(input, min_val)
5937
+ input = minimum_(input, max_val)
5609
5938
  return input.astype(input_dtype)
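The rewritten hardtanh body is simply a clamp to [min_val, max_val] using the cached maximum/minimum primitives, followed by a cast back to the original dtype. Equivalent NumPy sketch (illustration only):

    import numpy as np

    def hardtanh_sketch(x, min_val=-1.0, max_val=1.0):
        return np.clip(x, min_val, max_val).astype(x.dtype)

    print(hardtanh_sketch(np.array([-2.0, -0.3, 0.0, 2.5])))   # [-1.  -0.3  0.   1. ]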
5610
5939
 
5611
5940
 
5612
5941
  def huber_loss(input, target, reduction='mean', delta=1.0):
5613
5942
  r"""
5614
5943
  Calculates the error between the predicted value and the target value,
5615
- which has the best of both the loss of l1 and the loss of mse.
5944
+ which has the best of both the loss of :func:`mindspore.ops.l1_loss` and the loss of :func:`mindspore.ops.mse_loss`.
5616
5945
 
5617
5946
  Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, the `reduction` parameter
5618
- is set to ``"none"`` then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.
5947
+ is set to ``'none'`` then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.
5619
5948
  The formula is as follows:
5620
5949
 
5621
5950
  .. math::
@@ -5656,14 +5985,14 @@ def huber_loss(input, target, reduction='mean', delta=1.0):
5656
5985
  The value must be greater than zero. Default: ``1.0`` .
5657
5986
 
5658
5987
  Returns:
5659
- Tensor or Scalar, if `reduction` is ``"none"``, return a Tensor with same shape and dtype as `input`.
5988
+ Tensor or Scalar, if `reduction` is ``'none'``, return a Tensor with same shape and dtype as `input`.
5660
5989
  Otherwise, a scalar value will be returned.
5661
5990
 
5662
5991
  Raises:
5663
5992
  TypeError: If `input` or `target` is not a Tensor.
5664
5993
  TypeError: If dtype of `delta` is neither float nor int.
5665
5994
  ValueError: If `delta` is less than or equal to 0.
5666
- ValueError: If `reduction` is not one of ``"none"``, ``"mean"``, ``"sum"``.
5995
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
5667
5996
  ValueError: If `input` and `target` have different shapes and cannot be broadcasted to each other.
5668
5997
 
5669
5998
  Supported Platforms:
@@ -5682,14 +6011,12 @@ def huber_loss(input, target, reduction='mean', delta=1.0):
5682
6011
  _check_is_tensor('target', target, "huber_loss")
5683
6012
  _check_value_type("delta", delta, [int, float], "huber_loss")
5684
6013
  _check_number_gt_value("delta", delta, 0.0, "huber_loss")
5685
- sub = _get_cache_prim(P.Sub)()
5686
- multi = _get_cache_prim(P.Mul)()
5687
- z = sub(input, target)
5688
- z = _get_cache_prim(P.Abs)()(z)
5689
- cond = _get_cache_prim(P.Less)()(z, delta)
5690
- l1 = multi(0.5, _get_cache_prim(P.Square)()(z))
5691
- l2 = multi(delta, sub(z, 0.5 * delta))
5692
- loss = _get_cache_prim(P.Select)()(cond, l1, l2)
6014
+ z = sub_(input, target)
6015
+ z = abs_(z)
6016
+ cond = less_(z, delta)
6017
+ l1 = mul_(0.5, square_(z))
6018
+ l2 = mul_(delta, sub_(z, 0.5 * delta))
6019
+ loss = select_(cond, l1, l2)
5693
6020
  return _get_loss(loss, reduction, "huber_loss")
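The refactored body keeps the piecewise Huber definition: with z = |input - target|, the loss is 0.5 * z^2 where z < delta and delta * (z - 0.5 * delta) elsewhere, before the shared `_get_loss` reduction. NumPy sketch of the unreduced loss (illustration only):

    import numpy as np

    def huber_sketch(x, target, delta=1.0):
        z = np.abs(x - target)
        return np.where(z < delta, 0.5 * z ** 2, delta * (z - 0.5 * delta))

    print(huber_sketch(np.array([1.0, 4.0]), np.array([1.5, 1.0])))   # [0.125 2.5  ]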
5694
6021
 
5695
6022
 
@@ -5741,7 +6068,7 @@ def adaptive_avg_pool1d(input, output_size):
5741
6068
  """
5742
6069
  def _check(x, output_size):
5743
6070
  x_in_shape = x.shape
5744
- x_dtype = _get_cache_prim(P.DType)()(x)
6071
+ x_dtype = dtype_(x)
5745
6072
  if not isinstance(x, (Tensor, Tensor_)):
5746
6073
  raise TypeError("For adaptive_avg_pool1d, the input input must be tensor")
5747
6074
 
@@ -5761,24 +6088,203 @@ def adaptive_avg_pool1d(input, output_size):
5761
6088
 
5762
6089
  _check(input, output_size)
5763
6090
  x_in_shape = input.shape
5764
- expand_ = _get_cache_prim(P.ExpandDims)()
5765
6091
  squeeze_ = _get_cache_prim(P.Squeeze)(2)
5766
-
5767
6092
  width = x_in_shape[2]
5768
6093
  stride = width // output_size
5769
6094
  kernel_size = width - (output_size - 1) * stride
5770
6095
  stride = (1, width // output_size)
5771
6096
  kernel_size = (1, kernel_size)
5772
-
5773
6097
  avg_pool_ = _get_cache_prim(P.AvgPool)(kernel_size=kernel_size, strides=stride)
5774
-
5775
- input = expand_(input, 2)
6098
+ input = expand_dims_(input, 2)
5776
6099
  input = avg_pool_(input)
5777
6100
  input = squeeze_(input)
5778
-
5779
6101
  return input
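The pooling parameters are derived from the input width and the requested output size exactly as in the body above: stride = width // output_size and kernel = width - (output_size - 1) * stride. A quick sketch of that arithmetic (illustration only):

    width, output_size = 7, 3
    stride = width // output_size                       # 2
    kernel_size = width - (output_size - 1) * stride    # 7 - 2*2 = 3
    print(stride, kernel_size)                          # windows [0:3], [2:5], [4:7]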
5780
6102
 
5781
6103
 
6104
+ def layer_norm(input, normalized_shape, weight=None, bias=None, eps=1e-5):
6105
+ r"""Applies the Layer Normalization on the mini-batch input.
6106
+
6107
+ Layer normalization is widely used in recurrent neural networks. Apply normalization to the mini-batch
6108
+ input of a single training case. LayerNorm is described in the paper
6109
+ `Layer Normalization <https://arxiv.org/abs/1607.06450>`_.
6110
+
6111
+ Unlike batch normalization, layer normalization performs the exact same calculations at training and
6112
+ test time. Applies to all channels and pixels, even batch_size=1. The formula is as follows:
6113
+
6114
+ .. math::
6115
+ y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta
6116
+
6117
+ where :math:`\gamma` is the weight value learned through training, :math:`\beta` is the bias value
6118
+ learned through training.
6119
+
6120
+ Args:
6121
+ input (Tensor): The shape of input is `(N, *)`, where `*` represents any additional dimension.
6122
+ normalized_shape (Union(int, tuple[int], list[int])): The normalized shape of `input` for LayerNorm.
6123
+ `normalized_shape` equal to `input_shape[begin_norm_axis:]`, where `begin_norm_axis` represents the axis
6124
+ where normalization begins.
6125
+ weight (Tensor, optional): Learnable parameter :math:`\gamma` . Tensor of shape `normalized_shape`.
6126
+ Default: ``None``, has the same data type with `input`. Initialized to ``1`` when `weight` is None.
6127
+ bias (Tensor, optional): Learnable parameter :math:`\beta` . Tensor of shape `normalized_shape`.
6128
+ Default: ``None``, has the same data type with `input`. Initialized to ``0`` when `bias` is None.
6129
+ eps (float, optional): A value added to the denominator for numerical stability(:math:`\epsilon`).
6130
+ Default: ``1e-5`` .
6131
+
6132
+ Returns:
6133
+ Tensor. The normalized tensor, has the same type and shape as the `input`.
6134
+
6135
+ Raises:
6136
+ TypeError: If `input` is not a Tensor.
6137
+ TypeError: If `normalized_shape` is not an integer, a list or a tuple.
6138
+ TypeError: If `eps` is not a float.
6139
+
6140
+ Supported Platforms:
6141
+ ``Ascend``
6142
+
6143
+ Examples:
6144
+ >>> import mindspore
6145
+ >>> import numpy as np
6146
+ >>> from mindspore import Tensor, ops
6147
+ >>> input_x = Tensor(np.array([[1, 2, 3], [1, 2, 3]]), mindspore.float32)
6148
+ >>> normalized_shape = (3,)
6149
+ >>> gamma = Tensor(np.ones(normalized_shape), mindspore.float32)
6150
+ >>> beta = Tensor(np.zeros(normalized_shape), mindspore.float32)
6151
+ >>> eps = 1e-7
6152
+ >>> output = ops.layer_norm(input_x, normalized_shape, gamma, beta, eps)
6153
+ >>> print(output)
6154
+ [[-1.2247448 0. 1.2247448]
6155
+ [-1.2247448 0. 1.2247448]]
6156
+ """
6157
+ if weight is None:
6158
+ weight = ops.ones(normalized_shape, dtype=input.dtype)
6159
+ if bias is None:
6160
+ bias = ops.zeros(normalized_shape, dtype=input.dtype)
6161
+ return layer_norm_ext_op(input, normalized_shape, weight, bias, eps)[0]
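The numbers in the docstring example can be reproduced by hand: for a row [1, 2, 3] the mean is 2 and the biased variance is 2/3, so (x - mean) / sqrt(var + eps) is roughly +-1.2247 at the ends. A NumPy sketch of the normalization over the last axis (illustration only; the real kernel is `layer_norm_ext_op`):

    import numpy as np

    x = np.array([[1., 2., 3.], [1., 2., 3.]], dtype=np.float32)
    mean = x.mean(axis=-1, keepdims=True)      # 2.0 per row
    var = x.var(axis=-1, keepdims=True)        # 2/3 per row (biased variance)
    print((x - mean) / np.sqrt(var + 1e-7))    # ~[[-1.2247  0.  1.2247], ...]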
6162
+
6163
+
6164
+ def group_norm(input, num_groups, weight=None, bias=None, eps=1e-5):
6165
+ r"""Group Normalization over a mini-batch of inputs.
6166
+
6167
+ Group Normalization is widely used in recurrent neural networks. It applies
6168
+ normalization on a mini-batch of inputs for each single training case as described
6169
+ in the paper `Group Normalization <https://arxiv.org/pdf/1803.08494.pdf>`_. Group Normalization
6170
+ divides the channels into groups and computes within each group the mean and variance for normalization,
6171
+ and it performs very stable over a wide range of batch size. :math:`\gamma` and :math:`\beta` are trainable scale
6172
+ and shift.
6173
+ It can be described using the following formula:
6174
+
6175
+ .. math::
6176
+ y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta
6177
+
6178
+ where :math:`\gamma` is `weight`, :math:`\beta` is `bias`, :math:`\epsilon` is `eps`.
6179
+
6180
+ Args:
6181
+ input (Tensor): The input feature with shape :math:`(N, C, *)` where :math:`*` means, any number of
6182
+ additional dimensions.
6183
+ num_groups (int): The number of groups to be divided along the channel dimension.
6184
+ weight (Tensor, optional): The shape :math:`(C,)`, Default: ``None``, has the same data type with `input`.
6185
+ bias (Tensor, optional): The shape :math:`(C,)`, Default: ``None``, has the same data type with `input`.
6186
+ eps (float, optional): A value added to the denominator for numerical stability. Default: ``1e-5`` .
6187
+
6188
+ Returns:
6189
+ Tensor, the normalized and scaled offset tensor, has the same shape and data type as the `input`.
6190
+
6191
+ Raises:
6192
+ TypeError: If `num_groups` is not an int.
6193
+ TypeError: If `eps` is not a float.
6194
+ ValueError: If `num_groups` is less than 1.
6195
+ ValueError: If `C` (the second parameter of dimensions of `input`) is not divided by `num_groups`.
6196
+
6197
+ Supported Platforms:
6198
+ ``Ascend`` ``GPU`` ``CPU``
6199
+
6200
+ Examples:
6201
+ >>> import mindspore as ms
6202
+ >>> import numpy as np
6203
+ >>> from mindspore import ops
6204
+ >>> x = ms.Tensor(np.ones([1, 2, 4, 4], np.float32))
6205
+ >>> output = ops.group_norm(x, 2)
6206
+ >>> print(output)
6207
+ [[[[0. 0. 0. 0.]
6208
+ [0. 0. 0. 0.]
6209
+ [0. 0. 0. 0.]
6210
+ [0. 0. 0. 0.]]
6211
+ [[0. 0. 0. 0.]
6212
+ [0. 0. 0. 0.]
6213
+ [0. 0. 0. 0.]
6214
+ [0. 0. 0. 0.]]]]
6215
+ """
6216
+ if weight is None:
6217
+ weight = ops.ones([input.shape[1]], dtype=input.dtype)
6218
+ if bias is None:
6219
+ bias = ops.zeros([input.shape[1]], dtype=input.dtype)
6220
+ return group_norm_op(input, num_groups, weight, bias, eps)[0]
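For the all-ones docstring example each group's mean is 1 and its variance is 0, so (x - mean) / sqrt(var + eps) is 0 everywhere, which is why the printed output is all zeros. A one-line NumPy check of that grouping (illustration only; default weight 1 and bias 0 assumed):

    import numpy as np

    x = np.ones((1, 2, 4, 4), dtype=np.float32)
    g = x.reshape(1, 2, -1)   # num_groups=2 with 2 channels: one channel per group
    print((g - g.mean(-1, keepdims=True)) / np.sqrt(g.var(-1, keepdims=True) + 1e-5))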
6221
+
6222
+
6223
+ def batch_norm_ext(input, running_mean, running_var, weight=None, bias=None, training=False, momentum=0.1, eps=1e-5):
6224
+ r"""
6225
+ Batch Normalization for input data and updated parameters.
6226
+
6227
+ Batch Normalization is widely used in convolutional neural networks. This operation
6228
+ applies Batch Normalization over inputs to avoid internal covariate shift as described
6229
+ in the paper `Batch Normalization: Accelerating Deep Network Training by Reducing Internal
6230
+ Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the
6231
+ features using a mini-batch of data and the learned parameters can be described
6232
+ in the following formula,
6233
+
6234
+ .. math::
6235
+
6236
+ y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta
6237
+
6238
+ where :math:`\gamma` is `weight`, :math:`\beta` is `bias`, :math:`\epsilon` is `eps`, :math:`mean` is the
6239
+ mean of :math:`x`, :math:`variance` is the variance of :math:`x`.
6240
+
6241
+ Args:
6242
+ input (Tensor): Tensor of shape :math:`(N, C, *)`, with bfloat16, float16 or float32 data type.
6243
+ For Atlas training products, the shape must be 2-4 dimensions currently.
6244
+ running_mean (Tensor): The shape :math:`(C,)`, with bfloat, float16 or float32 data type.
6245
+ running_var (Tensor): The shape :math:`(C,)`, with bfloat, float16 or float32 data type.
6246
+ weight (Tensor, optional): The shape :math:`(C,)`, with bfloat, float16 or float32 data type, Default: ``None``.
6247
+ Initialized to ``1`` when `weight` is None.
6248
+ bias (Tensor, optional): The shape :math:`(C,)`, with bfloat, float16 or float32 data type. Default: ``None``.
6249
+ Initialized to ``0`` when `weight` is None.
6250
+ training (bool, optional): If `training` is `True`, `mean` and `variance` are computed during training.
6251
+ If `training` is `False`, they're loaded from checkpoint during inference. Default: ``False`` .
6252
+ momentum (float, optional): The hyper parameter to compute moving average for `running_mean` and `running_var`
6253
+ (e.g. :math:`new\_running\_mean = (1 - momentum) * running\_mean + momentum * current\_mean`).
6254
+ Default: ``0.1`` .
6255
+ eps (float, optional): A small value added for numerical stability. Default: ``1e-5``.
6256
+
6257
+ Returns:
6258
+ Tensor, has the same type and shape as `input`. The shape is :math:`(N, C, *)`.
6259
+
6260
+ Raises:
6261
+ TypeError: If `training` is not a bool.
6262
+ TypeError: If dtype of `eps` or `momentum` is not float.
6263
+ TypeError: If `input`, `weight`, `bias`, `running_mean` or `running_var` is not a Tensor.
6264
+
6265
+ Supported Platforms:
6266
+ ``Ascend``
6267
+
6268
+ Examples:
6269
+ >>> import mindspore
6270
+ >>> from mindspore import Tensor, ops
6271
+ >>> input_x = Tensor([[1.0, 2.0], [3.0, 4.0]], mindspore.float32)
6272
+ >>> running_mean = Tensor([0.5, 1.5], mindspore.float32)
6273
+ >>> running_var = Tensor([0.1, 0.2], mindspore.float32)
6274
+ >>> weight = Tensor([2.0, 2.0], mindspore.float32)
6275
+ >>> bias = Tensor([-1.0, -1.0], mindspore.float32)
6276
+ >>> output = ops.function.nn_func.batch_norm_ext(input_x, running_mean, running_var, weight, bias)
6277
+ >>> print(output)
6278
+ [[ 2.1621194 1.2360122]
6279
+ [14.810596 10.180061 ]]
6280
+ """
6281
+ if weight is None:
6282
+ weight = ops.ones([input.shape[1]], dtype=input.dtype)
6283
+ if bias is None:
6284
+ bias = ops.zeros([input.shape[1]], dtype=input.dtype)
6285
+ output = batch_norm_ext_op(input, weight, bias, running_mean, running_var, training, momentum, eps)
6286
+ return output[0]
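In inference mode (`training=False`) the example reduces to a per-channel affine transform, (x - running_mean) / sqrt(running_var + eps) * weight + bias, which reproduces the printed values. NumPy check of the first input row (illustration only):

    import numpy as np

    x = np.array([1.0, 2.0])                       # first row of input_x, one value per channel
    mean, var = np.array([0.5, 1.5]), np.array([0.1, 0.2])
    weight, bias, eps = np.array([2.0, 2.0]), np.array([-1.0, -1.0]), 1e-5
    print((x - mean) / np.sqrt(var + eps) * weight + bias)   # ~[2.1621 1.2360]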
6287
+
5782
6288
  def batch_norm(input_x, running_mean, running_var, weight, bias, training=False, momentum=0.1, eps=1e-5):
5783
6289
  r"""
5784
6290
  Batch Normalization for input data and updated parameters.
@@ -5798,7 +6304,8 @@ def batch_norm(input_x, running_mean, running_var, weight, bias, training=False,
5798
6304
  mean of :math:`x`, :math:`variance` is the variance of :math:`x`.
5799
6305
 
5800
6306
  .. warning::
5801
- - For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
6307
+ - For Atlas 200/300/500 inference product,
6308
+ the result accuracy fails to reach 1‰ due to the square root instruction.
5802
6309
 
5803
6310
  Note:
5804
6311
  - If `training` is `False`, `weight`, `bias`, `running_mean` and `running_var` are Tensors.
@@ -5915,6 +6422,7 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
5915
6422
  Args:
5916
6423
  logits (Tensor): The predictive value whose data type must be float16 or float32.
5917
6424
  labels (Tensor): The target value which has the same shape and data type as `logits`.
6425
+ And the data type is float16 or float32.
5918
6426
  weight (Tensor, optional): A rescaling weight applied to the loss of each batch element.
5919
6427
  Its shape must be able to broadcast to that of `logits` and `labels`.
5920
6428
  And it must have the same shape and data type as `logits`. Default: ``None`` . If set to ``None`` ,
@@ -5960,7 +6468,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5960
6468
  r"""
5961
6469
  Applies a 3D convolution over an input tensor. The input tensor is typically of
5962
6470
  shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C`
5963
- is channel number, :math:`D` is feature depth, :math:`H` is feature height, :math:`W` is feature width.
6471
+ is channel number, :math:`D, H, W` are the depth, height and width of the feature graph, respectively.
5964
6472
 
5965
6473
  The output is calculated based on formula:
5966
6474
 
@@ -5970,26 +6478,30 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5970
6478
  \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
5971
6479
 
5972
6480
  where :math:`bias` is the output channel bias, :math:`ccor` is
5973
- the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
6481
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_
5974
6482
  , :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5975
6483
 
5976
6484
  Here are the indices' meanings:
5977
- - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
5978
6485
 
5979
- - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
6486
+ - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`,
6487
+ where :math:`N` is the batch size of the input.
6488
+
6489
+ - :math:`j` corresponds to the output channel, the range is :math:`[0, C_{out}-1]`,
6490
+ where :math:`C_{out}` is the number of
5980
6491
  output channels, which is also equal to the number of kernels.
5981
6492
 
5982
- - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
6493
+ - :math:`k` corresponds to the input channel, the range is :math:`[0, C_{in}-1]`,
6494
+ where :math:`C_{in}` is the number of
5983
6495
  input channels, which is also equal to the number of channels in the convolutional kernels.
5984
6496
 
5985
- Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
5986
- output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
6497
+ Therefore, in the above formula, :math:`{bias}(C_{\text{out}_j})` represents the bias of the :math:`j`-th
6498
+ output channel, :math:`{weight}(C_{\text{out}_j}, k)` represents the slice of the :math:`j`-th convolutional
5987
6499
  kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
5988
6500
  channel in the :math:`i`-th batch of the input feature map.
5989
6501
 
5990
6502
  The shape of the convolutional kernel is given by
5991
6503
  :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`
5992
- where :math:`kernel\_size[0]` , :math:`kernel\_size[1]` and :math:`kernel\_size[2]` are the depth,
6504
+ where :math:`\text{kernel_size[0]}` , :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are the depth,
5993
6505
  height and width of the kernel, respectively.
5994
6506
  If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5995
6507
  will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]},
@@ -6000,8 +6512,8 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
6000
6512
  <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
6001
6513
 
6002
6514
  Note:
6003
- 1. On Ascend platform, `groups = 1` must be satisfied.
6004
- 2. On Ascend dilation on depth only supports the case of 1.
6515
+ 1. On Ascend platform, :math:`groups = 1` must be satisfied.
6516
+ 2. On Ascend platform, :math:`dilation=1` must be satisfied.
6005
6517
 
6006
6518
  Args:
6007
6519
  input (Tensor): Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.
@@ -6040,8 +6552,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
6040
6552
  there will be :math:`k - 1` pixels skipped for each sampling location.
6041
6553
  The value ranges for the depth, height, and width dimensions are [1, D], [1, H], and [1, W],
6042
6554
  respectively. Default: ``1`` .
6043
- groups (int, optional):The number of groups into which the filter is divided. `in_channels`
6044
- and `out_channels` must be divisible by `group`. Default: ``1`` .
6555
+ groups (int, optional):The number of groups into which the filter is divided. Default: ``1`` .
6045
6556
 
6046
6557
  Returns:
6047
6558
  Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
@@ -6083,7 +6594,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
6083
6594
  TypeError: If `out_channel` or `groups` is not an int.
6084
6595
  TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
6085
6596
  TypeError: If `bias` is not a Tensor.
6086
- ValueError: If the shape of `bias` is not :math:`C_{out}`.
6597
+ ValueError: If the shape of `bias` is not :math:`(C_{out})`.
6087
6598
  ValueError: If `stride` or `dilation` is less than 1.
6088
6599
  ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
6089
6600
  ValueError: If `padding` is a tuple or list whose length is not equal to 3.
@@ -6191,21 +6702,19 @@ def pixel_shuffle(input, upscale_factor):
6191
6702
  _check_positive_int(upscale_factor, "upscale_factor")
6192
6703
  _check_is_tensor("input", input, "pixel_shuffle")
6193
6704
  _check_pixel_shuffle_unshuffle_input_shape(input, "pixel_shuffle")
6194
- idx = P.Shape()(input)
6705
+ idx = shape_(input)
6195
6706
  length = input.ndim
6196
6707
  pre = idx[:-3]
6197
6708
  c, h, w = idx[-3:]
6198
6709
  _check_pxiel_shuffle_valid(c, upscale_factor)
6199
6710
  c = c // upscale_factor ** 2
6200
6711
  input_perm = (pre + (c, upscale_factor, upscale_factor, h, w))
6201
- reshape = _get_cache_prim(P.Reshape)()
6202
- transpose = _get_cache_prim(P.Transpose)()
6203
- input = reshape(input, input_perm)
6712
+ input = reshape_(input, input_perm)
6204
6713
  input_perm = [i for i in range(length - 2)]
6205
6714
  input_perm = input_perm + [length, length - 2, length + 1, length - 1]
6206
6715
  input_perm = tuple(input_perm)
6207
- input = transpose(input, input_perm)
6208
- input = reshape(input, (pre + (c, upscale_factor * h, upscale_factor * w)))
6716
+ input = transpose_(input, input_perm)
6717
+ input = reshape_(input, (pre + (c, upscale_factor * h, upscale_factor * w)))
6209
6718
  return input
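The reshape/transpose pair above implements the usual sub-pixel rearrangement from (*, C*r^2, H, W) to (*, C, H*r, W*r). NumPy sketch of the same rearrangement for a 4-D input (illustration only):

    import numpy as np

    def pixel_shuffle_sketch(x, r):
        n, c_r2, h, w = x.shape
        c = c_r2 // (r * r)
        x = x.reshape(n, c, r, r, h, w)        # split the channel dim into (c, r, r)
        x = x.transpose(0, 1, 4, 2, 5, 3)      # interleave the two r factors with h and w
        return x.reshape(n, c, h * r, w * r)

    print(pixel_shuffle_sketch(np.arange(16).reshape(1, 4, 2, 2), 2).shape)   # (1, 1, 4, 4)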
6210
6719
 
6211
6720
 
@@ -6256,7 +6765,7 @@ def pixel_unshuffle(input, downscale_factor):
  _check_positive_int(downscale_factor, "downscale_factor")
  _check_is_tensor("input", input, "pixel_unshuffle")
  _check_pixel_shuffle_unshuffle_input_shape(input, "pixel_unshuffle")
- idx = P.Shape()(input)
+ idx = shape_(input)
  length = input.ndim
  pre = idx[:-3]
  c, h, w = idx[-3:]
@@ -6264,14 +6773,12 @@ def pixel_unshuffle(input, downscale_factor):
  h = h // downscale_factor
  w = w // downscale_factor
  input_perm = (pre + (c, h, downscale_factor, w, downscale_factor))
- reshape = _get_cache_prim(P.Reshape)()
- transpose = _get_cache_prim(P.Transpose)()
- input = reshape(input, input_perm)
+ input = reshape_(input, input_perm)
  input_perm = [i for i in range(length - 2)]
  input_perm = input_perm + [length - 1, length + 1, length - 2, length]
  input_perm = tuple(input_perm)
- input = transpose(input, input_perm)
- input = reshape(input, (pre + (c * downscale_factor * downscale_factor, h, w)))
+ input = transpose_(input, input_perm)
+ input = reshape_(input, (pre + (c * downscale_factor * downscale_factor, h, w)))
  return input
 
 
@@ -6288,7 +6795,7 @@ def glu(x, axis=-1):
  See `Language Modeling with Gated Convolutional Networks <https://arxiv.org/abs/1612.08083>`_.
 
  Args:
- x (Tensor): Tensor to be splited. Its dtype is Number, and shape is :math:`(\ast_1, N, \ast_2)`
+ x (Tensor): Tensor to be split. Its dtype is Number, and shape is :math:`(\ast_1, N, \ast_2)`
  where `*` means any number of additional dimensions.
  axis (int, optional): the axis to split the input. It must be int. Default: ``-1`` , the last axis of `x`.
 
@@ -6310,9 +6817,6 @@ def glu(x, axis=-1):
  [[0.05744425 0.11973753]
  [0.33409387 0.41398472]]
  """
- if not isinstance(x, Tensor) or x.size == 0:
- raise TypeError("glu does not support scalars because halving size must be even")
-
  spilt = _get_cache_prim(P.Split)(axis=axis, output_num=2)
  x, y = spilt(x)
  y = sigmoid_(y)
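After the split above, the result is simply the first half of `x` multiplied by the sigmoid of the second half. A short NumPy sketch of that computation (illustrative only, not the package code):

>>> import numpy as np
>>> x = np.array([[1.0, 2.0, 0.0, -1.0]])          # split along the last axis into a = [1, 2], b = [0, -1]
>>> a, b = np.split(x, 2, axis=-1)
>>> out = a * (1.0 / (1.0 + np.exp(-b)))            # a * sigmoid(b), same result as ops.glu on this input
>>> print([round(float(v), 6) for v in out[0]])
[0.5, 0.537883]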
@@ -6332,7 +6836,7 @@ def multi_margin_loss(input, target, p=1, margin=1, weight=None, reduction='mean
  .. math::
  \text{loss}(x, y) = \frac{\sum_i \max(0, \text{margin} - x[y] + x[i])^p}{\text{x.size}(0)}
 
- where :math:`i\in \{0,⋯,x.size(0)1\}` and :math:`i \ne y`.
+ where :math:`i\in \{0,⋯,x.size(0)-1\}` and :math:`i \ne y`.
 
  Args:
  input (Tensor): Input, with shape :math:`(N, C)`. Data type only supports float32, float16 or float64.
@@ -6351,8 +6855,8 @@ def multi_margin_loss(input, target, p=1, margin=1, weight=None, reduction='mean
  - ``'sum'``: the output elements will be summed.
 
  Returns:
- Tensor. If `reduction` is ``'none'``, returns a Tensor with the same shape as `target`.
- Otherwise, it is a scalar.
+ - **outputs** - Tensor. If `reduction` is ``'none'``, returns a Tensor with the same shape as `target`.
+ Otherwise, it is a scalar.
 
  Raises:
  TypeError: If dtype of `p` or `target` is not int.
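As a hand-worked check of the formula above: for a single sample with :math:`x = (0.3, 0.7, 0.2)`, target :math:`y = 1`, ``margin=1`` and ``p=1``, the non-target terms are :math:`\max(0, 1 - 0.7 + 0.3) = 0.6` and :math:`\max(0, 1 - 0.7 + 0.2) = 0.5`, giving a per-sample loss of :math:`(0.6 + 0.5) / 3 \approx 0.3667`.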
@@ -6411,10 +6915,11 @@ def multilabel_margin_loss(input, target, reduction='mean'):
  This allows for different samples to have variable amounts of target classes.
 
  Args:
- input (Tensor): Predict data. Tensor of shape :math:`(C)` or :math:`(N, C)`, where :math:`N`
- is the batch size and :math:`C` is the number of classes. Data type must be float16 or float32.
- target (Tensor): Ground truth data, with the same shape as `input`, data type must be int32 and
- label targets padded by -1.
+ input (Tensor): Predict data, :math:`x` in the formula above. Tensor of shape :math:`(C)`
+ or :math:`(N, C)`, where :math:`N` is the batch size and :math:`C` is the number of classes.
+ Data type must be float16 or float32.
+ target (Tensor): Ground truth data, :math:`y` in the formula above, with the same shape as `input`,
+ data type must be int32 and label targets padded by -1.
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
  ``'sum'`` . Default: ``'mean'`` .
 
@@ -6490,9 +6995,6 @@ def multilabel_soft_margin_loss(input, target, weight=None, reduction='mean'):
  Tensor, the data type is the same as input, if the `reduction` is ``'none'``,
  its shape is :math:`(N)` , otherwise it is zero.
 
- Raises:
- ValueError: If the rank of `input` or `target` is not 2.
-
  Supported Platforms:
  ``Ascend`` ``GPU`` ``CPU``
 
@@ -6507,81 +7009,22 @@ def multilabel_soft_margin_loss(input, target, weight=None, reduction='mean'):
  cls_name = "multilabel_soft_margin_loss"
  _check_is_tensor('input', input, cls_name)
  _check_is_tensor('target', target, cls_name)
- if input.ndim != 2 or target.ndim != 2:
- raise ValueError(
- "For 'MultiLabelSoftMarginLoss', the inputs must be 2d tensor, but got dims: "
- f"input: {input.ndim}, target: {target.ndim} "
- )
 
- mul_op = _get_cache_prim(P.Mul)()
- exp_op = _get_cache_prim(P.Exp)()
- add_op = _get_cache_prim(P.Add)()
- log_op = _get_cache_prim(P.Log)()
- dyn_shape = _get_cache_prim(P.TensorShape)()
  input_shape = input.shape
  if ops.is_sequence_value_unknown(input_shape):
- input_shape = dyn_shape(input)
+ input_shape = tensor_shape_(input)
 
- pos = log_op(add_op(exp_op(-input), 1))
- neg = log_op(add_op(exp_op(input), 1))
- loss = mul_op(target, pos) + mul_op(1 - target, neg)
+ pos = log_(add_(exp_(-input), 1))
+ neg = log_(add_(exp_(input), 1))
+ loss = mul_(target, pos) + mul_(1 - target, neg)
  if weight is not None:
- loss = mul_op(loss, weight)
+ loss = mul_(loss, weight)
  class_dim = input.ndim - 1
  loss = loss.sum(axis=class_dim) / input_shape[class_dim]
  return _get_loss(loss, reduction, cls_name)
 
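The rewritten body above is per-element binary cross-entropy with logits, averaged over the class dimension. A minimal NumPy restatement under the ``'mean'`` reduction and no `weight` (a sketch, not the package implementation):

>>> import numpy as np
>>> def multilabel_soft_margin_loss_np(x, y):
...     # y * log(1 + exp(-x)) + (1 - y) * log(1 + exp(x)), mean over classes, then over the batch
...     loss = y * np.log1p(np.exp(-x)) + (1 - y) * np.log1p(np.exp(x))
...     return loss.mean(axis=-1).mean()
>>> x = np.zeros((1, 2))
>>> y = np.array([[1.0, 0.0]])
>>> print(round(float(multilabel_soft_margin_loss_np(x, y)), 7))   # log(2) for zero logits
0.6931472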
 
- def elu(input_x, alpha=1.0):
- r"""
- Exponential Linear Unit activation function.
-
- Applies the exponential linear unit function element-wise.
- The activation function is defined as:
-
- .. math::
-
- \text{ELU}(x)= \left\{
- \begin{array}{align}
- \alpha(e^{x} - 1) & \text{if } x \le 0\\
- x & \text{if } x \gt 0\\
- \end{array}\right.
-
- Where :math:`x` is the element of input Tensor `input_x`, :math:`\alpha` is param `alpha`,
- it determines the smoothness of ELU.
- The picture about ELU looks like this `ELU <https://en.wikipedia.org/wiki/
- Activation_function#/media/File:Activation_elu.svg>`_ .
-
- Args:
- input_x (Tensor): The input of ELU is a Tensor of any dimension with data type of float16 or float32.
- alpha (float, optional): The alpha value of ELU, the data type is float. Only support '1.0' currently.
- Default: ``1.0`` .
-
- Returns:
- Tensor, has the same shape and data type as `input_x`.
-
- Raises:
- TypeError: If `alpha` is not a float.
- TypeError: If dtype of `input_x` is neither float16 nor float32.
- ValueError: If `alpha` is not equal to 1.0.
-
- Supported Platforms:
- ``Ascend`` ``GPU`` ``CPU``
-
- Examples:
- >>> import mindspore
- >>> import numpy as np
- >>> from mindspore import Tensor, ops
- >>> x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
- >>> output = ops.elu(x)
- >>> print(output)
- [[-0.63212055 4. -0.99966455]
- [ 2. -0.99326205 9. ]]
- """
- return _get_cache_prim(P.Elu)(alpha=alpha)(input_x)
-
-
- def gelu(input_x, approximate='none'):
+ def gelu(input, approximate='none'):
  r"""
  Gaussian Error Linear Units activation function.
 
@@ -6602,18 +7045,23 @@ def gelu(input_x, approximate='none'):
  .. math::
  GELU(x_i) = 0.5 * x_i * (1 + \tanh(\sqrt(2 / \pi) * (x_i + 0.044715 * x_i^3)))
 
+ GELU Activation Function Graph:
+
+ .. image:: ../images/GELU.png
+ :align: center
+
  Args:
- input_x (Tensor): The input of the activation function GeLU, the data type is float16, float32 or float64.
+ input (Tensor): The input of the activation function GeLU, the data type is float16, float32 or float64.
  approximate (str): the gelu approximation algorithm to use. Acceptable values are ``'none'`` and ``'tanh'`` .
  Default: ``'none'`` .
 
  Returns:
- Tensor, with the same type and shape as `input_x`.
+ Tensor, with the same type and shape as `input`.
 
  Raises:
- TypeError: If `input_x` is not a Tensor.
- TypeError: If dtype of `input_x` is not float16, float32 or float64.
- ValueError: If `approximate` value is neither `none` or `tanh`.
+ TypeError: If `input` is not a Tensor.
+ TypeError: If dtype of `input` is not bfloat16, float16, float32 or float64.
+ ValueError: If `approximate` value is neither `none` nor `tanh`.
 
  Supported Platforms:
  ``Ascend`` ``GPU`` ``CPU``
@@ -6624,22 +7072,22 @@ def gelu(input_x, approximate='none'):
  >>> x = Tensor([1.0, 2.0, 3.0], mindspore.float32)
  >>> result = ops.gelu(x)
  >>> print(result)
- [0.841192 1.9545976 2.9963627]
+ [0.8413447 1.9544997 2.9959505]
  """
  if approximate not in ['none', 'tanh']:
  raise ValueError("For ops.gelu, approximate value should be either 'none' or 'tanh'.")
 
- x_dtype = _get_cache_prim(P.DType)()(input_x)
- if x_dtype not in [mstype.float16, mstype.float32, mstype.float64]:
+ x_dtype = dtype_(input)
+ if x_dtype not in [mstype.float16, mstype.float32, mstype.float64, mstype.bfloat16]:
  raise TypeError(f"For gelu, the input dtype must be float16, float32 or float64, "
  f"but got {x_dtype}.")
  if approximate == 'tanh':
- output = _get_cache_prim(P.GeLU)()(input_x)
+ output = gelu_(input)
  else:
- output = _get_cache_prim(P.Sqrt)()(Tensor(2.0, x_dtype))
- output = _get_cache_prim(P.Div)()(input_x, output)
- output = _get_cache_prim(P.Erf)()(output) + Tensor(1.0, x_dtype)
- output = input_x * output * Tensor(0.5, x_dtype)
+ output = sqrt_(Tensor(2.0, x_dtype))
+ output = div_(input, output)
+ output = erf_(output) + Tensor(1.0, x_dtype)
+ output = input * output * Tensor(0.5, x_dtype)
 
  return output
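A quick check of the two formulas quoted in this docstring, the exact erf form used when ``approximate='none'`` and the tanh approximation; the exact form matches the updated sample output above to four decimal places (sketch only):

>>> import math
>>> x = [1.0, 2.0, 3.0]
>>> exact = [0.5 * v * (1 + math.erf(v / math.sqrt(2))) for v in x]
>>> tanh_approx = [0.5 * v * (1 + math.tanh(math.sqrt(2 / math.pi) * (v + 0.044715 * v ** 3))) for v in x]
>>> print([round(v, 4) for v in exact])
[0.8413, 1.9545, 2.996]
>>> print([round(v, 4) for v in tanh_approx])
[0.8412, 1.9546, 2.9964]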
 
@@ -6689,13 +7137,6 @@ def channel_shuffle(x, groups):
  return y
 
 
- @_primexpr
- def _shape_check(in_shape, dim_list, prim_name=None):
- msg_prefix = f"For '{prim_name}', the" if prim_name else "The"
- if len(in_shape) not in dim_list:
- raise ValueError(f"{msg_prefix} input must has dim in {dim_list}, but got {len(in_shape)}")
-
-
  def lp_pool1d(x, norm_type, kernel_size, stride=None, ceil_mode=False):
  r"""
  Applying 1D LPPooling operation on an input Tensor can be regarded as forming a 1D input plane.
@@ -6731,7 +7172,7 @@ def lp_pool1d(x, norm_type, kernel_size, stride=None, ceil_mode=False):
  L_{out} = \left\lfloor\frac{L_{in} - \text{kernel_size}}{\text{stride}} + 1\right\rfloor
 
  Raises:
- TypeError: If `x` is not an Tensor.
+ TypeError: If `x` is not a Tensor.
  TypeError: If `kernel_size` or `stride` is not an int.
  TypeError: If `ceil_mode` is not a bool.
  TypeError: If `norm_type` is neither float nor int.
@@ -6744,7 +7185,7 @@ def lp_pool1d(x, norm_type, kernel_size, stride=None, ceil_mode=False):
 
  Examples:
  >>> import mindspore as ms
- >>> import mindspore.ops as ops
+ >>> from mindspore import ops
  >>> from mindspore import Tensor
  >>> import numpy as np
  >>> x = Tensor(np.arange(2 * 3 * 4).reshape((2, 3, 4)), dtype=ms.float32)
@@ -6757,7 +7198,6 @@ def lp_pool1d(x, norm_type, kernel_size, stride=None, ceil_mode=False):
  [51. 54.]
  [63. 66.]]]
  """
- _shape_check(x.shape, [2, 3], "lp_pool1d")
  if isinstance(norm_type, (float, int)):
  norm_type = float(norm_type)
  else:
@@ -6816,7 +7256,7 @@ def lp_pool2d(x, norm_type, kernel_size, stride=None, ceil_mode=False):
  W_{out} = \left\lfloor\frac{W_{in} - \text{kernel_size}[1]}{\text{stride}[1]} + 1\right\rfloor
 
  Raises:
- TypeError: If `x` is not an Tensor.
+ TypeError: If `x` is not a Tensor.
  TypeError: If `kernel_size` or `stride` is neither int nor tuple.
  TypeError: If `ceil_mode` is not a bool.
  TypeError: If `norm_type` is neither float nor int.
@@ -6830,7 +7270,7 @@ def lp_pool2d(x, norm_type, kernel_size, stride=None, ceil_mode=False):
 
  Examples:
  >>> import mindspore as ms
- >>> import mindspore.ops as ops
+ >>> from mindspore import ops
  >>> from mindspore import Tensor
  >>> import numpy as np
  >>> x = Tensor(np.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5)), dtype=ms.float32)
@@ -6850,7 +7290,6 @@ def lp_pool2d(x, norm_type, kernel_size, stride=None, ceil_mode=False):
  [ 999. 1008. 1017.]]]]
 
  """
- _shape_check(x.shape, [4], "lp_pool2d")
  if isinstance(norm_type, (float, int)):
  norm_type = float(norm_type)
  else:
@@ -6913,13 +7352,13 @@ def mse_loss(input, target, reduction='mean'):
  if reduction not in ['mean', 'none', 'sum']:
  raise ValueError("For ops.mse_loss, `reduction` value should be either 'mean', 'none' or 'sum'.")
 
- x = _get_cache_prim(P.Square)()(input - target)
+ x = square_(input - target)
  float_type = (mstype.float16, mstype.float32, mstype.float64)
  if x.dtype not in float_type:
  input_dtype = mstype.float32
  else:
  input_dtype = x.dtype
- x = _get_cache_prim(P.Cast)()(x, mstype.float32)
+ x = cast_(x, mstype.float32)
 
  average_flag = True
  reduce_flag = True
@@ -6929,12 +7368,12 @@ def mse_loss(input, target, reduction='mean'):
  reduce_flag = False
 
  if reduce_flag and average_flag:
- x = _get_cache_prim(P.ReduceMean)()(x, _get_axis(x))
+ x = reduce_mean_(x, _get_axis(x))
 
  if reduce_flag and not average_flag:
- x = _get_cache_prim(P.ReduceSum)()(x, _get_axis(x))
+ x = reduce_sum_(x, _get_axis(x))
 
- return _get_cache_prim(P.Cast)()(x, input_dtype)
+ return cast_(x, input_dtype)
 
 
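Stripped of the dtype bookkeeping, the function above is the ordinary elementwise squared error with an optional reduction. A minimal NumPy restatement (illustrative only):

>>> import numpy as np
>>> def mse_loss_np(x, t, reduction='mean'):
...     err = (x - t) ** 2                                     # elementwise squared error
...     return {'mean': err.mean(), 'sum': err.sum(), 'none': err}[reduction]
>>> print(mse_loss_np(np.array([1.0, 2.0, 3.0]), np.array([1.0, 1.0, 1.0])))
1.6666666666666667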
  def msort(input):
@@ -6957,7 +7396,7 @@ def msort(input):
 
  Examples:
  >>> import mindspore as ms
- >>> import mindspore.ops as ops
+ >>> from mindspore import ops
  >>> import numpy as np
  >>> input = ms.Tensor(np.array([[8, 2, 1], [5, 9, 3], [4, 6, 7]]), ms.float16)
  >>> output = ops.msort(input)
@@ -6996,7 +7435,7 @@ def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06,
  Tensor. If `reduction` is ``"none"``, its shape is :math:`(N)`. Otherwise, a scalar value will be returned.
 
  Raises:
- TypeError: If `anchor` or `positive` or 'negative' is not a Tensor.
+ TypeError: If `anchor` or `positive` or `negative` is not a Tensor.
  TypeError: If dtype of `anchor`, `positive` and `negative` is not the same.
  TypeError: If `margin` is not a float.
  TypeError: If `p` is not an int.
@@ -7335,7 +7774,7 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head
 
  if attn_mask is not None and attn_mask.dtype == mstype.bool_:
  new_attn_mask = ops.zeros_like(attn_mask, dtype=q.dtype)
- attn_mask = new_attn_mask.masked_fill(attn_mask, float("-inf"))
+ attn_mask = new_attn_mask.masked_fill(attn_mask, ops.cast(float("-inf"), new_attn_mask.dtype))
 
  if attn_mask is not None:
  if attn_mask.shape[0] == 1:
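The change above only casts the ``-inf`` fill value to the mask's dtype; the conversion itself, from a boolean mask to an additive float mask, is unchanged. A NumPy sketch of that conversion (illustrative, not the package code):

>>> import numpy as np
>>> bool_mask = np.array([[False, True], [False, False]])      # True marks positions to exclude
>>> additive = np.where(bool_mask, float("-inf"), 0.0)          # added to attention scores before softmax
>>> print(additive.tolist())
[[0.0, -inf], [0.0, 0.0]]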
@@ -7444,11 +7883,10 @@ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, return_indice
  return out
 
 
- def prompt_flash_attention(query, key, value, padding_mask, attn_mask, actual_seq_lengths,
- actual_seq_lengths_kv, deq_scale1, quant_scale1,
- deq_scale2, quant_scale2, quant_offset2, num_heads, scale_value=1.0, pre_tokens=2147483547,
- next_tokens=0, input_layout='BSH',
- num_key_value_heads=0, sparse_mode=0):
+ def prompt_flash_attention(query, key, value, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, pse_shift,
+ deq_scale1, quant_scale1, deq_scale2, quant_scale2, quant_offset2, num_heads,
+ scale_value=1.0, pre_tokens=2147483547, next_tokens=0, input_layout='BSH',
+ num_key_value_heads=0, sparse_mode=0, inner_precise=1):
  r"""
  The interface for fully inference.
  B -- Batch size
@@ -7456,7 +7894,7 @@ def prompt_flash_attention(query, key, value, padding_mask, attn_mask, actual_se
  H -- Hidden size
 
  Note:
- is only supported on ascend910B
+ experiment ops
 
  .. warning::
  This is an experimental API that is subject to change or deletion.
@@ -7468,11 +7906,11 @@ def prompt_flash_attention(query, key, value, padding_mask, attn_mask, actual_se
  Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
  value (Tensor) - The value tensor with data type of float16 or float32.
  Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
- padding_mask (Tensor) - The padding mask tensor with data type of float16 or float32
  attn_mask (Tensor) - The attention mask tensor with data type of float16 or float32.
  For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
- actual_seq_lengths (list[int]): Describe actual sequence length of each input with data type of int.
- actual_seq_lengths_kv (list[int]): Describe actual sequence length of each input with data type of int.
+ actual_seq_lengths (Tensor): Describe actual sequence length of each input with data type of int64.
+ actual_seq_lengths_kv (Tensor): Describe actual sequence length of each input with data type of int64.
+ pse_shift (Tensor) - The position encoding tensor with data type of float16 or float32.
  deq_scale1 (Tensor)
  quant_scale1 (Tensor)
  deq_scale2 (Tensor)
@@ -7489,6 +7927,7 @@ def prompt_flash_attention(query, key, value, padding_mask, attn_mask, actual_se
  num_key_value_heads (int): head numbers of key/value which are used in GQA algorithm.
  The value 0 indicates if the key and value have the same head nums, use numHeads. Default: 0.
  sparse_mode (int): Default: 0
+ inner_precise (int): 0, float16 high precision. 1, high performance. default 1
 
 
  Outputs:
@@ -7510,16 +7949,128 @@ def prompt_flash_attention(query, key, value, padding_mask, attn_mask, actual_se
  >>> value = Tensor(np.ones((B, N, S, D), dtype=np.float16))
  >>> out = ops.prompt_flash_attention(query, key, value, None, None, None, None, None, None, None, None,
  None, N, input_layout='BNSD')
- >>> print(out[0].shape)
+ >>> print(out.shape)
  (1, 16, 256, 16)
  """
 
  pfa = _get_cache_prim(NN_OPS.PromptFlashAttention)(num_heads, scale_value, pre_tokens, next_tokens, input_layout,
- num_key_value_heads, sparse_mode)
- return pfa(query, key, value, padding_mask, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, deq_scale1,
+ num_key_value_heads, sparse_mode, inner_precise)
+ return pfa(query, key, value, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, pse_shift, deq_scale1,
  quant_scale1, deq_scale2, quant_scale2, quant_offset2)
 
 
+ def incre_flash_attention(query, key, value, attn_mask, actual_seq_lengths, pse_shift, dequant_scale1, quant_scale1,
+ dequant_scale2, quant_scale2, quant_offset2, antiquant_scale, antiquant_offset, block_table,
+ num_heads, input_layout="BSH", scale_value=1.0, num_key_value_heads=0, block_size=0,
+ inner_precise=1):
+ r"""
+ The interface for fully inference.
+
+ B -- Batch size
+
+ S -- Sequence length
+
+ H -- Hidden size
+
+ .. warning::
+ This is an experimental API that is subject to change or deletion.
+ If there is no input parameter and no default value, None needs to be passed.
+
+ Inputs:
+ - **query** (Tensor) - The query tensor with data type of float16 or bfloat16.
+ Input tensor of shape :math:`(B, 1, H)` / :math:`(B, N, 1, D)`.
+ - **key** (TensorList) - The key tensor with data type of float16 or bfloat16.
+ Input tensor of shape :math:`(B, S, H)` / :math:`(B, N, S, D)`.
+ - **value** (TensorList) - The value tensor with data type of float16 or bfloat16.
+ Input tensor of shape :math:`(B, S, H)` / :math:`(B, N, S, D)`.
+ - **attn_mask** (Tensor) - The attention mask tensor with data type of float16 or bool.
+ Input tensor of shape :math:`(B, S)` / :math:`(B, 1, S)` / :math:`(B, 1, 1, S)`.
+ - **actual_seq_lengths** (Tensor) - Describe actual sequence length of each input with data type of int.
+ - **pse_shift** (Tensor) - The position encoding tensor with data type of float16 or float32.
+ - **dequant_scale1** (Tensor) - Quantization parameter, the tensor with data type of uint64.
+ - **quant_scale1** (Tensor) - Quantization parameter, the tensor with data type of float.
+ - **dequant_scale2** (Tensor) - Quantization parameter, the tensor with data type of uint64.
+ - **quant_scale2** (Tensor) - Quantization parameter, the tensor with data type of float.
+ - **quant_offset2** (Tensor) - Quantization parameter, the tensor with data type of float.
+ - **antiquant_scale** (Tensor) - Quantization parameter, the tensor with data type of float.
+ - **antiquant_offset** (Tensor) - Quantization parameter, the tensor with data type of float.
+ - **block_table** (Tensor) - The tensor with data type of float.
+ - **num_heads** (int) - The number of heads.
+ - **input_layout** (str) - the data layout of the input qkv, support `(BSH)` and `(BNSD)`. Default `BSH`.
+ - **scale_value** (double) - The scale value indicating the scale coefficient, which is used as the scalar of
+ Muls in the calculation. Default: 1.0.
+ - **num_key_value_heads** (int) - head numbers of key/value which are used in GQA algorithm.
+ The value 0 indicates if the key and value have the same head nums, use numHeads. Default: 0.
+ - **block_size** (int) - Default: 0.
+ - **inner_precise** (int) - Default: 1.
+
+ Outputs:
+ - **attention_out** (Tensor) - Input tensor of shape :math:`(B, 1, H)` / :math:`(B, N, 1, D)`.
+
+ Supported Platforms:
+ ``Ascend``
+ """
+
+ _ifa = _get_cache_prim(NN_OPS.IncreFlashAttention)(num_heads, input_layout, scale_value, num_key_value_heads,
+ block_size, inner_precise)
+ return _ifa(query, key, value, attn_mask, actual_seq_lengths, pse_shift, dequant_scale1, quant_scale1,
+ dequant_scale2, quant_scale2, quant_offset2, antiquant_scale, antiquant_offset, block_table)
+
+
+ def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False):
+ r"""
+ Retrieve the word embeddings in `weight` using indices specified in `input`.
+
+ .. warning::
+ On Ascend, the behavior is unpredictable when the value of input is invalid.
+
+ Args:
+ input (Tensor): The indices used to lookup in the `weight`. The data type must be mindspore.int32 or
+ mindspore.int64, and the value should be in range `[0, weight.shape[0])`.
+ weight (Parameter): The matrix where to lookup from. The shape must be 2D.
+ padding_idx (int, optional): If the value is not None, the corresponding row of `weight` will not be updated
+ in training. The value should be in range `[-weight.shape[0], weight.shape[0])` if it's not ``None``.
+ Default ``None``.
+ max_norm (float, optional): If not None, firstly get the p-norm result of the `weight` specified by `input`
+ where p is specified by `norm_type`; if the result is larger than `max_norm`, update the `weight`
+ with :math:`\frac{max\_norm}{result+1e^{-7}}` in-place. Default ``None``.
+ norm_type (float, optional): Indicates the value of p in p-norm. Default ``2.0``.
+ scale_grad_by_freq (bool, optional): If ``True`` the gradients will be scaled by the inverse of frequency of
+ the index in `input`. Default ``False``.
+
+ Returns:
+ Tensor, has the same data type as `weight`, the shape is :math:`(*input.shape, weight.shape[1])`.
+
+ Raises:
+ ValueError: If `padding_idx` is out of valid range.
+ ValueError: If the shape of `weight` is invalid.
+ TypeError: `weight` is not a :class:`mindspore.Parameter`.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> import mindspore
+ >>> import numpy as np
+ >>> from mindspore import Tensor, Parameter, ops
+ >>> input = Tensor([[1, 0, 1, 1], [0, 0, 1, 0]])
+ >>> weight = Parameter(np.random.randn(3, 3).astype(np.float32))
+ >>> output = ops.embedding(input, weight, max_norm=0.4)
+ >>> print(output)
+ [[[ 5.49015924e-02, 3.47811311e-01, -1.89771220e-01],
+ [ 2.09307984e-01, -2.24846993e-02, 3.40124398e-01],
+ [ 5.49015924e-02, 3.47811311e-01, -1.89771220e-01],
+ [ 5.49015924e-02, 3.47811311e-01, -1.89771220e-01]],
+ [[ 2.09307984e-01, -2.24846993e-02, 3.40124398e-01],
+ [ 2.09307984e-01, -2.24846993e-02, 3.40124398e-01],
+ [ 5.49015924e-02, 3.47811311e-01, -1.89771220e-01],
+ [ 2.09307984e-01, -2.24846993e-02, 3.40124398e-01]]]
+ """
+ if not isinstance(weight, Parameter):
+ raise TypeError(f"For Embedding, the weight must be a mindspore.Parameter, but got {type(weight)}.")
+ return embedding_op(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq)
+
+
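Setting aside `max_norm` renormalization and gradient scaling by frequency, the new `embedding` above behaves as a row lookup into `weight`. A NumPy sketch of that core behaviour (illustrative only):

>>> import numpy as np
>>> weight = np.arange(12, dtype=np.float32).reshape(4, 3)   # 4 embedding rows of size 3
>>> indices = np.array([[1, 0], [3, 3]])
>>> print(weight[indices].shape)                              # (*indices.shape, embedding_dim)
(2, 2, 3)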
  __all__ = [
  'adaptive_avg_pool1d',
  'adaptive_avg_pool2d',
@@ -7545,6 +8096,7 @@ __all__ = [
  'dropout1d',
  'dropout2d',
  'dropout3d',
+ 'embedding',
  'fast_gelu',
  'fractional_max_pool2d',
  'fractional_max_pool3d',
@@ -7558,6 +8110,7 @@ __all__ = [
  'intopk',
  'interpolate',
  'upsample',
+ 'layer_norm',
  'log_softmax',
  'mish',
  'lrn',
@@ -7590,6 +8143,7 @@ __all__ = [
  'conv3d_transpose',
  'conv1d',
  'conv2d',
+ 'conv_transpose2d',
  'sigmoid',
  'logsigmoid',
  'relu',
@@ -7614,6 +8168,8 @@ __all__ = [
  'msort',
  'triplet_margin_loss',
  'channel_shuffle',
- 'hardsigmoid'
+ 'hardsigmoid',
+ 'group_norm',
+ 'rms_norm',
  ]
  __all__.sort()