mindspore 2.3.0__cp310-cp310-win_amd64.whl → 2.4.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +3 -1
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +50 -9
- mindspore/_extends/parse/compile_config.py +41 -0
- mindspore/_extends/parse/parser.py +9 -7
- mindspore/_extends/parse/standard_method.py +52 -14
- mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
- mindspore/amp.py +24 -10
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +6 -4
- mindspore/common/_pijit_context.py +190 -0
- mindspore/common/_register_for_tensor.py +2 -1
- mindspore/common/_tensor_overload.py +139 -0
- mindspore/common/api.py +102 -87
- mindspore/common/dump.py +5 -6
- mindspore/common/generator.py +1 -7
- mindspore/common/hook_handle.py +14 -26
- mindspore/common/mindir_util.py +2 -2
- mindspore/common/parameter.py +46 -13
- mindspore/common/recompute.py +39 -9
- mindspore/common/sparse_tensor.py +7 -3
- mindspore/common/tensor.py +209 -29
- mindspore/communication/__init__.py +1 -1
- mindspore/communication/_comm_helper.py +38 -3
- mindspore/communication/comm_func.py +310 -55
- mindspore/communication/management.py +14 -14
- mindspore/context.py +123 -22
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/__init__.py +1 -1
- mindspore/dataset/core/config.py +7 -0
- mindspore/dataset/core/validator_helpers.py +7 -0
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +72 -44
- mindspore/dataset/engine/datasets_audio.py +7 -7
- mindspore/dataset/engine/datasets_standard_format.py +53 -3
- mindspore/dataset/engine/datasets_text.py +20 -20
- mindspore/dataset/engine/datasets_user_defined.py +174 -104
- mindspore/dataset/engine/datasets_vision.py +33 -33
- mindspore/dataset/engine/iterators.py +29 -0
- mindspore/dataset/engine/obs/util.py +7 -0
- mindspore/dataset/engine/queue.py +114 -60
- mindspore/dataset/engine/serializer_deserializer.py +2 -2
- mindspore/dataset/engine/validators.py +34 -14
- mindspore/dataset/text/__init__.py +1 -4
- mindspore/dataset/transforms/__init__.py +0 -3
- mindspore/dataset/utils/line_reader.py +2 -0
- mindspore/dataset/vision/__init__.py +1 -4
- mindspore/dataset/vision/utils.py +1 -1
- mindspore/dataset/vision/validators.py +2 -1
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
- mindspore/experimental/es/embedding_service.py +883 -0
- mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
- mindspore/experimental/llm_boost/__init__.py +21 -0
- mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
- mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
- mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
- mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
- mindspore/experimental/llm_boost/register.py +129 -0
- mindspore/experimental/llm_boost/utils.py +31 -0
- mindspore/experimental/optim/adamw.py +85 -0
- mindspore/experimental/optim/optimizer.py +3 -0
- mindspore/hal/__init__.py +3 -3
- mindspore/hal/contiguous_tensors_handle.py +175 -0
- mindspore/hal/stream.py +18 -0
- mindspore/include/api/model_group.h +13 -1
- mindspore/include/api/types.h +10 -10
- mindspore/include/dataset/config.h +2 -2
- mindspore/include/dataset/constants.h +2 -2
- mindspore/include/dataset/execute.h +2 -2
- mindspore/include/dataset/vision.h +4 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +1 -1
- mindspore/mindrecord/filewriter.py +68 -51
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +495 -46
- mindspore/mint/distributed/__init__.py +31 -0
- mindspore/mint/distributed/distributed.py +254 -0
- mindspore/mint/nn/__init__.py +266 -21
- mindspore/mint/nn/functional.py +125 -19
- mindspore/mint/nn/layer/__init__.py +39 -0
- mindspore/mint/nn/layer/activation.py +133 -0
- mindspore/mint/nn/layer/normalization.py +477 -0
- mindspore/mint/nn/layer/pooling.py +110 -0
- mindspore/mint/optim/adamw.py +28 -7
- mindspore/mint/special/__init__.py +63 -0
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/multiprocessing/__init__.py +2 -1
- mindspore/nn/__init__.py +0 -1
- mindspore/nn/cell.py +275 -93
- mindspore/nn/layer/activation.py +211 -44
- mindspore/nn/layer/basic.py +113 -3
- mindspore/nn/layer/embedding.py +120 -2
- mindspore/nn/layer/normalization.py +101 -5
- mindspore/nn/layer/padding.py +34 -48
- mindspore/nn/layer/pooling.py +161 -7
- mindspore/nn/layer/transformer.py +3 -3
- mindspore/nn/loss/__init__.py +2 -2
- mindspore/nn/loss/loss.py +84 -6
- mindspore/nn/optim/__init__.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -1
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/lamb.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +127 -0
- mindspore/nn/wrap/cell_wrapper.py +12 -23
- mindspore/nn/wrap/grad_reducer.py +5 -5
- mindspore/nn/wrap/loss_scale.py +17 -3
- mindspore/numpy/__init__.py +1 -1
- mindspore/numpy/array_creations.py +65 -68
- mindspore/numpy/array_ops.py +64 -60
- mindspore/numpy/fft.py +610 -75
- mindspore/numpy/logic_ops.py +11 -10
- mindspore/numpy/math_ops.py +85 -84
- mindspore/numpy/utils_const.py +4 -4
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +6 -4
- mindspore/ops/_grad_experimental/grad_comm_ops.py +47 -3
- mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
- mindspore/ops/_vmap/vmap_array_ops.py +2 -4
- mindspore/ops/_vmap/vmap_math_ops.py +17 -1
- mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +85 -7
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
- mindspore/ops/auto_generate/gen_extend_func.py +734 -13
- mindspore/ops/auto_generate/gen_ops_def.py +2420 -381
- mindspore/ops/auto_generate/gen_ops_prim.py +5196 -1659
- mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
- mindspore/ops/composite/base.py +85 -48
- mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
- mindspore/ops/function/__init__.py +22 -0
- mindspore/ops/function/array_func.py +490 -153
- mindspore/ops/function/debug_func.py +113 -1
- mindspore/ops/function/fft_func.py +15 -2
- mindspore/ops/function/grad/grad_func.py +3 -2
- mindspore/ops/function/math_func.py +558 -207
- mindspore/ops/function/nn_func.py +817 -383
- mindspore/ops/function/other_func.py +3 -2
- mindspore/ops/function/random_func.py +184 -8
- mindspore/ops/function/reshard_func.py +13 -11
- mindspore/ops/function/sparse_unary_func.py +1 -1
- mindspore/ops/function/vmap_func.py +3 -2
- mindspore/ops/functional.py +24 -14
- mindspore/ops/op_info_register.py +3 -3
- mindspore/ops/operations/__init__.py +6 -1
- mindspore/ops/operations/_grad_ops.py +2 -76
- mindspore/ops/operations/_infer_ops.py +1 -1
- mindspore/ops/operations/_inner_ops.py +71 -94
- mindspore/ops/operations/array_ops.py +12 -146
- mindspore/ops/operations/comm_ops.py +42 -53
- mindspore/ops/operations/custom_ops.py +83 -19
- mindspore/ops/operations/debug_ops.py +42 -10
- mindspore/ops/operations/manually_defined/_inner.py +12 -0
- mindspore/ops/operations/manually_defined/ops_def.py +265 -10
- mindspore/ops/operations/math_ops.py +12 -223
- mindspore/ops/operations/nn_ops.py +20 -114
- mindspore/ops/operations/other_ops.py +7 -4
- mindspore/ops/operations/random_ops.py +46 -1
- mindspore/ops/primitive.py +18 -6
- mindspore/ops_generate/arg_dtype_cast.py +2 -0
- mindspore/ops_generate/gen_aclnn_implement.py +11 -11
- mindspore/ops_generate/gen_constants.py +36 -0
- mindspore/ops_generate/gen_ops.py +67 -52
- mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
- mindspore/ops_generate/gen_pyboost_func.py +131 -47
- mindspore/ops_generate/op_proto.py +10 -3
- mindspore/ops_generate/pyboost_utils.py +14 -1
- mindspore/ops_generate/template.py +43 -21
- mindspore/parallel/__init__.py +3 -1
- mindspore/parallel/_auto_parallel_context.py +28 -8
- mindspore/parallel/_cell_wrapper.py +83 -0
- mindspore/parallel/_parallel_serialization.py +47 -19
- mindspore/parallel/_tensor.py +81 -11
- mindspore/parallel/_utils.py +13 -1
- mindspore/parallel/algo_parameter_config.py +5 -5
- mindspore/parallel/checkpoint_transform.py +46 -39
- mindspore/parallel/cluster/process_entity/__init__.py +1 -1
- mindspore/parallel/cluster/process_entity/_api.py +31 -23
- mindspore/parallel/cluster/process_entity/_utils.py +2 -27
- mindspore/parallel/parameter_broadcast.py +3 -4
- mindspore/parallel/shard.py +162 -31
- mindspore/parallel/transform_safetensors.py +993 -0
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/common/constant.py +29 -0
- mindspore/profiler/common/registry.py +47 -0
- mindspore/profiler/common/util.py +28 -0
- mindspore/profiler/dynamic_profiler.py +694 -0
- mindspore/profiler/envprofiling.py +17 -19
- mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
- mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
- mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
- mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
- mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
- mindspore/profiler/parser/base_timeline_generator.py +19 -25
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
- mindspore/profiler/parser/framework_parser.py +1 -391
- mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
- mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
- mindspore/profiler/parser/memory_usage_parser.py +0 -154
- mindspore/profiler/parser/profiler_info.py +78 -6
- mindspore/profiler/profiler.py +153 -0
- mindspore/profiler/profiling.py +280 -412
- mindspore/rewrite/__init__.py +1 -2
- mindspore/rewrite/common/namespace.py +4 -4
- mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
- mindspore/run_check/_check_version.py +36 -103
- mindspore/safeguard/rewrite_obfuscation.py +591 -247
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +4 -3
- mindspore/train/_utils.py +28 -2
- mindspore/train/amp.py +171 -53
- mindspore/train/callback/__init__.py +2 -2
- mindspore/train/callback/_callback.py +4 -4
- mindspore/train/callback/_checkpoint.py +85 -22
- mindspore/train/callback/_cluster_monitor.py +1 -1
- mindspore/train/callback/_flops_collector.py +1 -0
- mindspore/train/callback/_loss_monitor.py +3 -3
- mindspore/train/callback/_on_request_exit.py +134 -31
- mindspore/train/callback/_summary_collector.py +5 -5
- mindspore/train/callback/_tft_register.py +352 -0
- mindspore/train/dataset_helper.py +7 -3
- mindspore/train/metrics/metric.py +3 -3
- mindspore/train/metrics/roc.py +4 -4
- mindspore/train/mind_ir_pb2.py +44 -39
- mindspore/train/model.py +134 -58
- mindspore/train/serialization.py +336 -112
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +21 -0
- mindspore/utils/utils.py +60 -0
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/METADATA +6 -2
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/RECORD +281 -275
- mindspore/include/c_api/ms/abstract.h +0 -67
- mindspore/include/c_api/ms/attribute.h +0 -197
- mindspore/include/c_api/ms/base/handle_types.h +0 -43
- mindspore/include/c_api/ms/base/macros.h +0 -32
- mindspore/include/c_api/ms/base/status.h +0 -33
- mindspore/include/c_api/ms/base/types.h +0 -283
- mindspore/include/c_api/ms/context.h +0 -102
- mindspore/include/c_api/ms/graph.h +0 -160
- mindspore/include/c_api/ms/node.h +0 -606
- mindspore/include/c_api/ms/tensor.h +0 -161
- mindspore/include/c_api/ms/value.h +0 -84
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/extend/basic.py +0 -140
- mindspore/nn/extend/embedding.py +0 -143
- mindspore/nn/extend/layer/normalization.py +0 -109
- mindspore/nn/extend/pooling.py +0 -117
- mindspore/nn/layer/embedding_service.py +0 -531
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
- mindspore/ops/extend/__init__.py +0 -53
- mindspore/ops/extend/array_func.py +0 -218
- mindspore/ops/extend/math_func.py +0 -76
- mindspore/ops/extend/nn_func.py +0 -308
- mindspore/ops/silent_check.py +0 -162
- mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
- mindspore/profiler/parser/msadvisor_parser.py +0 -240
- mindspore/train/callback/_mindio_ttp.py +0 -443
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/WHEEL +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/entry_points.txt +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/top_level.txt +0 -0
@@ -29,7 +29,6 @@ import mindspore.common.dtype as mstype
 from mindspore.ops.function.math_func import logsumexp
 from mindspore.ops.function.random_func import _get_seed, _set_prim_op_user_data
 from mindspore.common.tensor import Tensor
-from mindspore.common.parameter import Parameter
 from mindspore._c_expression import Tensor as Tensor_
 from mindspore.ops._primitive_cache import _get_cache_prim
 from mindspore import _checkparam as validator
@@ -41,13 +40,22 @@ from mindspore.ops.operations.nn_ops import ChannelShuffle
 from mindspore.ops.operations.nn_ops import TripletMarginLoss
 from mindspore.ops.operations._sequence_ops import TupleToTensor, TensorToTuple, ListToTensor
 from mindspore.common.api import _function_forbid_reuse
-from mindspore.ops.auto_generate import log_softmax, dense, prelu, celu, relu, fast_gelu, silu, elu, sigmoid, relu6
-
-from mindspore.ops.auto_generate import
+from mindspore.ops.auto_generate import log_softmax, dense, prelu, celu, relu, fast_gelu, silu, elu, sigmoid, relu6, \
+    softmax_impl, swiglu, logsigmoid_op
+from mindspore.ops.auto_generate import group_norm_op, rms_norm, layer_norm_ext_op, batch_norm_ext_op, mse_loss_ext
+from mindspore.ops.auto_generate import (reflection_pad_1d_op, reflection_pad_2d_op, add_layernorm_v2_op,
+                                         reflection_pad_3d_op,  # pylint: disable=W0611
                                          replication_pad_1d_op, replication_pad_2d_op, replication_pad_3d_op,
-                                         constant_pad_nd_op, dropout_ext_op, reverse_v2_impl
-
+                                         constant_pad_nd_op, dropout_ext_op, reverse_v2_impl, avg_pool2d_op,
+                                         upsample_nearest1d_op, upsample_nearest2d_op, upsample_nearest3d_op,
+                                         upsample_linear1d_op, upsample_bilinear2d_op, upsample_bicubic2d_op,
+                                         upsample_trilinear3d_impl, fill_scalar_op, floor_op)
+from mindspore.ops.auto_generate.gen_ops_prim import embedding_op, Convolution, ConstantPadND, MaxPoolWithIndices, \
+    MaxPoolWithMask
 from mindspore.common.generator import default_generator
+from mindspore.ops.auto_generate import hardshrink, hardsigmoid, hardswish
+from mindspore.ops.auto_generate import softshrink
+from mindspore.ops.auto_generate import adaptive_avg_pool2d_ext_op
 
 abs_ = P.Abs()
 add_ = P.Add()
@@ -64,7 +72,6 @@ gather_ = P.Gather()
 gather_d_ = P.GatherD()
 gelu_ = P.GeLU()
 greater_ = P.Greater()
-hardswish_ = P.HSwish()
 less_ = P.Less()
 list_to_tensor_ = ListToTensor()
 log_ = P.Log()
@@ -106,6 +113,7 @@ check_string_const = constexpr(validator.check_string)
 
 generator_step_ = Tensor(1, mstype.int64)
 
+
 def adaptive_avg_pool2d(input, output_size):
     r"""
     Performs 2D adaptive average pooling on a multi-plane input signal.
@@ -200,6 +208,92 @@ def adaptive_avg_pool2d(input, output_size):
     return adaptive_avgpool2d_(input)
 
 
+def adaptive_avg_pool2d_ext(input, output_size):
+    r"""
+    Performs 2D adaptive average pooling on a multi-plane input signal.
+    That is, for any input size, the size of the specified output is H x W.
+    The number of output features is equal to the number of input features.
+
+    The input and output data format can be "NCHW" and "CHW". N is the batch size, C is the number of channels,
+    H is the feature height, and W is the feature width.
+
+    For adaptive average pooling for 2D:
+
+    .. math::
+        \begin{align}
+        h_{start} &= floor(i * H_{in} / H_{out})\\
+        h_{end} &= ceil((i + 1) * H_{in} / H_{out})\\
+        w_{start} &= floor(j * W_{in} / W_{out})\\
+        w_{end} &= ceil((j + 1) * W_{in} / W_{out})\\
+        Output(i,j) &= \frac{\sum Input[h_{start}:h_{end}, w_{start}:w_{end}]}{(h_{end}- h_{start})
+        * (w_{end}- w_{start})}
+        \end{align}
+
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Args:
+        input (Tensor): The input of adaptive_avg_pool2d, which is a 3D or 4D tensor,
+            with float16 or float32 data type.
+        output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(H, W)`,
+            or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None.
+            If it is None, it means the output size is the same as the input size.
+
+    Returns:
+        Tensor, with the same type as the `input`.
+
+        Shape of the output is `input_shape[:len(input_shape) - len(out_shape)] + out_shape`.
+
+        .. math::
+
+            out\_shape = \begin{cases}
+            input\_shape[-2] + output\_size[1], & \text{if } output\_size \text{ is (None, w);}\\
+            output\_size[0] + input\_shape[-1], & \text{if } output\_size \text{ is (h, None);}\\
+            input\_shape[-2:], & \text{if } output\_size \text{ is (None, None);}\\
+            (h, h), & \text{if } output\_size \text{ is h;}\\
+            (h, w), & \text{if } output\_size \text{ is (h, w)}
+            \end{cases}
+
+    Raises:
+        ValueError: If `output_size` is a tuple and the length of `output_size` is not 2.
+        TypeError: If `input` is not a Tensor.
+        TypeError: If dtype of `input` is not float16, float32 or float64.
+        ValueError: If the dimension of `input` is less than or equal to the dimension of `output_size`.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore
+        >>> import numpy as np
+        >>> from mindspore import Tensor, mint
+        >>> # case 1: output_size=(3, 2)
+        >>> input = Tensor(np.array([[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
+        ...                          [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
+        ...                          [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]), mindspore.float32)
+        >>> output = mint.nn.functional.adaptive_avg_pool2d(input, (3, 2))
+        >>> print(output)
+        [[[1.5 2.5]
+          [4.5 5.5]
+          [7.5 8.5]]
+         [[1.5 2.5]
+          [4.5 5.5]
+          [7.5 8.5]]
+         [[1.5 2.5]
+          [4.5 5.5]
+          [7.5 8.5]]]
+    """
+    output_size_ = None
+    if isinstance(output_size, int):
+        output_size_ = output_size
+    else:
+        origin_shape = shape_(input)
+        w_ = origin_shape[-2] if output_size[-2] is None else output_size[-2]
+        h_ = origin_shape[-1] if output_size[-1] is None else output_size[-1]
+        output_size_ = (w_, h_)
+    return adaptive_avg_pool2d_ext_op(input, output_size_)
+
+
 def adaptive_avg_pool3d(input, output_size):
     r"""
     Performs 3D adaptive average pooling on a multi-plane input signal.
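The body of the new `adaptive_avg_pool2d_ext` resolves `None` entries in `output_size` against the input's last two dimensions before calling `adaptive_avg_pool2d_ext_op`. A minimal pure-Python sketch of just that resolution step (illustrative only, not code from the wheel):

    def resolve_output_size(input_shape, output_size):
        # An int is passed through unchanged; in a tuple, a None entry falls
        # back to the corresponding input dimension (as in the hunk above).
        if isinstance(output_size, int):
            return output_size
        first = input_shape[-2] if output_size[-2] is None else output_size[-2]
        second = input_shape[-1] if output_size[-1] is None else output_size[-1]
        return (first, second)

    print(resolve_output_size((3, 3, 3), (None, 2)))  # (3, 2)
    print(resolve_output_size((3, 3, 3), 4))          # 4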
@@ -446,6 +540,9 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
     .. warning::
         `kernel_size` is in the range `[1, 255]`. `stride` is in the range `[1, 63]`.
 
+    Note:
+        This interface currently does not support Atlas A2 training series products.
+
     Args:
         input_x (Tensor): Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
         kernel_size (Union[int, tuple[int]]): The size of kernel used to take the average value. It is an int number
@@ -518,37 +615,41 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
 def avg_pool2d_ext(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True,
                    divisor_override=None):
     r"""
-    Applies a 2D average pooling over an input Tensor which can be regarded as a composition of
-    Typically the input is of shape :math:`(N, C, H_{in}, W_{in})
-
-    is as follows.
+    Applies a 2D average pooling over an input Tensor which can be regarded as a composition of
+    2D input planes. Typically the input is of shape :math:`(N, C, H_{in}, W_{in})` ,
+    outputs regional average in the :math:`(H_{in}, W_{in})` -dimension.
+    Given kernel size :math:`(kH, kW)` and `stride` , the operation is as follows.
+
+    .. note::
+        On the Atlas platform, when calculating the input, the precision is degraded from float32 to float16.
 
     .. math::
-        \text{output}(N_i, C_j, h, w) = \frac{1}{
+        \text{output}(N_i, C_j, h, w) = \frac{1}{kH * kW} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1}
         \text{input}(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)
 
     Args:
-        input (Tensor): Tensor of shape :math:`(N, C, H_{in}, W_{in})`.
+        input (Tensor): Tensor of shape :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`.
         kernel_size (Union[int, tuple[int], list[int]]): The size of kernel used to take the average value.
-            Can be a single number or a tuple (kH, kW).
-        stride (Union[int, tuple[int], list[int]]): The distance of kernel moving.
-            a tuple (sH, sW). Default
-
-
-
+            Can be a single number or a tuple :math:`(kH, kW)` .
+        stride (Union[int, tuple[int], list[int]], optional): The distance of kernel moving.
+            Can be a single number or a tuple :math:`(sH, sW)` . Default: ``None``,
+            where its value is equal to `kernel_size`.
+        padding (Union[int, tuple[int], list[int]], optional): Implicit zero padding to be added on both sides.
+            Can be a single number or a tuple :math:`(padH, padW)` . Default: ``0``.
+        ceil_mode (bool, optional): If True, apply ceil instead of floor to compute the output shape.
             Default: ``False``.
-        count_include_pad (bool): If True, include the zero-padding in the averaging calculation.
+        count_include_pad (bool, optional): If True, include the zero-padding in the averaging calculation.
             Default: ``True`` .
-        divisor_override (int): If specified, it will be used as divisor in the averaging calculation,
+        divisor_override (int, optional): If specified, it will be used as divisor in the averaging calculation,
             otherwise size of pooling region will be used. Default: ``None``.
 
     Returns:
-        Tensor, with shape :math:`(N, C, H_{out}, W_{out})`.
+        Tensor, with shape :math:`(N, C, H_{out}, W_{out})` or :math:`(C, H_{out}, W_{out})`.
 
     .. math::
         \begin{array}{ll} \\
-            H_{out} = \frac{H_{in} + 2 \times padding[0] -
-            W_{out} = \frac{W_{in} + 2 \times padding[1] -
+            H_{out} = \frac{H_{in} + 2 \times padding[0] - kernel\_size[0]}{stride[0]} + 1 \\
+            W_{out} = \frac{W_{in} + 2 \times padding[1] - kernel\_size[1]}{stride[1]} + 1
         \end{array}
 
     Raises:
@@ -556,11 +657,10 @@ def avg_pool2d_ext(input, kernel_size, stride=None, padding=0, ceil_mode=False,
         TypeError: If `kernel_size` or `stride` is neither int nor tuple.
         TypeError: If `ceil_mode` or `count_include_pad` is not a bool.
         TypeError: If `divisor_override` is not an int or None.
-        ValueError: If the dimension of `input` is not equal to `
+        ValueError: If the dimension of `input` is not equal to `3` or `4`.
         ValueError: If `kernel_size` or `stride` is less than 1.
-        ValueError: If `kernel_size` or `stride` is a tuple whose length is not equal to `2` or `1`.
-        ValueError: If `padding` is neither a int nor a tuple whose length is equal to `2` or `1`.
         ValueError: If value of `padding` is less than `0`.
+        ValueError: If `kernel_size`, `padding` or `stride` is a tuple whose length is not equal to `1` or `2`.
 
     Supported Platforms:
         ``Ascend``
@@ -573,16 +673,15 @@ def avg_pool2d_ext(input, kernel_size, stride=None, padding=0, ceil_mode=False,
         >>> output = ops.function.nn_func.avg_pool2d_ext(x, kernel_size=2, stride=1)
         >>> print(output)
         [[[[ 2.5  3.5  4.5]
-
-
-
-
-
+           [ 6.5  7.5  8.5]]
+          [[14.5 15.5 16.5]
+           [18.5 19.5 20.5]]
+          [[26.5 27.5 28.5]
+           [30.5 31.5 32.5]]]]
     """
     if stride is None:
         stride = kernel_size
-    return
-        ceil_mode, count_include_pad, divisor_override)
+    return avg_pool2d_op(input, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)
 
 
 def _check_avg_pool3d_padding(padding):
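The corrected output-shape formula in the `avg_pool2d_ext` docstring can be checked with plain Python. The sketch below assumes spatial sizes of 3 and 4 with kernel_size=2, stride=1, padding=0 purely for illustration:

    import math

    def pool_out_dim(in_dim, kernel, stride, padding=0, ceil_mode=False):
        # H_out = (H_in + 2*padding - kernel_size) / stride + 1, floored (ceiled if ceil_mode)
        ratio = (in_dim + 2 * padding - kernel) / stride
        return (math.ceil(ratio) if ceil_mode else math.floor(ratio)) + 1

    print(pool_out_dim(3, 2, 1))  # 2
    print(pool_out_dim(4, 2, 1))  # 3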
@@ -615,6 +714,9 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
     .. warning::
         `kernel_size` is in the range `[1, 255]`. `stride` is in the range `[1, 63]`.
 
+    Note:
+        This interface currently does not support Atlas A2 training series products.
+
     Args:
         input_x (Tensor): Tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`. Currently support float16 and
             float32 data type.
@@ -875,7 +977,7 @@ def adaptive_max_pool3d(input, output_size, return_indices=False):
             width respectively. The value must be a positive integer. If it is None, the output size and
             input size of the corresponding dimension are the same.
         return_indices (bool, optional): If `return_indices` is `True`, the indices of max value would be output,
-            Otherwise, it will not be output. Default:
+            Otherwise, it will not be output. Default: ``False``.
 
     Returns:
         - **y** (Tensor) - Tensor, with the same number of dims and data type as the `input`.
@@ -1227,7 +1329,8 @@ def binary_cross_entropy_with_logits(input, target, weight=None, pos_weight=None
     Adds sigmoid activation function to input `input` as logits, and uses the given logits to compute binary cross
     entropy between the `input` and the `target`.
 
-    Sets input `input` as :math:`X`, input target as :math:`Y`, input weight as :math:`W`, output as :math:`L`.
+    Sets input `input` as :math:`X`, input `target` as :math:`Y`, input `weight` as :math:`W`, output as :math:`L`.
+    Then,
 
     .. math::
 
@@ -1268,15 +1371,19 @@ def binary_cross_entropy_with_logits(input, target, weight=None, pos_weight=None
     :math:`P_c>1` increases the recall, :math:`P_c<1` increases the precision.
 
     Args:
-        input (Tensor): Input `input`. Data type must be float16 or
+        input (Tensor): Input `input`. Data type must be float16, float32 or bfloat16(only Atlas A2 series products
+            are supported).
         target (Tensor): Ground truth label, has the same shape as `input`.
-            Data type must be float16 or
+            Data type must be float16, float32 or bfloat16(only Atlas A2 series products
+            are supported).
         weight (Tensor, optional): A rescaling weight applied to the loss of each batch element. It can be
-            broadcast to a tensor with shape of `input`. Data type must be float16 or
-
+            broadcast to a tensor with shape of `input`. Data type must be float16, float32 or bfloat16(only Atlas A2
+            series products are supported).
+            Default: ``None``, it equals to `weight` is a Tensor whose value is ``1``.
         pos_weight (Tensor, optional): A weight of positive examples. Must be a vector with length equal to the
             number of classes. It can be broadcast to a tensor with shape of `input`.
-            Data type must be float16
+            Data type must be float16, float32 or bfloat16(only Atlas A2 series products are supported).
+            Default: ``None``, it equals to `pos_weight` is a Tensor whose value is ``1``.
         reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
             ``'sum'`` . Default: ``'mean'`` .
 
@@ -1290,7 +1397,7 @@ def binary_cross_entropy_with_logits(input, target, weight=None, pos_weight=None
 
     Raises:
         TypeError: If input `input`, `target`, `weight`, `pos_weight` is not Tensor.
-        TypeError: If data type of input `input`, `target`, `weight`, `pos_weight` is
+        TypeError: If data type of input `input`, `target`, `weight`, `pos_weight` is not float16, float32 or bfloat16.
         TypeError: If data type of input `reduction` is not string.
         ValueError: If `weight` or `pos_weight` can not be broadcast to a tensor with shape of `input`.
         ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
@@ -1323,6 +1430,10 @@ def dropout(input, p=0.5, training=True, seed=None):
     avoid overfitting. And the return will be multiplied by :math:`\frac{1}{1-p}` during training.
     During the reasoning, this operation returns the same Tensor as the `x`.
 
+    .. warning::
+        The Ascend backend does not support the reproducibility of random numbers, so
+        the `seed` parameter has no effect.
+
     Args:
         input (Tensor): The input Tensor of shape :math:`(*, N)`, with data type of float16, float32 or float64.
         p (float, optional): The dropping rate, between 0 and 1, e.g. p = 0.1,
@@ -1911,55 +2022,6 @@ def kl_div(logits, labels, reduction='mean'):
     return _get_cache_prim(P.KLDivLoss)(reduction=reduction)(logits, labels)
 
 
-def hardshrink(x, lambd=0.5):
-    r"""
-    Hard Shrink activation function. Calculates the output according to the input elements.
-
-    The formula is defined as follows:
-
-    .. math::
-        \text{HardShrink}(x) =
-        \begin{cases}
-        x, & \text{ if } x > \lambda \\
-        x, & \text{ if } x < -\lambda \\
-        0, & \text{ otherwise }
-        \end{cases}
-
-    HShrink Activation Function Graph:
-
-    .. image:: ../images/HShrink.png
-        :align: center
-
-    Args:
-        x (Tensor): The input of Hard Shrink with data type of float16 or float32.
-        lambd (float, optional): The threshold :math:`\lambda` defined by the Hard Shrink formula.
-            Default: ``0.5`` .
-
-    Returns:
-        Tensor, has the same data type and shape as the input `x`.
-
-    Raises:
-        TypeError: If `lambd` is not a float.
-        TypeError: If `x` is not a tensor.
-        TypeError: If dtype of `x` is neither float16 nor float32.
-
-    Supported Platforms:
-        ``Ascend`` ``GPU`` ``CPU``
-
-    Examples:
-        >>> import mindspore
-        >>> import numpy as np
-        >>> from mindspore import Tensor, ops
-        >>> x = Tensor(np.array([[ 0.5,  1,  2.0], [0.0533,0.0776,-2.1233]]), mindspore.float32)
-        >>> output = ops.hardshrink(x)
-        >>> print(output)
-        [[ 0.      1.      2.    ]
-         [ 0.      0.     -2.1233]]
-    """
-    hshrink_op = _get_cache_prim(P.HShrink)(lambd)
-    return hshrink_op(x)
-
-
 @constexpr
 def _check_axis_in_range(axis, ndim):
     """Checks axes are with the bounds of ndim"""
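This hunk deletes the local `hardshrink` definition, and the next one does the same for `hardswish`; the import hunk near the top of the file now pulls `hardshrink`, `hardsigmoid` and `hardswish` from `mindspore.ops.auto_generate`, so the functional names presumably remain available under the same spelling. For reference, a NumPy rendering of the two formulas quoted in the removed docstrings (illustrative only, not MindSpore code):

    import numpy as np

    def hardshrink_ref(x, lambd=0.5):
        # x where |x| > lambd, 0 otherwise
        return np.where(np.abs(x) > lambd, x, 0.0)

    def hardswish_ref(x):
        # x * ReLU6(x + 3) / 6
        return x * np.clip(x + 3.0, 0.0, 6.0) / 6.0

    print(hardshrink_ref(np.array([0.5, 1.0, 2.0, -2.1233])))   # [ 0.  1.  2.  -2.1233]
    print(hardswish_ref(np.array([-1.0, -2.0, 0.0, 2.0, 1.0]))) # ~[-0.3333 -0.3333 0. 1.6667 0.6667]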
@@ -2148,48 +2210,6 @@ def is_floating_point(input):
     return input.dtype in [mstype.float32, mstype.bfloat16, mstype.float16, mstype.float64]
 
 
-def hardswish(x):
-    r"""
-    Applies hswish-type activation element-wise. The input is a Tensor with any valid shape.
-
-    Hard swish is defined as:
-
-    .. math::
-
-        \text{hswish}(x_{i}) = x_{i} * \frac{ReLU6(x_{i} + 3)}{6}
-
-    where :math:`x_i` is an element of the input Tensor.
-
-    HSwish Activation Function Graph:
-
-    .. image:: ../images/HSwish.png
-        :align: center
-
-    Args:
-        x (Tensor): The input to compute the Hard Swish.
-
-    Returns:
-        Tensor, has the same data type and shape as the input.
-
-    Raises:
-        TypeError: If `x` is not a Tensor.
-        TypeError: If dtype of `x` is not int or float.
-
-    Supported Platforms:
-        ``Ascend`` ``GPU`` ``CPU``
-
-    Examples:
-        >>> import mindspore
-        >>> import numpy as np
-        >>> from mindspore import Tensor, ops
-        >>> x = Tensor(np.array([-1, -2, 0, 2, 1]), mindspore.float16)
-        >>> output = ops.hardswish(x)
-        >>> print(output)
-        [-0.3333 -0.3333  0      1.666   0.6665]
-    """
-    return hardswish_(x)
-
-
 def _is_dim_unknown(shape):
     return isinstance(shape, tuple) and -2 in shape
 
@@ -2616,14 +2636,7 @@ def _interploate_ext_make_tuple(input, value):
     if F.isconstant(value) and F.isconstant(rank):
         out = tuple([value for _ in range(rank)])
     else:
-
-        v = None
-        if isinstance(value, int):
-            v = F.scalar_to_tensor(value, mstype.int64)
-        else:
-            v = F.scalar_to_tensor(value, mstype.float32)
-        t = fillv2_(s, v)
-        out = tensor_to_tuple_(t)
+        out = tensor_to_tuple_(fill_scalar_op((rank,), value, None))
     return out
 
 
@@ -2638,11 +2651,9 @@ def _interpolate_ext_scale_factor_convert_size(input, scale_factor):
         size = tuple([floor(shape[i + 2] * scale_factor[i])
                       for i in range(tuple_len)])
     else:
-        x = tuple_to_tensor_(shape[2:], mstype.
-        y = tuple_to_tensor_(scale_factor, mstype.float32)
-        t = x * y
-        t = ops.TruncateDiv()(t, Tensor(1))
-        t = ops.cast(t, mstype.int64)
+        x = tuple_to_tensor_(shape[2:], mstype.float32)
+        y = tuple_to_tensor_(tuple(scale_factor), mstype.float32)
+        t = ops.cast(floor_op(x * y), mstype.int64)
     size = tensor_to_tuple_(t)
     return size
 
@@ -2656,9 +2667,15 @@ def interpolate_ext(input,
     r"""
     Samples the input Tensor to the given size or scale_factor by using one of the interpolate algorithms.
 
+    .. warnings:
+        This is an experimental API that is subject to change or deletion.
+
     .. note::
-        - In 'linear' mode,
-
+        - In 'linear' mode, the scenarios, where `scale_factor` is not None and `align_corners` is False,
+          is not supported.
+        - In 'nearest' mode, there may exist precision problem in the scenarios, where input is 3-D/4-D Tensor
+          and the image is scaled by scale_factor.
+        - `mode` and `scale_factor` should be constants.
 
     Args:
         input (Tensor): Tensor to be resized.
@@ -2673,10 +2690,8 @@ def interpolate_ext(input,
             after removing the first two dimensions N, C.
             One and only one of size and scale_factor can be set to None. Default: ``None`` .
         mode (str): The sampling algorithm.
-            One of 'nearest', 'linear' (3D only), 'bilinear' (4D only), 'trilinear' (5D only), 'bicubic' (4D only)
-
-            knows issues with `nearest`, 3D and 4D). Default: ``"nearest"`` .
-
+            One of 'nearest', 'linear' (3D only), 'bilinear' (4D only), 'trilinear' (5D only), and 'bicubic' (4D only).
+            Default: ``"nearest"`` .
         align_corners (bool): Whether to use corner alignment for coordinate mapping. Assuming a transformation is
             applied to the input Tensor along the x-axis, the specific calculation formula is as follows:
 
@@ -2698,39 +2713,24 @@ def interpolate_ext(input,
         and finally scaled using the value of `size`.
             If False, the value of `size` or `scale_factor` will be used for direct interpolation. Default: ``None`` .
 
-    .. note::
-        The 'nearest-exact' mode is the same as the nearest-neighbor interpolation algorithm used in
-        scikit-image and PIL. The 'nearest' mode produces the same results as the INTER_NEAREST interpolation
-        algorithm used in OpenCV.
-
     Args Support List and Supported Platforms:
 
     +---------------+-----------+---------------+--------------+----------------+
     | mode          | input.dim | align_corners | scale_factor | device         |
     +===============+===========+===============+==============+================+
-    | nearest       | 3         | \-            | √            | Ascend
-    +---------------+-----------+---------------+--------------+----------------+
-    |               | 4         | \-            | √            | Ascend,GPU,CPU |
-    +---------------+-----------+---------------+--------------+----------------+
-    |               | 5         | \-            | √            | Ascend,GPU,CPU |
-    +---------------+-----------+---------------+--------------+----------------+
-    | linear        | 3         | √             | √            | Ascend,GPU,CPU |
-    +---------------+-----------+---------------+--------------+----------------+
-    | bilinear      | 4         | √             | ×            | Ascend,GPU,CPU |
+    | nearest       | 3         | \-            | √            | Ascend         |
     +---------------+-----------+---------------+--------------+----------------+
-    |
-    +---------------+-----------+---------------+--------------+----------------+
-    | area          | 3         | \-            | √            | Ascend,GPU,CPU |
+    |               | 4         | \-            | √            | Ascend         |
     +---------------+-----------+---------------+--------------+----------------+
-    | |
+    |               | 5         | \-            | √            | Ascend         |
     +---------------+-----------+---------------+--------------+----------------+
-    |
+    | linear        | 3         | √             | √            | Ascend         |
     +---------------+-----------+---------------+--------------+----------------+
-    |
+    | bilinear      | 4         | √             | ×            | Ascend         |
     +---------------+-----------+---------------+--------------+----------------+
-    |
+    | bicubic       | 4         | √             | ×            | Ascend         |
     +---------------+-----------+---------------+--------------+----------------+
-    | trilinear     | 5         | √             | √            | Ascend
+    | trilinear     | 5         | √             | √            | Ascend         |
     +---------------+-----------+---------------+--------------+----------------+
 
     - `-` indicates that there is no such parameter.
@@ -2738,7 +2738,21 @@ def interpolate_ext(input,
     - `√` indicates that this parameter is supported.
 
     Returns:
-        Tensor,
+        Tensor, sampled, whose dimensions and dtype are the same as `input`.
+
+    Shape:
+        - Input: :math:`(N, C, W_{in})`, :math:`(N, C, H_{in}, W_{in})` or :math:`(N, C, D_{in}, H_{in}, W_{in})`
+        - Output: :math:`(N, C, W_{out})`, :math:`(N, C, H_{out}, W_{out})`
+          or :math:`(N, C, D_{out}, H_{out}, W_{out})`, where
+
+        .. math::
+            D_{out} = \left\lfloor D_{in} \times \text{scale\_factor} \right\rfloor
+
+        .. math::
+            H_{out} = \left\lfloor H_{in} \times \text{scale\_factor} \right\rfloor
+
+        .. math::
+            W_{out} = \left\lfloor W_{in} \times \text{scale\_factor} \right\rfloor
 
     Raises:
         TypeError: `input` is not a Tensor.
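The floor-based output-size formulas added in this Shape section match what the rewritten `_interpolate_ext_scale_factor_convert_size` helper (a few hunks up) now computes with `floor_op`. A standalone Python equivalent for reference (illustrative only):

    import math

    def size_from_scale(spatial_dims, scale_factor):
        # new size per spatial dimension: floor(dim * scale_factor)
        return tuple(math.floor(d * s) for d, s in zip(spatial_dims, scale_factor))

    print(size_from_scale((5, 7), (2.0, 1.5)))  # (10, 10)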
@@ -2753,7 +2767,7 @@ def interpolate_ext(input,
         ValueError: `align_corners` is not in the corresponding list of supported values.
 
     Supported Platforms:
-        ``Ascend``
+        ``Ascend``
 
     Examples:
         >>> import mindspore
@@ -2764,66 +2778,32 @@ def interpolate_ext(input,
         [[[1. 1. 2. 2. 3. 3.]
           [4. 4. 5. 5. 6. 6.]]]
     """
+
     def run_nearest(x, size, align_corners=None, scale_factor=None):
         x_rank = F.rank(x)
         if x_rank == 3:
-
-                x, size, scale_factor)
+            out = upsample_nearest1d_op(x, size, scale_factor)
         elif x_rank == 4:
-
-                x, size, scale_factor)
+            out = upsample_nearest2d_op(x, size, scale_factor)
         else:
-
-        return
+            out = upsample_nearest3d_op(x, size, scale_factor)
+        return out
 
     def run_linear(x, size, align_corners=None, scale_factor=None):
-        out =
-            ops.auto_generate.UpsampleLinear1D)()(x, size, scale_factor, align_corners)
+        out = upsample_linear1d_op(x, size, scale_factor, align_corners)
         return out
 
     def run_bilinear(x, size, align_corners=None, scale_factor=None):
-        out =
-            ops.auto_generate.UpsampleBilinear2D)()(x, size, scale_factor, align_corners)
+        out = upsample_bilinear2d_op(x, size, scale_factor, align_corners)
         return out
 
     def run_trilinear(x, size, align_corners=None, scale_factor=None):
-
-        return
+        out = upsample_trilinear3d_impl(x, size, scale_factor, align_corners)
+        return out
 
     def run_bicubic(x, size, align_corners=None, scale_factor=None):
-
-
-        x = resize(x, size)
-        return x
-
-    def run_area(x, size, align_corners=None, scale_factor=None):
-        x_rank = F.rank(x)
-        if x_rank == 3:
-            x = F.adaptive_avg_pool1d(x, size[0])
-        elif x_rank == 4:
-            x = F.adaptive_avg_pool2d(x, tuple(size))
-        else:
-            x = F.adaptive_avg_pool3d(x, tuple(size))
-        return x
-
-    def run_nearest_exact(x, size, align_corners=None, scale_factor=None):
-        x_rank = F.rank(x)
-        if x_rank == 3:
-            size = size[:1] + (1,)
-            # For impl of nearest 3D use 4D.
-            x = x.unsqueeze(-1)
-            resize = _get_cache_prim(P.ResizeNearestNeighborV2)(
-                align_corners=False,
-                half_pixel_centers=True)
-            x = resize(x, size)
-            x = _get_cache_prim(P.Squeeze)(-1)(x)
-        if x_rank == 4:
-            resize = _get_cache_prim(P.ResizeNearestNeighborV2)(
-                align_corners=False,
-                half_pixel_centers=True)
-            x = resize(x, size)
-        return x
-
+        out = upsample_bicubic2d_op(x, size, scale_factor, align_corners)
+        return out
 
     resize_funcs = {
         "nearest": run_nearest,
@@ -2831,8 +2811,6 @@ def interpolate_ext(input,
         "bilinear": run_bilinear,
         "bicubic": run_bicubic,
         "trilinear": run_trilinear,
-        "area": run_area,
-        "nearest-exact": run_nearest_exact,
     }
 
     # mode check
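With `run_area` and `run_nearest_exact` gone and their keys removed from `resize_funcs`, `interpolate_ext` in 2.4.0 only dispatches the five modes left in the reworked support table, and the mode check that follows this dict would presumably reject the other two. A tiny sketch of that guard (illustrative, not the actual module code):

    SUPPORTED_MODES = ("nearest", "linear", "bilinear", "bicubic", "trilinear")

    def check_mode(mode):
        if mode not in SUPPORTED_MODES:
            raise ValueError(f"unsupported mode {mode!r}; expected one of {SUPPORTED_MODES}")

    check_mode("bilinear")         # passes
    # check_mode("nearest-exact")  # would raise ValueError in 2.4.0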
@@ -3034,8 +3012,7 @@ def softmax(input, axis=-1, *, dtype=None):
         raise TypeError(f" the type of 'axis' must be 'int', but got '{axis}' with type '{type_axis}'.")
     if dtype is not None:
         input = ops.cast(input, dtype)
-
-    return softmax_(input)
+    return softmax_impl(input, axis)
 
 
 def softmax_ext(input, dim=None, dtype=None):
@@ -3082,8 +3059,50 @@ def softmax_ext(input, dim=None, dtype=None):
         raise TypeError(f" the type of 'dim' must be 'int', but got '{dim}' with type '{type_dim}'.")
     if dtype is not None:
         input = ops.cast(input, dtype)
-
-
+    return softmax_impl(input, dim)
+
+
+def log_softmax_ext(input, dim=None, *, dtype=None):
+    r"""
+    Applies the Log Softmax function to the input tensor on the specified axis.
+    Supposes a slice in the given axis, :math:`x` for each element :math:`x_i`,
+    the Log Softmax function is shown as follows:
+
+    .. math::
+        \text{output}(x_i) = \log \left(\frac{\exp(x_i)} {\sum_{j = 0}^{N-1}\exp(x_j)}\right),
+
+    where :math:`N` is the length of the Tensor.
+
+    Args:
+        input (Tensor): The input Tensor.
+        dim (int, optional): The axis to perform the Log softmax operation. Default: ``None`` .
+
+    Keyword Args:
+        dtype (:class:`mindspore.dtype`, optional): The desired dtype of returned Tensor. If not set to None, the input
+            Tensor will be cast to `dtype` before the operation is performed. This is useful for preventing overflows.
+            If set to None, stay the same as original Tensor. Default: ``None`` .
+
+    Returns:
+        Tensor, with the same shape as the input.
+
+    Raises:
+        TypeError: If `dim` is not an int.
+        ValueError: If `dim` is not in range [-len(input.shape), len(input.shape)).
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore
+        >>> import numpy as np
+        >>> from mindspore import Tensor, ops
+        >>> logits = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
+        >>> output = ops.function.nn_func.log_softmax_ext(logits, dim=-1)
+        >>> print(output)
+        [-4.4519143 -3.4519143 -2.4519143 -1.4519144 -0.4519144]
+    """
+    log_softmax_op = _get_cache_prim(P.LogSoftmaxExt)()
+    return log_softmax_op(input, dim, dtype)
 
 
 def softmin(x, axis=-1, *, dtype=None):
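The sample output in the new `log_softmax_ext` docstring follows directly from the formula above, since log(exp(x_i) / sum_j exp(x_j)) = x_i - logsumexp(x). A quick NumPy check (illustrative):

    import numpy as np

    x = np.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype=np.float32)
    print(x - np.log(np.exp(x).sum()))
    # ≈ [-4.4519 -3.4519 -2.4519 -1.4519 -0.4519], matching the docstring example up to float32 rounding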
@@ -3131,82 +3150,34 @@ def softmin(x, axis=-1, *, dtype=None):
     if dtype is not None:
         x = ops.cast(x, dtype)
     softmax_ = _get_cache_prim(P.Softmax)(axis)
-    return softmax_(-1*x)
+    return softmax_(-1 * x)
+
+
+def soft_shrink(input, lambd=0.5):
+    r"""
+    `soft_shrink` is deprecated, please use `softshrink` instead.
+    """
+    logger.warning("`soft_shrink` is deprecated, please use `softshrink` instead.")
+    soft_shrink_op = _get_cache_prim(P.SoftShrink)(lambd)
+    return soft_shrink_op(input)
 
 
-def
+def softplus(input, beta=1, threshold=20): # pylint:disable=redefined-outer-name
     r"""
-    Applies
+    Applies softplus function to `input` element-wise.
+
+    The softplus function is shown as follows, x is the element of `input` :
 
     .. math::
-        \text{SoftShrink}(x) =
-        \begin{cases}
-        x - \lambda, & \text{ if } x > \lambda \\
-        x + \lambda, & \text{ if } x < -\lambda \\
-        0, & \text{ otherwise }
-        \end{cases}
 
-
+        \text{output} = \frac{1}{beta}\log(1 + \exp(\text{beta * x}))
 
-
-
+    When :math:`input * beta > threshold`, the implementation converts to the linear function
+    to ensure numerical stability.
 
     Args:
-
-
-
-    Returns:
-        Tensor, has the same shape and data type as `x`.
-
-    Raises:
-        TypeError: If `lambd` is not a float.
-        TypeError: If `x` is not a Tensor.
-        TypeError: If dtype of `x` is neither float16 nor float32.
-        ValueError: If `lambd` is less than 0.
-
-    Supported Platforms:
-        ``Ascend`` ``GPU`` ``CPU``
-
-    Examples:
-        >>> import mindspore
-        >>> from mindspore import Tensor
-        >>> from mindspore import ops
-        >>> import numpy as np
-        >>> x = Tensor(np.array([[ 0.5297, 0.7871, 1.1754], [ 0.7836, 0.6218, -1.1542]]), mindspore.float32)
-        >>> output = ops.softshrink(x)
-        >>> print(output)
-        [[ 0.02979  0.287    0.676  ]
-         [ 0.2837   0.1216  -0.6543 ]]
-    """
-    soft_shrink_op = _get_cache_prim(P.SoftShrink)(lambd)
-    return soft_shrink_op(x)
-
-
-def soft_shrink(input, lambd=0.5):
-    r"""
-    `soft_shrink` is deprecated, please use `softshrink` instead.
-    """
-    logger.warning("`soft_shrink` is deprecated, please use `softshrink` instead.")
-    soft_shrink_op = _get_cache_prim(P.SoftShrink)(lambd)
-    return soft_shrink_op(input)
-
-
-def softplus(input, beta=1, threshold=20): # pylint:disable=redefined-outer-name
-    r"""
-    Applies softplus function to `input` element-wise.
-
-    The softplus function is shown as follows, x is the element of `input` :
-
-    .. math::
-
-        \text{output} = \frac{1}{beta}\log(1 + \exp(\text{beta * x}))
-
-    When :math:`input * beta > threshold`, the implementation converts to the linear function
-    to ensure numerical stability.
-
-    Args:
-        input (Tensor) - Tensor of any dimension.
-        Supported dtypes:
+        input (Tensor) - Tensor of any dimension.
+        Supported dtypes:
 
         - GPU/CPU: float16, float32, float64.
         - Ascend: float16, float32.
@@ -3234,6 +3205,8 @@ def softplus(input, beta=1, threshold=20): # pylint:disable=redefined-outer-name
         >>> print(output)
         [0.7443967 0.79813886 30. 25.]
     """
+    if beta == 0:
+        raise ValueError("For 'softplus', the value of 'beta' cannot be 0.")
     scaling_input = beta * input
     op_output = (1 / beta) * softplus_(scaling_input)
     return ops.select(input * beta > threshold, input, op_output)
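The new guard rejects `beta == 0`, which would otherwise divide by zero in `1 / beta`; the surrounding lines implement the stability trick described in the docstring, replacing the exact value with the linear asymptote once `input * beta > threshold`. A NumPy rendering of the same logic (illustrative only):

    import numpy as np

    def softplus_ref(x, beta=1.0, threshold=20.0):
        if beta == 0:
            raise ValueError("beta cannot be 0")
        exact = (1.0 / beta) * np.log1p(np.exp(beta * x))
        # beyond the threshold, softplus(x) is numerically indistinguishable from x
        return np.where(beta * x > threshold, x, exact)

    print(softplus_ref(np.array([0.1, 0.2, 30.0, 25.0])))
    # ≈ [0.744397 0.798139 30. 25.], matching the docstring example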
@@ -3332,6 +3305,50 @@ def logsigmoid(x):
     return ret
 
 
+def logsigmoid_ext(input):
+    r"""
+    Applies logsigmoid activation element-wise. The input is a Tensor with any valid shape.
+
+    Logsigmoid is defined as:
+
+    .. math::
+        \text{logsigmoid}(x_{i}) = \log(\frac{1}{1 + \exp(-x_i)}),
+
+    where :math:`x_{i}` is the element of the input.
+
+    LogSigmoid Activation Function Graph:
+
+    .. image:: ../images/LogSigmoid.png
+        :align: center
+
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Args:
+        input (Tensor): The input of LogSigmoid with data type of bfloat16, float16 or float32.
+            The shape is :math:`(*)` where :math:`*` means, any number of additional dimensions.
+
+    Returns:
+        Tensor, with the same type and shape as the `input`.
+
+    Raises:
+        TypeError: If dtype of `input` is not bfloat16, float16 and float32.
+        TypeError: If `input` is not a Tensor.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore
+        >>> from mindspore import Tensor, ops
+        >>> input = Tensor([1.0, 2.0, 3.0], mindspore.float32)
+        >>> output = ops.function.nn_func.logsigmoid_ext(input)
+        >>> print(output)
+        [-0.31326166 -0.12692806 -0.04858734]
+    """
+    return logsigmoid_op(input)[0]
+
+
 def _check_dense_add_bias_shape(input_shape, output_shape, bias_shape):
     """Check that the output has the correct shape after adding bias."""
     if input_shape != output_shape:
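For the logsigmoid formula added above, a small NumPy check (`logsigmoid_ref` is an illustrative name, not part of the package) reproduces the docstring example:

```python
# Hedged NumPy check of logsigmoid(x) = log(1 / (1 + exp(-x))).
import numpy as np

def logsigmoid_ref(x):
    x = np.asarray(x, dtype=np.float32)
    return -np.log1p(np.exp(-x))  # log(sigmoid(x)) written in a stable form

print(logsigmoid_ref([1.0, 2.0, 3.0]))
# ~[-0.31326166 -0.12692806 -0.04858734], matching the docstring example
```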
@@ -3678,6 +3695,7 @@ def pad_ext(input, pad, mode='constant', value=0.0):
         ``Ascend``
 
     Examples:
+        >>> import mindspore as ms
         >>> from mindspore import ops
         >>> import numpy as np
        >>> x = ms.Tensor(np.arange(1 * 2 * 2 * 2).reshape((1, 2, 2, 2)), dtype=ms.float64)
@@ -4526,6 +4544,7 @@ def intopk(x1, x2, k):
     _in_topk = _get_cache_prim(P.InTopK)(k)
     return _in_topk(x1, x2)
 
+
 def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CHANNELS"):
     r"""
     Local Response Normalization.
@@ -5629,7 +5648,7 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
         raise TypeError(f"For 'conv1d', the 'bias' must be a Tensor, but got {type(bias)}.")
     if bias.shape[0] != out_channel:
         raise ValueError(f"For 'conv1d', given weight of size {weight_shape}, expected bias to be 1-dimensional with " \
-
+                         f"{out_channel} elements, but got bias of size {bias.shape[0]} instead.")
     output = bias_add(squeezed_conv_res, bias)
     return output
 
@@ -5767,12 +5786,207 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
         raise TypeError(f"For 'conv2d', the 'bias' must be a Tensor, but got {type(bias)}.")
     if bias.shape[0] != out_channel:
         raise ValueError(f"For 'conv2d', Given weight of size {weight_shape}, expected bias to be 1-dimensional with " \
-
+                         f"{out_channel} elements, but got bias of size {bias.shape[0]} instead.")
     conv_result = conv(input, weight)
     output = bias_add(conv_result, bias)
     return output
 
 
+def _check_stride_when_same_mode(stride):
+    """ stride must be 1 when pad mode is same """
+    if isinstance(stride, int):
+        if stride != 1:
+            raise ValueError(f"For conv2d, 'padding=same' is not supported for stride convolution, " \
+                             f"but got {stride}")
+    elif isinstance(stride, tuple):
+        validator.check_int(len(stride), 2, validator.EQ, "stride", 'conv2d')
+        if not all(s == 1 for s in stride):
+            raise ValueError(f"For conv2d, 'padding=same' is not supported for stride convolution, " \
+                             f"but got {stride}")
+    else:
+        raise TypeError(f"For conv2d, the parameter 'stride' must be a int/tuple, but got {type(stride)}")
+
+
+def _get_pad_info(dilation, weight):
+    """ Get pad list by dilation and weight shape """
+    need_pad_nd = False
+    pad_l = ()
+    pad_r = ()
+    for i in range(2):
+        d = dilation[i]
+        weight_size = weight.shape[i + 2]
+        pad = d * (weight_size - 1)
+        pad_l += (int(pad / 2),)
+        pad_r += (int(pad - pad_l[i]),)
+        if pad_l[i] != pad_r[i]:
+            need_pad_nd = True
+    return need_pad_nd, pad_l, pad_r
+
+
+def _get_pad_nd_info(pad_l, pad_r):
+    """ Get pad_nd list if input need to exec pad_nd """
+    pad_nd = ()
+    new_pad_l = ()
+    for i in range(2):
+        delta_pad = pad_r[i] - pad_l[i]
+        if delta_pad > 0:
+            pad_nd = (0, delta_pad,) + pad_nd
+            new_pad_l += (pad_l[i],)
+        else:
+            pad_nd = (delta_pad, 0,) + pad_nd
+            new_pad_l += (pad_r[i],)
+    return pad_nd, new_pad_l
+
+
+def conv2d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
+    r"""
+    Applies a 2D convolution over an input tensor. The input tenor is typically of
+    shape :math:`(N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C` is
+    channel number, :math:`H` is feature height, :math:`W` is feature width.
+
+    The output is calculated based on formula:
+
+    .. math::
+
+        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
+        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
+
+    where :math:`bias` is the output channel bias, :math:`ccor` is
+    the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
+    , :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
+
+    Here are the indices' meanings:
+
+    - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`,
+      where :math:`N` is the batch size of the input.
+
+    - :math:`j` corresponds to the output channel, the range is :math:`[0, C_{out}-1]`,
+      where :math:`C_{out}` is the number of output channels, which is also equal to the number of kernels.
+
+    - :math:`k` corresponds to the input channel, the range is :math:`[0, C_{in}-1]`,
+      where :math:`C_{in}` is the number of
+      input channels, which is also equal to the number of channels in the convolutional kernels.
+
+    Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
+    output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
+    kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
+    channel in the :math:`i`-th batch of the input feature map.
+
+    The shape of the convolutional kernel is given by :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`,
+    where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the kernel,
+    respectively.
+    If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
+    will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
+    where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
+
+    For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
+    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ and
+    `ConvNets <http://cs231n.github.io/convolutional-networks/>`_.
+
+    Note:
+        On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
+        That is, when `groups>1`, condition :math:`C_{in}` = :math:`C_{out}` = `groups` must be satisfied.
+
+    Args:
+        input (Tensor): Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+        weight (Tensor): Tensor of shape
+            :math:`(N, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`, then the size of kernel
+            is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`.
+        bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
+            When bias is ``None`` , zeros will be used. Default: ``None`` .
+        stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents
+            the height and width of movement are both strides, or a tuple of two int numbers that
+            represent height and width of movement respectively. Default: ``1`` .
+        padding (Union(int, tuple[int], list[int], str), optional): Implicit paddings on both sides of the input `x`.
+            Can be a string, one integer or a tuple/list with 2 integers.
+            If `padding` is a string, the optional values are ``"same"`` , ``"valid"``.
+
+            - same: Adopts the way of completion. The height and width of the output will be equal to
+              the input `x` divided by stride. The padding will be evenly calculated in top and bottom,
+              left and right possiblily. Otherwise, the last extra padding will be calculated from the bottom
+              and the right side. If this mode is set, `padding` must be 0.
+
+            - valid: Adopts the way of discarding. The possible largest height and width of output will be returned
+              without padding. Extra pixels will be discarded. If this mode is set, `padding` must be 0.
+
+            If `padding` is one integer, the paddings of top, bottom, left and right are the same, equal to padding.
+            If `padding` is a tuple/list with 2 integers, the padding of top adn bottom is padding[0],
+            and the padding of left and right is padding[1]. Default: ``0`` .
+        dilation (Union(int, tuple[int]), optional): Gaps between kernel elements.The data type is int or a tuple of
+            2 integers. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
+            there will be :math:`k - 1` pixels skipped for each sampling location. Its value must
+            be greater than or equal to 1 and bounded by the height and width of the input `x`. Default: ``1`` .
+        groups (int, optional): Splits `input` into groups. Default: ``1`` .
+
+    Returns:
+        Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`.
+        To see how different pad modes affect the output shape, please refer to
+        :class:`mindspore.nn.Conv2d` for more details.
+
+
+    Raises:
+        TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
+        TypeError: `groups` is not an int.
+        TypeError: If `bias` is not a Tensor.
+        ValueError: If the shape of `bias` is not :math:`(C_{out})` .
+        ValueError: If `stride` or `dilation` is less than 1.
+        ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
+        ValueError: If `padding` is a tuple/list whose length is not equal to 2.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore
+        >>> import numpy as np
+        >>> from mindspore import Tensor, ops
+        >>> from mindspore.ops.function.nn_func import conv2d_ext
+        >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
+        >>> output = conv2d_ext(x, weight)
+        >>> print(output.shape)
+        (10, 32, 30, 30)
+    """
+
+    def _convolution_same(input, weight, bias, dilation, groups):
+        """ convolution when mode is 'same' """
+        if isinstance(dilation, int):
+            dilation = (dilation,) * 2
+        validator.check_int(len(weight.shape), 4, validator.EQ, "weight.shape", 'conv2d')
+        validator.check_int(len(dilation), 2, validator.EQ, "dilation", 'conv2d')
+
+        # Calc padding info
+        need_pad_nd, pad_l, pad_r = _get_pad_info(dilation, weight)
+        if not need_pad_nd:
+            conv = _get_cache_prim(Convolution)(stride, pad_l, dilation, False, (0, 0), groups)
+            return conv(input, weight, bias)
+
+        # Calc pad nd info
+        pad_nd, pad_l = _get_pad_nd_info(pad_l, pad_r)
+        pad_nd_op = _get_cache_prim(ConstantPadND)()
+        padded_input = pad_nd_op(input, pad_nd, 0)
+        conv = _get_cache_prim(Convolution)(stride, pad_l, dilation, False, (0, 0), groups)
+        return conv(padded_input, weight, bias)
+
+    if isinstance(padding, int):
+        padding = (padding,) * 2
+
+    if isinstance(padding, (tuple, list)):
+        conv = _get_cache_prim(Convolution)(stride, padding, dilation, False, (0, 0), groups)
+        return conv(input, weight, bias)
+    if isinstance(padding, str):
+        if padding == 'valid':
+            conv = _get_cache_prim(Convolution)(stride, (0, 0), dilation, False, (0, 0), groups)
+            return conv(input, weight, bias)
+        if padding == 'same':
+            _check_stride_when_same_mode(stride)
+            return _convolution_same(input, weight, bias, dilation, groups)
+        raise ValueError(f"For conv2d, the parameter 'padding' must be 'same' or 'valid' when " \
+                         f"the type of 'padding' is string.")
+    raise TypeError(f"For conv2d, the parameter 'padding' must be a tuple/list " \
+                    f"or a string, but got {type(padding)}")
+
+
 def conv_transpose2d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1):
     r"""
     Calculates a 2D transposed convolution, which can be regarded as Conv2d for the gradient of the input,
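The `_get_pad_info`/`_get_pad_nd_info` helpers added above split the total 'same' padding, dilation * (kernel_size - 1), into a left/right pair and fall back to an explicit pre-pad when the split is uneven. A plain-Python sketch of that split (`same_pad_split` is a hypothetical name, not part of the package):

```python
# Sketch of the 'same' padding split used by the helpers above: per spatial dim
# the total pad is dilation * (kernel - 1); an odd total forces an explicit pre-pad.
def same_pad_split(kernel_size, dilation=(1, 1)):
    pad_l, pad_r, need_pad_nd = (), (), False
    for i in range(2):
        total = dilation[i] * (kernel_size[i] - 1)
        left = total // 2
        right = total - left
        pad_l += (left,)
        pad_r += (right,)
        need_pad_nd |= left != right
    return need_pad_nd, pad_l, pad_r

print(same_pad_split((3, 3)))  # (False, (1, 1), (1, 1)) -> symmetric, no extra pad
print(same_pad_split((2, 4)))  # (True, (0, 1), (1, 2)) -> uneven, pre-pad needed
```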
@@ -5842,51 +6056,6 @@ def conv_transpose2d(input, weight, bias=None, stride=1, padding=0, output_paddi
     return conv(input, weight, bias)
 
 
-def hardsigmoid(input):
-    r"""
-    Hard sigmoid activation function.
-
-    Applies hard sigmoid activation element-wise. The input is a Tensor with any valid shape.
-
-    Hard sigmoid is defined as:
-
-    .. math::
-
-        \text{hsigmoid}(x_{i}) = \max(0, \min(1, \frac{x_{i} + 3}{6}))
-
-    where :math:`x_i` is an element of the input Tensor.
-
-    HSigmoid Activation Function Graph:
-
-    .. image:: ../images/HSigmoid.png
-        :align: center
-
-    Args:
-        input (Tensor): The input Tensor.
-
-    Returns:
-        A Tensor whose dtype and shape are the same as `input`.
-
-    Raises:
-        TypeError: If `input` is not a Tensor.
-        TypeError: If dtype of `input` is not int or float.
-
-    Supported Platforms:
-        ``Ascend`` ``GPU`` ``CPU``
-
-    Examples:
-        >>> import mindspore
-        >>> import numpy as np
-        >>> from mindspore import Tensor, ops
-        >>> x = Tensor(np.array([ -3.5, 0, 4.3]), mindspore.float32)
-        >>> output = ops.hardsigmoid(x)
-        >>> print(output)
-        [0. 0.5 1. ]
-    """
-    hardsigmoid_ = NN_OPS.HSigmoid()
-    return hardsigmoid_(input)
-
-
 def hardtanh(input, min_val=-1.0, max_val=1.0):
     r"""
     Applies the hardtanh activation function element-wise. The activation function is defined as:
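The removed docstring defines hsigmoid(x) = max(0, min(1, (x + 3) / 6)); a hedged NumPy one-liner (`hardsigmoid_ref` is illustrative only) reproduces its example output:

```python
# Hedged NumPy sketch of the hard sigmoid formula from the removed docstring.
import numpy as np

def hardsigmoid_ref(x):
    x = np.asarray(x, dtype=np.float32)
    return np.clip((x + 3.0) / 6.0, 0.0, 1.0)

print(hardsigmoid_ref([-3.5, 0.0, 4.3]))  # [0.  0.5 1. ], as in the removed example
```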
@@ -6066,6 +6235,7 @@ def adaptive_avg_pool1d(input, output_size):
         >>> print(output.shape)
         (1, 3, 2)
     """
+
     def _check(x, output_size):
         x_in_shape = x.shape
         x_dtype = dtype_(x)
@@ -6161,6 +6331,74 @@ def layer_norm(input, normalized_shape, weight=None, bias=None, eps=1e-5):
     return layer_norm_ext_op(input, normalized_shape, weight, bias, eps)[0]
 
 
+def add_layer_norm(x1, x2, gamma, beta, epsilon=1e-5, additional_output=False):
+    r"""
+    Implements the add_layer_norm algorithm.
+
+    .. math::
+        \begin{aligned}
+            x = x1 + x2 \\
+            y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta \\
+        \end{aligned}
+
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Args:
+        x1 (Tensor): Input of Addition Calculation in AddLayerNorm. `x1 + x2` will be calculated in the operator
+            and the calculation result is normalized. Data type is float16, bfloat16 or float32 .
+        x2 (Tensor): Input of Addition Calculation in AddLayerNorm. `x1 + x2` will be calculated in the operator
+            and the calculation result is normalized. Has the same dtype and shape as the `x1`.
+        gamma (Tensor): Learnable parameter :math:`\gamma` . Tensor of shape is 1D, keep same with last
+            dimension `x1` .
+        beta (Tensor): Learnable parameter :math:`\beta` . Tensor of shape is 1D, keep same with last dimension `x1` .
+        epsilon (float, optional): A value added to the denominator for numerical stability(:math:`\epsilon`).
+            Default: ``1e-5`` .
+        additional_output (bool, optional): Indicates whether to enable the output of `x=x1+x2`.
+            Default: ``False`` .
+
+    Returns:
+        tuple [Tensor], tuple of 4 Tensors. the output of normalized input and the updated parameters.
+
+        - **y** (Tensor) - Output of normalization, has the same type as the `x1`.
+        - **mean** (Tensor) - The mean of input, has the same type as the `x1`.
+        - **rstd** (Tensor) - The reciprocal of the input standard deviation. Shape is the same as `mean` .
+        - **x** (Tensor) - output of `x1 + x2`.
+
+    Raises:
+        TypeError: If `x1` is not a Tensor.
+        TypeError: If `x2` is not a Tensor.
+        TypeError: If `gamma` is not a Tensor.
+        TypeError: If `beta` is not a Tensor.
+        TypeError: If `epsilon` is not a float.
+        TypeError: If `additional_output` is not a bool.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore
+        >>> import numpy as np
+        >>> from mindspore import Tensor, ops
+        >>> x1 = Tensor(np.array([[1, 2, 3], [1, 2, 3]]), mindspore.float32)
+        >>> x2 = Tensor(np.array([[1, 2, 3], [1, 2, 3]]), mindspore.float32)
+        >>> gamma = Tensor(np.ones([3]), mindspore.float32)
+        >>> beta = Tensor(np.zeros([3]), mindspore.float32)
+        >>> epsilon = 1e-7
+        >>> output = ops.add_layer_norm(x1, x2, gamma, beta, epsilon)
+        >>> print(output[0])
+        [[-1.2247448 0 1.2247448]
+         [-1.2247448 0 1.2247448]]
+        >>> print(output[1])
+        [[4]
+         [4]]
+        >>> print(output[2])
+        [[0.6123724]
+         [0.6123724]]
+    """
+    return add_layernorm_v2_op(x1, x2, gamma, beta, epsilon, additional_output)
+
+
 def group_norm(input, num_groups, weight=None, bias=None, eps=1e-5):
     r"""Group Normalization over a mini-batch of inputs.
 
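The add_layer_norm math above is easy to verify offline; a hedged NumPy sketch (`add_layer_norm_ref` is illustrative, with mean/rstd taken over the last axis as the docstring example suggests) reproduces y, mean and rstd:

```python
# Hedged NumPy reconstruction of add_layer_norm:
# x = x1 + x2, y = (x - E[x]) / sqrt(Var[x] + eps) * gamma + beta.
import numpy as np

def add_layer_norm_ref(x1, x2, gamma, beta, eps=1e-5):
    x = x1 + x2
    mean = x.mean(axis=-1, keepdims=True)
    rstd = 1.0 / np.sqrt(x.var(axis=-1, keepdims=True) + eps)
    y = (x - mean) * rstd * gamma + beta
    return y, mean, rstd, x

x1 = x2 = np.array([[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], dtype=np.float32)
y, mean, rstd, x = add_layer_norm_ref(x1, x2, np.ones(3), np.zeros(3), eps=1e-7)
print(y)     # ~[[-1.2247448 0. 1.2247448] [-1.2247448 0. 1.2247448]]
print(mean)  # [[4.] [4.]]
print(rstd)  # ~[[0.6123724] [0.6123724]]
```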
@@ -6285,6 +6523,7 @@ def batch_norm_ext(input, running_mean, running_var, weight=None, bias=None, tra
     output = batch_norm_ext_op(input, weight, bias, running_mean, running_var, training, momentum, eps)
     return output[0]
 
+
 def batch_norm(input_x, running_mean, running_var, weight, bias, training=False, momentum=0.1, eps=1e-5):
     r"""
     Batch Normalization for input data and updated parameters.
@@ -6437,7 +6676,7 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
         - ``'sum'``: the output elements will be summed.
 
     Returns:
-        Tensor or Scalar. Returns Tensor that has the same dtype and shape as `logits` if `reduction` is 'none'
+        Tensor or Scalar. Returns Tensor that has the same dtype and shape as `logits` if `reduction` is ``'none'``.
         Otherwise, returns a scalar Tensor.
 
     Raises:
@@ -6630,7 +6869,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
     in_channel = input_shape[1]
     if not (in_channel % groups == 0 and out_channel % groups == 0):
         raise ValueError("The argument 'groups' should be divisible by 'in_channel' " \
-
+                         "and 'out_channel'")
     if isinstance(padding, (list, tuple)):
         padding = _manipulate_padding(padding, dim=3)
     conv = _get_cache_prim(P.Conv3D)(out_channel, kernel_size, 1, pad_mode, padding, stride, dilation, groups, "NCDHW")
@@ -7152,6 +7391,9 @@ def lp_pool1d(x, norm_type, kernel_size, stride=None, ceil_mode=False):
     .. math::
         f(X) = \sqrt[p]{\sum_{x \in X} x^{p}}
 
+    Note:
+        This interface currently does not support Atlas A2 training series products.
+
     Args:
         x (Tensor): Tensor of shape :math:`(N, C, L_{in})` or :math:`(C, L_{in})`.
         norm_type (Union[int, float]): Type of normalization, represents p in the formula, can not be 0,
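The LP-pooling formula quoted above, f(X) = (sum of x^p over the window)^(1/p), can be illustrated on a single 1D signal; the loop below is a hedged reference sketch (`lp_pool1d_ref` is an illustrative name), not the MindSpore kernel:

```python
# Minimal NumPy illustration of f(X) = (sum_{x in X} x^p)^(1/p) per 1D window.
import numpy as np

def lp_pool1d_ref(x, norm_type, kernel_size, stride=None):
    stride = stride or kernel_size
    out = []
    for start in range(0, x.shape[-1] - kernel_size + 1, stride):
        window = x[..., start:start + kernel_size]
        out.append(np.power(np.sum(np.power(window, norm_type), axis=-1), 1.0 / norm_type))
    return np.stack(out, axis=-1)

x = np.arange(1, 7, dtype=np.float32)  # [1 2 3 4 5 6]
print(lp_pool1d_ref(x, norm_type=2, kernel_size=3, stride=3))
# [sqrt(1+4+9), sqrt(16+25+36)] ~= [3.742 8.775]
```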
@@ -7229,6 +7471,9 @@ def lp_pool2d(x, norm_type, kernel_size, stride=None, ceil_mode=False):
     .. math::
         f(X) = \sqrt[p]{\sum_{x \in X} x^{p}}
 
+    Note:
+        This interface currently does not support Atlas A2 training series products.
+
     Args:
         x (Tensor): Tensor of shape :math:`(N, C, H_{in}, W_{in})`.
         norm_type (Union[int, float]): Type of normalization, represents p in the formula, can not be 0,
@@ -7382,6 +7627,9 @@ def msort(input):
 
     ops.msort(t) is equivalent to ops.Sort(axis=0)(t)[0]. See also :class:`mindspore.ops.Sort()`.
 
+    .. Note::
+        The Ascend backend only supports sorting the 1D input.
+
     Args:
         input (Tensor): The input to sort, with float16 or float32 data type.
 
@@ -7883,6 +8131,94 @@ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, return_indice
     return out
 
 
+def max_pool2d_ext(input, kernel_size, stride=None, padding=0, dilation=1, *, ceil_mode=False, return_indices=False):
+    r"""
+    Performs a 2D max pooling on the input Tensor.
+
+    Typically, the input is a Tensor with shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, outputs
+    regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given `kernel_size`
+    :math:`ks = (h_{ker}, w_{ker})` and `stride` :math:`s = (s_0, s_1)`, the operation is as follows:
+
+    .. math::
+        \text{output}(N_i, C_j, h, w) =
+        \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
+        \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)
+
+    .. warning::
+        Only support on Atlas A2 training series.
+
+    Args:
+        input (Tensor): Tensor of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})` with data type of float32
+            in Ascend.
+        kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value and arg
+            value, is an int number that represents height and width of the kernel, or a tuple of
+            two int numbers that represent height and width respectively.
+        stride (Union[int, tuple[int], None]): The distance of kernel moving, an int number that represents
+            the height and width of movement are both stride, or a tuple of two int numbers that
+            represent height and width of movement respectively.
+            Default: ``None`` , which indicates the moving step is `kernel_size` .
+        padding (Union[int, tuple[int]]): An int number that represents the height and width of movement are both
+            strides, or a tuple of two int numbers that represent height and width of movement respectively.
+            Default: ``0`` .
+        dilation (Union[int, tuple[int]]): Control the stride of elements in the kernel. Default: ``1`` .
+        ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: ``False`` .
+        return_indices (bool): Whether to output the indices of max value. Default: ``False`` .
+
+    Returns:
+        If `return_indices` is ``False`` , return a Tensor `output`, else return a tuple (`output`, `argmax`).
+
+        - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, H_{out}, W_{out})`.
+          It has the same data type as `input`.
+
+          .. math::
+              H_{out} = \left\lfloor\frac{H_{in} + 2 * \text{padding[0]} - \text{dilation[0]}
+              \times (\text{kernel_size[0]} - 1) - 1}{\text{stride[0]}} + 1\right\rfloor
+
+          .. math::
+              W_{out} = \left\lfloor\frac{W_{in} + 2 * \text{padding[1]} - \text{dilation[1]}
+              \times (\text{kernel_size[1]} - 1) - 1}{\text{stride[1]}} + 1\right\rfloor
+
+        - **argmax** (Tensor) - Index corresponding to the maximum value. In Ascend, data type is int32.
+          It will be return only when `return_indices` is True.
+
+    Raises:
+        TypeError: If `input` is not a Tensor.
+        ValueError: If length of shape of `input` is not equal to 4.
+        TypeError: If `kernel_size` , `stride` , `padding` or `dilation` is not int or tuple.
+        ValueError: If `kernel_size`, `stride` or `dilation` is less than 1.
+        ValueError: If `dilation` is not all 1.
+        ValueError: If `padding` is less than 0.
+        ValueError: If `padding` is more than half of `kernel_size`.
+        TypeError: If `ceil_mode` is not bool.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore
+        >>> import numpy as np
+        >>> from mindspore import Tensor, ops
+        >>> from mindspore.ops.function.nn_func import max_pool2d_ext
+        >>> input = Tensor(np.arange(20 * 16 * 50 * 32).reshape((20, 16, 50, 32)), mindspore.float32)
+        >>> output_tensor, argmax = max_pool2d_ext(input, kernel_size=(3, 2), stride=(2, 1),
+        ...                                        ceil_mode=False, return_indices=True)
+        >>> print(output_tensor.shape)
+        (20, 16, 24, 31)
+        >>> print(argmax.shape)
+        (20, 16, 24, 31)
+    """
+    strides = stride if (stride is not None) else kernel_size
+    if return_indices:
+        max_pool_func_ = _get_cache_prim(MaxPoolWithIndices)(kernel_size, strides, padding, dilation, ceil_mode)
+        out, indices = max_pool_func_(input)
+    else:
+        max_pool_func_ = _get_cache_prim(MaxPoolWithMask)(kernel_size, strides, padding, dilation, ceil_mode)
+        out, indices = max_pool_func_(input)
+    if return_indices:
+        return out, indices
+    return out
+
+
 def prompt_flash_attention(query, key, value, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, pse_shift,
                            deq_scale1, quant_scale1, deq_scale2, quant_scale2, quant_offset2, num_heads,
                            scale_value=1.0, pre_tokens=2147483547, next_tokens=0, input_layout='BSH',
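The H_out/W_out formulas in the max_pool2d_ext docstring can be checked against the example shapes with a small hedged helper (`pooled_dim` is illustrative, not MindSpore API):

```python
# Worked check of the pooled-output-size formula for the docstring example:
# input (20, 16, 50, 32), kernel_size=(3, 2), stride=(2, 1), padding=0, dilation=1.
import math

def pooled_dim(size, kernel, stride, padding=0, dilation=1, ceil_mode=False):
    value = (size + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1
    return math.ceil(value) if ceil_mode else math.floor(value)

h_out = pooled_dim(50, kernel=3, stride=2)  # floor((50 - 2 - 1) / 2 + 1) = 24
w_out = pooled_dim(32, kernel=2, stride=1)  # floor((32 - 1 - 1) / 1 + 1) = 31
print(h_out, w_out)  # 24 31 -> output shape (20, 16, 24, 31)
```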
@@ -7959,62 +8295,157 @@ def prompt_flash_attention(query, key, value, attn_mask, actual_seq_lengths, act
                                     quant_scale1, deq_scale2, quant_scale2, quant_offset2)
 
 
-def incre_flash_attention(query, key, value, attn_mask, actual_seq_lengths, pse_shift,
-
-
-
+def incre_flash_attention(query, key, value, attn_mask=None, actual_seq_lengths=None, pse_shift=None,
+                          dequant_scale1=None, quant_scale1=None, dequant_scale2=None, quant_scale2=None,
+                          quant_offset2=None, antiquant_scale=None, antiquant_offset=None, block_table=None,
+                          num_heads=1, input_layout='BSH', scale_value=1.0, num_key_value_heads=0,
+                          block_size=0, inner_precise=1, kv_padding_size=None):
     r"""
-    The interface for fully inference.
-
     B -- Batch size
 
+    N -- Num heads
+
+    kvN -- Num key value heads
+
     S -- Sequence length
 
+    D -- Head dim
+
     H -- Hidden size
 
+    kvH -- Hidden size of key value
+
+    where :math:`H=N\times D`, :math:`kvH=kvN\times D`
+
+    Self attention constructs an attention model based on the relationship between input samples themselves. The
+    principle is to assume that there is a length of the input sample sequence :math:`x` of :math:`n`, and each
+    element of :math:`x` is a :math:`d` dimensional vector, which can be viewed as a token embedding. This sequence
+    can be transformed through 3 weight matrices to obtain 3 matrices with dimensions of :math:`n\times d`. The self
+    attention calculation formula is defined as:
+
+    .. math::
+        Attention(Q,K,V)=Softmax(\frac{QK^{T} }{\sqrt{d} } )V
+
+    where the product of :math:`Q` and :math:`K^{T}` represents the attention of input :math:`x`. To avoid the value
+    becoming too large, it is usually scaled by dividing it by the square root of :math:`d` and perform softmax
+    normalization on each row, yields a matrix of :math:`n\times d` after multiplying :math:`V`.
+
     .. warning::
         This is an experimental API that is subject to change or deletion.
-        If there is no input parameter and no default value, None needs to be passed.
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    Note:
+        - If there is no input parameter and no default value, None needs to be passed.
+        - The shape of the tensor corresponding to the key and value parameters needs to be completely consistent.
+        - :math:`N` of parameter query is equal with num_heads. :math:`N` of parameter key and parameter value is equal
+          with num_key_value_heads. num_heads is a multiple of num_key_value_heads.
+        - Quantization
+
+          - When the data type of query, key, and value is float16 and the data type of output is int8, the input
+            parameter quant_scale2 is required and quant_offset2 is optional.
+          - When antiquant_scale exists, key and value need to be passed by int8. antiquant_offset is optional.
+          - The data type of antiquant_scale and antiquant_offset should be consistency with that of query.
+        - pse_shift
+
+          - The pse_shift data type needs to be consistent with the query data type, and only supports D-axis alignment,
+            which means that the D-axis can be divided by 16.
+        - Page attention:
+
+          - The necessary condition for enabling page attention is that the block_table exists, and the key
+            and value are arranged in a contiguous memory according to the index in the block_table. The support for
+            key and value dtypes is float16/bfloat16/int8.
+          - In the enabling scenario of page attention, 16 alignment is required when input types of key and value are
+            float16/bfloat16, and 32 alignment is required when input types of key and value are int8. It is
+            recommended to use 128.
+          - The maximum max_block_num_per_seq currently supported by blocktable is 16k, and exceeding 16k will result
+            in interception and error messages; If you encounter :math:`S` being too large and causing
+            max_block_num_per_seq to exceed 16k, you can increase the block_size to solve the problem.
+          - The multiplication of all dimensions of the shape of the parameters key and value in the page attention
+            scenario cannot exceed the representation range of int32.
+          - When performing per-channel post quantization, page attention cannot be enabled simultaneously.
+        - kv_padding_size:
+
+          - The calculation formula for the starting point of KV cache transfer is
+            :math:`S-kv\_padding\_size-actual\_seq\_lengths`. The calculation formula for the transfer endpoint of KV
+            cache is :math:`S-kv\_padding\_size`. When the starting or ending point of the KV cache transfer is less
+            than 0, the returned data result is all 0.
+          - When kv_padding_size is less than 0, it will be set to 0.
+          - kv_padding_size needs to be enabled together with the actual_seq_lengths parameter, otherwise it is
+            considered as the KV right padding scene.
+          - It needs to be enabled together with the atten_mask parameter and ensure that the meaning of atten_mask is
+            correct, that is, it can correctly hide invalid data. Otherwise, it will introduce accuracy issues.
+          - kv_padding_size does not support page attention scenarios
 
-
-
+    Args:
+        query (Tensor): The query tensor with data type of float16 or bfloat16.
+            The shape is :math:`(B, 1, H)` / :math:`(B, N, 1, D)`.
+        key (TensorList): The key tensor with data type of float16 or bfloat16 or int8.
+            The shape is :math:`(B, S, kvH)` / :math:`(B, kvN, S, D)`.
+        value (TensorList): The value tensor with data type of float16 or bfloat16 or int8.
+            The shape is :math:`(B, S, kvH)` / :math:`(B, kvN, S, D)`.
+        attn_mask (Tensor, optional): The attention mask tensor with data type of bool or int8 or uint8.
+            The shape is :math:`(B, S)` / :math:`(B, 1, S)` / :math:`(B, 1, 1, S)`. Default: ``None``.
+        actual_seq_lengths (Union[Tensor, tuple[int], list[int]], optional): Describe actual sequence length of each
+            input with data type of int32 or int64. The shape is :math:`(B, )`. Default: ``None``.
+        pse_shift (Tensor, optional): The position encoding tensor with data type of float16 or bfloat16. Input tensor
+            of shape :math:`(1, N, 1, S)` / :math:`(B, N, 1, S)`. Default: ``None``.
+        dequant_scale1 (Tensor, optional): Quantitative parametor, the tensor with data type of uint64 or float32. It
+            is disable now. Default: ``None``.
+        quant_scale1 (Tensor, optional): Quantitative parametor, the tensor with data type of float32. It is disable
+            now. Default: ``None``.
+        dequant_scale2 (Tensor, optional): Quantitative parametor, the tensor with data type of uint64 or float32. It
+            is disable now. Default: ``None``.
+        quant_scale2 (Tensor, optional): Post Quantitative parametor, the tensor with data type of float32.
+            The shape is :math:`(1,)`. Default: ``None``.
+        quant_offset2 (Tensor, optional): Post Quantitative parametor, the tensor with data type of float32.
+            The shape is :math:`(1,)`. Default: ``None``.
+        antiquant_scale (Tensor, optional): Pseudo Quantitative parametor, the tensor with data type of float16 or
+            bfloat16. The shape is :math:`(2, kvN, 1, D)` when input_layout is 'BNSD' or :math:`(2, kvH)` when
+            input_layout is 'BSH'. Default: ``None``.
+        antiquant_offset (Tensor, optional): Pseudo Quantitative parametor, the tensor with data type of float16 or
+            bfloat16. The shape is :math:`(2, kvN, 1, D)` when input_layout is 'BNSD' or :math:`(2, kvH)` when
+            input_layout is 'BSH'. Default: ``None``.
+        block_table (Tensor, optional): The tensor with data type of int32. The shape is
+            :math:`(B, max\_block\_num\_per\_seq)`,
+            where :math:`max\_block\_num\_per\_seq = ceil(\frac{max(actual\_seq\_length)}{block\_size} )`.
+            Default: ``None``.
+        num_heads (int): The number of heads.
+        input_layout (str): The data layout of the input qkv, support 'BSH' and 'BNSD'. Default ``'BSH'``.
+        scale_value (double): The scale value indicating the scale coefficient, which is used as the scalar of
+            Muls in the calculation. Default: ``1.0``.
+        num_key_value_heads (int): Head numbers of key/value which are used in GQA algorithm.
+            The value 0 indicates if the key and value have the same head nums, use numHeads. Default: ``0``.
+        block_size (int): The maximum number of tokens stored in each block of KV in page attention. Default: ``0``.
+        inner_precise (int): Default: ``1``.
+        kv_padding_size (Tensor, optional): The tensor with data type of int64. The range of values is
+            :math:`0\le kv\_padding\_size \le S-max(actual\_seq\_length)`. The shape is :math:`()` or :math:`(1,)`.
+            Default: ``None``.
+
+    Returns:
+        attention_out (Tensor), the shape is :math:`(B, 1, H)` / :math:`(B, N, 1, D)`.
 
     Supported Platforms:
         ``Ascend``
-    """
 
+    Examples:
+        >>> from mindspore import ops
+        >>> from mindspore.common import Tensor
+        >>> from mindspore.common import dtype as mstype
+        >>> import numpy as np
+        >>> B, N, S, D, kvN = 1, 4, 10, 128, 1
+        >>> query = Tensor(np.random.randn(B, 1, N * D), mstype.float16)
+        >>> key = [Tensor(np.random.randn(B, S, kvN * D), mstype.float16)]
+        >>> value = [Tensor(np.random.randn(B, S, kvN * D), mstype.float16)]
+        >>> ifa_ms = ops.functional.incre_flash_attention
+        >>> attn_out = ifa_ms(query, key, value, num_heads=N, num_key_value_heads=kvN)
+        >>> attn_out
+        Tensor(shape=[1, 1, 512], dtype=Float16, value=
+        [[[ 1.6104e+00, 7.3438e-01, 1.0684e+00 ... -8.7891e-01, 1.7695e+00, 1.0264e+00]]])
+    """
     _ifa = _get_cache_prim(NN_OPS.IncreFlashAttention)(num_heads, input_layout, scale_value, num_key_value_heads,
                                                        block_size, inner_precise)
     return _ifa(query, key, value, attn_mask, actual_seq_lengths, pse_shift, dequant_scale1, quant_scale1,
-                dequant_scale2, quant_scale2, quant_offset2, antiquant_scale, antiquant_offset, block_table
+                dequant_scale2, quant_scale2, quant_offset2, antiquant_scale, antiquant_offset, block_table,
+                kv_padding_size)
 
 
 def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False):
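The attention formula documented above, Attention(Q, K, V) = Softmax(QK^T / sqrt(d)) V, reduces to a matmul-softmax-matmul per head; a hedged NumPy sketch without quantization, masking or paging (`single_head_attention` is an illustrative name):

```python
# Hedged single-head, single-query-step sketch of the documented attention formula,
# roughly the shape of one incremental decoding step over a KV cache of length S.
import numpy as np

def single_head_attention(q, k, v):
    d = q.shape[-1]
    scores = q @ k.T / np.sqrt(d)                   # (1, S)
    weights = np.exp(scores - scores.max())
    weights /= weights.sum(axis=-1, keepdims=True)  # row-wise softmax
    return weights @ v                              # (1, D)

rng = np.random.default_rng(0)
S, D = 10, 128
q = rng.standard_normal((1, D))
k = rng.standard_normal((S, D))
v = rng.standard_normal((S, D))
print(single_head_attention(q, k, v).shape)  # (1, 128)
```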
@@ -8066,14 +8497,13 @@ def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, sca
         [ 5.49015924e-02, 3.47811311e-01, -1.89771220e-01],
         [ 2.09307984e-01, -2.24846993e-02, 3.40124398e-01]]]
     """
-    if not isinstance(weight, Parameter):
-        raise TypeError(f"For Embedding, the weight must be a mindspore.Parameter, but got {type(weight)}.")
     return embedding_op(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq)
 
 
 __all__ = [
     'adaptive_avg_pool1d',
     'adaptive_avg_pool2d',
+    'adaptive_avg_pool2d_ext',
     'adaptive_avg_pool3d',
     'adaptive_max_pool1d',
     'adaptive_max_pool2d',
@@ -8104,6 +8534,7 @@ __all__ = [
     'pixel_unshuffle',
     'hardshrink',
     'is_floating_point',
+    'incre_flash_attention',
     'flip',
     'fliplr',
     'flipud',
@@ -8111,6 +8542,7 @@ __all__ = [
     'interpolate',
     'upsample',
     'layer_norm',
+    'mse_loss_ext',
     'log_softmax',
     'mish',
     'lrn',
@@ -8149,6 +8581,7 @@ __all__ = [
     'relu',
     'relu6',
     'rrelu',
+    'swiglu',
     'conv3d',
     'glu',
     'margin_ranking_loss',
@@ -8169,6 +8602,7 @@ __all__ = [
     'triplet_margin_loss',
     'channel_shuffle',
     'hardsigmoid',
+    'add_layer_norm',
     'group_norm',
     'rms_norm',
 ]