mindspore-2.5.0-cp310-cp310-win_amd64.whl → mindspore-2.6.0-cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +6 -4
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -0
- mindspore/_checkparam.py +3 -33
- mindspore/_deprecated/__init__.py +17 -0
- mindspore/_deprecated/jit.py +198 -0
- mindspore/_extends/builtin_operations.py +1 -1
- mindspore/_extends/parse/__init__.py +6 -7
- mindspore/_extends/parse/compile_config.py +19 -0
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +22 -3
- mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
- mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
- mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
- mindspore/_extends/parse/parser.py +25 -194
- mindspore/_extends/parse/resources.py +1 -5
- mindspore/_extends/parse/standard_method.py +109 -75
- mindspore/_extends/pijit/__init__.py +2 -2
- mindspore/_extends/pijit/pijit_func_white_list.py +16 -11
- mindspore/_extends/pijit/tensor_func_list.py +27 -0
- mindspore/_extends/utils.py +1 -1
- mindspore/amp.py +4 -4
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/__init__.py +2 -2
- mindspore/boost/base.py +3 -7
- mindspore/boost/boost_cell_wrapper.py +2 -2
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +4 -3
- mindspore/common/_grad_function.py +56 -0
- mindspore/common/_pijit_context.py +14 -5
- mindspore/common/_register_for_tensor.py +1 -1
- mindspore/common/_stub_tensor.py +5 -10
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +2014 -3386
- mindspore/common/api.py +386 -355
- mindspore/common/auto_dynamic_shape.py +41 -44
- mindspore/common/dtype.py +5 -2
- mindspore/common/dump.py +7 -5
- mindspore/common/file_system.py +3 -0
- mindspore/common/generator.py +3 -0
- mindspore/common/hook_handle.py +5 -3
- mindspore/common/initializer.py +10 -6
- mindspore/common/jit_begin_end.py +94 -0
- mindspore/common/jit_config.py +6 -1
- mindspore/common/jit_context.py +76 -0
- mindspore/common/jit_trace.py +378 -0
- mindspore/common/lazy_inline.py +2 -2
- mindspore/common/mutable.py +5 -4
- mindspore/common/parameter.py +106 -39
- mindspore/common/seed.py +2 -2
- mindspore/common/sparse_tensor.py +23 -17
- mindspore/common/tensor.py +332 -714
- mindspore/communication/__init__.py +7 -5
- mindspore/communication/_comm_helper.py +47 -2
- mindspore/communication/comm_func.py +70 -53
- mindspore/communication/management.py +83 -17
- mindspore/context.py +228 -571
- mindspore/dataset/__init__.py +44 -20
- mindspore/dataset/audio/__init__.py +2 -8
- mindspore/dataset/audio/transforms.py +3 -17
- mindspore/dataset/core/config.py +3 -3
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +102 -120
- mindspore/dataset/engine/datasets_audio.py +22 -22
- mindspore/dataset/engine/datasets_standard_format.py +43 -24
- mindspore/dataset/engine/datasets_text.py +78 -85
- mindspore/dataset/engine/datasets_user_defined.py +109 -77
- mindspore/dataset/engine/datasets_vision.py +111 -108
- mindspore/dataset/engine/iterators.py +5 -3
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
- mindspore/dataset/engine/samplers.py +279 -57
- mindspore/dataset/engine/serializer_deserializer.py +2 -1
- mindspore/dataset/engine/validators.py +10 -0
- mindspore/dataset/text/__init__.py +7 -6
- mindspore/dataset/text/transforms.py +6 -5
- mindspore/dataset/text/utils.py +3 -3
- mindspore/dataset/transforms/__init__.py +0 -9
- mindspore/dataset/transforms/transforms.py +3 -3
- mindspore/dataset/utils/browse_dataset.py +1 -1
- mindspore/dataset/vision/__init__.py +2 -9
- mindspore/dataset/vision/transforms.py +202 -158
- mindspore/dataset/vision/utils.py +7 -5
- mindspore/device_context/ascend/op_debug.py +60 -1
- mindspore/device_context/ascend/op_tuning.py +0 -4
- mindspore/device_manager.py +39 -3
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/es/embedding_service.py +35 -27
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -2
- mindspore/experimental/map_parameter.py +4 -4
- mindspore/experimental/optim/adadelta.py +22 -26
- mindspore/experimental/optim/adagrad.py +4 -4
- mindspore/experimental/optim/adam.py +4 -0
- mindspore/experimental/optim/adamax.py +4 -4
- mindspore/experimental/optim/adamw.py +4 -0
- mindspore/experimental/optim/asgd.py +1 -1
- mindspore/experimental/optim/lr_scheduler.py +40 -22
- mindspore/experimental/optim/radam.py +5 -5
- mindspore/experimental/optim/rprop.py +1 -1
- mindspore/experimental/optim/sgd.py +1 -1
- mindspore/hal/contiguous_tensors_handle.py +6 -10
- mindspore/hal/device.py +55 -81
- mindspore/hal/event.py +38 -55
- mindspore/hal/memory.py +115 -147
- mindspore/hal/stream.py +81 -125
- mindspore/include/dataset/constants.h +7 -4
- mindspore/include/dataset/execute.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +40 -2
- mindspore/mindrecord/__init__.py +20 -7
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +133 -702
- mindspore/mint/distributed/__init__.py +5 -1
- mindspore/mint/distributed/distributed.py +198 -113
- mindspore/mint/linalg/__init__.py +2 -0
- mindspore/mint/nn/__init__.py +280 -18
- mindspore/mint/nn/functional.py +282 -64
- mindspore/mint/nn/layer/__init__.py +4 -0
- mindspore/mint/nn/layer/_functions.py +7 -3
- mindspore/mint/nn/layer/activation.py +120 -13
- mindspore/mint/nn/layer/conv.py +234 -28
- mindspore/mint/nn/layer/normalization.py +15 -16
- mindspore/mint/nn/layer/padding.py +1 -1
- mindspore/mint/nn/layer/pooling.py +66 -1
- mindspore/mint/optim/__init__.py +2 -1
- mindspore/mint/optim/sgd.py +171 -0
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/__init__.py +4 -1
- mindspore/nn/cell.py +1253 -179
- mindspore/nn/layer/activation.py +23 -21
- mindspore/nn/layer/basic.py +22 -16
- mindspore/nn/layer/container.py +1 -1
- mindspore/nn/layer/conv.py +53 -42
- mindspore/nn/layer/embedding.py +9 -8
- mindspore/nn/layer/normalization.py +48 -42
- mindspore/nn/layer/pooling.py +75 -31
- mindspore/nn/layer/transformer.py +11 -10
- mindspore/nn/learning_rate_schedule.py +4 -2
- mindspore/nn/loss/loss.py +27 -19
- mindspore/nn/optim/ada_grad.py +6 -5
- mindspore/nn/optim/adadelta.py +9 -7
- mindspore/nn/optim/adafactor.py +1 -1
- mindspore/nn/optim/adam.py +18 -14
- mindspore/nn/optim/adamax.py +8 -7
- mindspore/nn/optim/adasum.py +5 -5
- mindspore/nn/optim/asgd.py +3 -1
- mindspore/nn/optim/ftrl.py +11 -9
- mindspore/nn/optim/lamb.py +1 -1
- mindspore/nn/optim/lazyadam.py +12 -10
- mindspore/nn/optim/momentum.py +7 -6
- mindspore/nn/optim/optimizer.py +2 -2
- mindspore/nn/optim/proximal_ada_grad.py +12 -10
- mindspore/nn/optim/rmsprop.py +13 -12
- mindspore/nn/optim/rprop.py +9 -7
- mindspore/nn/optim/sgd.py +9 -6
- mindspore/nn/optim/tft_wrapper.py +5 -2
- mindspore/nn/probability/bijector/bijector.py +17 -11
- mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
- mindspore/nn/probability/bijector/invert.py +2 -2
- mindspore/nn/probability/bijector/scalar_affine.py +3 -3
- mindspore/nn/probability/bijector/softplus.py +3 -2
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +1 -1
- mindspore/nn/probability/distribution/cauchy.py +4 -2
- mindspore/nn/probability/distribution/exponential.py +6 -7
- mindspore/nn/probability/distribution/gamma.py +2 -2
- mindspore/nn/probability/distribution/gumbel.py +2 -2
- mindspore/nn/probability/distribution/half_normal.py +5 -3
- mindspore/nn/probability/distribution/logistic.py +5 -3
- mindspore/nn/probability/distribution/poisson.py +1 -1
- mindspore/nn/probability/distribution/uniform.py +5 -3
- mindspore/nn/reinforcement/_tensors_queue.py +1 -1
- mindspore/nn/reinforcement/tensor_array.py +1 -1
- mindspore/nn/wrap/__init__.py +6 -6
- mindspore/nn/wrap/cell_wrapper.py +178 -117
- mindspore/nn/wrap/grad_reducer.py +45 -36
- mindspore/nn/wrap/loss_scale.py +3 -3
- mindspore/numpy/array_creations.py +3 -3
- mindspore/numpy/array_ops.py +1 -1
- mindspore/numpy/utils.py +1 -2
- mindspore/numpy/utils_const.py +1 -2
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +3 -2
- mindspore/ops/_grad_experimental/grad_comm_ops.py +18 -3
- mindspore/ops/_grad_experimental/grad_debug_ops.py +8 -1
- mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
- mindspore/ops/_register_for_op.py +0 -11
- mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
- mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -4
- mindspore/ops/_vmap/vmap_array_ops.py +32 -6
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +2 -1
- mindspore/ops/_vmap/vmap_math_ops.py +4 -7
- mindspore/ops/_vmap/vmap_nn_ops.py +9 -8
- mindspore/ops/auto_generate/__init__.py +4 -3
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +127 -52
- mindspore/ops/auto_generate/gen_extend_func.py +286 -208
- mindspore/ops/auto_generate/gen_ops_def.py +2783 -2335
- mindspore/ops/auto_generate/gen_ops_prim.py +8992 -2686
- mindspore/ops/auto_generate/pyboost_inner_prim.py +106 -76
- mindspore/ops/composite/__init__.py +2 -1
- mindspore/ops/composite/base.py +19 -24
- mindspore/ops/composite/math_ops.py +6 -16
- mindspore/ops/composite/multitype_ops/__init__.py +5 -2
- mindspore/ops/composite/multitype_ops/_compile_utils.py +4 -5
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
- mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
- mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
- mindspore/ops/composite/multitype_ops/pow_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
- mindspore/ops/function/__init__.py +28 -2
- mindspore/ops/function/_add_attr_func.py +58 -0
- mindspore/ops/function/array_func.py +1631 -2347
- mindspore/ops/function/clip_func.py +38 -45
- mindspore/ops/function/debug_func.py +36 -44
- mindspore/ops/function/grad/__init__.py +1 -0
- mindspore/ops/function/grad/grad_func.py +104 -71
- mindspore/ops/function/image_func.py +1 -1
- mindspore/ops/function/linalg_func.py +46 -78
- mindspore/ops/function/math_func.py +3024 -3855
- mindspore/ops/function/nn_func.py +678 -274
- mindspore/ops/function/other_func.py +159 -1
- mindspore/ops/function/parameter_func.py +17 -30
- mindspore/ops/function/random_func.py +216 -361
- mindspore/ops/function/reshard_func.py +4 -70
- mindspore/ops/function/sparse_func.py +3 -3
- mindspore/ops/function/sparse_unary_func.py +5 -5
- mindspore/ops/function/spectral_func.py +25 -58
- mindspore/ops/function/vmap_func.py +26 -18
- mindspore/ops/functional.py +8 -5
- mindspore/ops/functional_overload.py +655 -4
- mindspore/ops/op_info_register.py +32 -244
- mindspore/ops/operations/__init__.py +21 -14
- mindspore/ops/operations/_custom_ops_utils.py +235 -0
- mindspore/ops/operations/_grad_ops.py +1 -10
- mindspore/ops/operations/_inner_ops.py +5 -76
- mindspore/ops/operations/_ms_kernel.py +4 -10
- mindspore/ops/operations/_rl_inner_ops.py +1 -1
- mindspore/ops/operations/_scalar_ops.py +3 -2
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/_tensor_array.py +1 -1
- mindspore/ops/operations/array_ops.py +39 -24
- mindspore/ops/operations/comm_ops.py +150 -107
- mindspore/ops/operations/custom_ops.py +287 -32
- mindspore/ops/operations/debug_ops.py +119 -16
- mindspore/ops/operations/inner_ops.py +1 -1
- mindspore/ops/operations/linalg_ops.py +1 -58
- mindspore/ops/operations/manually_defined/_inner.py +1 -1
- mindspore/ops/operations/manually_defined/ops_def.py +746 -79
- mindspore/ops/operations/math_ops.py +21 -18
- mindspore/ops/operations/nn_ops.py +67 -224
- mindspore/ops/operations/other_ops.py +62 -9
- mindspore/ops/operations/random_ops.py +13 -7
- mindspore/ops/operations/reshard_ops.py +1 -1
- mindspore/ops/operations/sparse_ops.py +2 -2
- mindspore/ops/primitive.py +43 -32
- mindspore/ops/tensor_method.py +243 -17
- mindspore/ops_generate/__init__.py +0 -5
- mindspore/ops_generate/aclnn/__init__.py +0 -0
- mindspore/ops_generate/{aclnn_kernel_register_auto_cc_generator.py → aclnn/aclnn_kernel_register_auto_cc_generator.py} +43 -18
- mindspore/ops_generate/{gen_aclnn_implement.py → aclnn/gen_aclnn_implement.py} +49 -51
- mindspore/ops_generate/api/__init__.py +0 -0
- mindspore/ops_generate/{add_tensor_docs_generator.py → api/add_tensor_docs_generator.py} +9 -7
- mindspore/ops_generate/{cpp_create_prim_instance_helper_generator.py → api/cpp_create_prim_instance_helper_generator.py} +6 -9
- mindspore/ops_generate/{functional_map_cpp_generator.py → api/functional_map_cpp_generator.py} +25 -12
- mindspore/ops_generate/{functional_overload_py_generator.py → api/functional_overload_py_generator.py} +8 -6
- mindspore/ops_generate/{functions_cc_generator.py → api/functions_cc_generator.py} +14 -10
- mindspore/ops_generate/api/gen_api.py +103 -0
- mindspore/ops_generate/{op_api_proto.py → api/op_api_proto.py} +98 -69
- mindspore/ops_generate/{tensor_func_reg_cpp_generator.py → api/tensor_func_reg_cpp_generator.py} +82 -43
- mindspore/ops_generate/common/__init__.py +0 -0
- mindspore/ops_generate/common/gen_constants.py +91 -0
- mindspore/ops_generate/{gen_utils.py → common/gen_utils.py} +72 -19
- mindspore/ops_generate/{op_proto.py → common/op_proto.py} +64 -1
- mindspore/ops_generate/{template.py → common/template.py} +96 -84
- mindspore/ops_generate/gen_ops.py +23 -325
- mindspore/ops_generate/op_def/__init__.py +0 -0
- mindspore/ops_generate/op_def/gen_op_def.py +90 -0
- mindspore/ops_generate/{lite_ops_cpp_generator.py → op_def/lite_ops_cpp_generator.py} +47 -11
- mindspore/ops_generate/{ops_def_cc_generator.py → op_def/ops_def_cc_generator.py} +18 -10
- mindspore/ops_generate/{ops_def_h_generator.py → op_def/ops_def_h_generator.py} +5 -5
- mindspore/ops_generate/{ops_name_h_generator.py → op_def/ops_name_h_generator.py} +30 -15
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
- mindspore/ops_generate/op_def_py/__init__.py +0 -0
- mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
- mindspore/ops_generate/{op_def_py_generator.py → op_def_py/op_def_py_generator.py} +6 -5
- mindspore/ops_generate/{op_prim_py_generator.py → op_def_py/op_prim_py_generator.py} +24 -15
- mindspore/ops_generate/pyboost/__init__.py +0 -0
- mindspore/ops_generate/{auto_grad_impl_cc_generator.py → pyboost/auto_grad_impl_cc_generator.py} +11 -7
- mindspore/ops_generate/{auto_grad_reg_cc_generator.py → pyboost/auto_grad_reg_cc_generator.py} +7 -7
- mindspore/ops_generate/{gen_pyboost_func.py → pyboost/gen_pyboost_func.py} +40 -16
- mindspore/ops_generate/{op_template_parser.py → pyboost/op_template_parser.py} +105 -24
- mindspore/ops_generate/{pyboost_functions_cpp_generator.py → pyboost/pyboost_functions_cpp_generator.py} +55 -18
- mindspore/ops_generate/{pyboost_functions_h_generator.py → pyboost/pyboost_functions_h_generator.py} +42 -10
- mindspore/ops_generate/{pyboost_functions_py_generator.py → pyboost/pyboost_functions_py_generator.py} +6 -6
- mindspore/ops_generate/{pyboost_grad_function_cpp_generator.py → pyboost/pyboost_grad_function_cpp_generator.py} +11 -10
- mindspore/ops_generate/{pyboost_inner_prim_generator.py → pyboost/pyboost_inner_prim_generator.py} +8 -7
- mindspore/ops_generate/{pyboost_native_grad_functions_generator.py → pyboost/pyboost_native_grad_functions_generator.py} +14 -10
- mindspore/ops_generate/{pyboost_op_cpp_code_generator.py → pyboost/pyboost_op_cpp_code_generator.py} +140 -53
- mindspore/ops_generate/{pyboost_overload_functions_cpp_generator.py → pyboost/pyboost_overload_functions_cpp_generator.py} +28 -15
- mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +88 -4
- mindspore/ops_generate/resources/__init__.py +0 -0
- mindspore/ops_generate/resources/resource_list.py +30 -0
- mindspore/ops_generate/resources/resource_loader.py +36 -0
- mindspore/ops_generate/resources/resource_manager.py +64 -0
- mindspore/ops_generate/resources/yaml_loader.py +88 -0
- mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
- mindspore/parallel/__init__.py +6 -2
- mindspore/parallel/_auto_parallel_context.py +140 -12
- mindspore/parallel/_cell_wrapper.py +132 -15
- mindspore/parallel/_parallel_serialization.py +95 -4
- mindspore/parallel/_ps_context.py +1 -1
- mindspore/parallel/_recovery_context.py +7 -2
- mindspore/parallel/_tensor.py +142 -18
- mindspore/parallel/_utils.py +198 -25
- mindspore/parallel/algo_parameter_config.py +3 -3
- mindspore/parallel/auto_parallel.py +732 -0
- mindspore/parallel/checkpoint_convert.py +159 -0
- mindspore/parallel/checkpoint_transform.py +658 -37
- mindspore/parallel/cluster/process_entity/_api.py +151 -19
- mindspore/parallel/cluster/run.py +1 -1
- mindspore/parallel/function/__init__.py +24 -0
- mindspore/parallel/function/reshard_func.py +258 -0
- mindspore/parallel/nn/__init__.py +25 -0
- mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
- mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
- mindspore/parallel/parameter_broadcast.py +24 -13
- mindspore/parallel/shard.py +137 -62
- mindspore/parallel/transform_safetensors.py +288 -95
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +9 -5
- mindspore/profiler/analysis/parser/ascend_cann_parser.py +6 -2
- mindspore/profiler/analysis/parser/ms_framework_parser.py +4 -4
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -4
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +25 -0
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +241 -86
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +41 -2
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +33 -35
- mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +7 -0
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +8 -3
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +141 -30
- mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +5 -6
- mindspore/profiler/common/ascend_msprof_exporter.py +5 -4
- mindspore/profiler/common/constant.py +12 -0
- mindspore/profiler/common/msprof_cmd_tool.py +42 -23
- mindspore/profiler/common/path_manager.py +24 -0
- mindspore/profiler/common/profiler_context.py +26 -2
- mindspore/profiler/common/profiler_meta_data.py +74 -0
- mindspore/profiler/common/profiler_parameters.py +59 -18
- mindspore/profiler/common/profiler_path_manager.py +66 -7
- mindspore/profiler/dynamic_profiler.py +112 -79
- mindspore/profiler/envprofiler.py +26 -1
- mindspore/profiler/experimental_config.py +197 -0
- mindspore/profiler/mstx.py +57 -14
- mindspore/profiler/platform/npu_profiler.py +33 -7
- mindspore/profiler/profiler.py +541 -45
- mindspore/profiler/profiler_action_controller.py +1 -1
- mindspore/profiler/profiler_interface.py +4 -0
- mindspore/profiler/schedule.py +57 -22
- mindspore/rewrite/api/node.py +15 -13
- mindspore/rewrite/api/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +25 -14
- mindspore/run_check/run_check.py +1 -1
- mindspore/runtime/__init__.py +2 -2
- mindspore/runtime/executor.py +40 -11
- mindspore/runtime/memory.py +37 -13
- mindspore/safeguard/rewrite_obfuscation.py +12 -9
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +8 -8
- mindspore/train/_utils.py +43 -9
- mindspore/train/amp.py +1 -1
- mindspore/train/callback/__init__.py +2 -2
- mindspore/train/callback/_callback.py +2 -16
- mindspore/train/callback/_checkpoint.py +24 -40
- mindspore/train/callback/_cluster_monitor.py +14 -18
- mindspore/train/callback/_flops_collector.py +2 -3
- mindspore/train/callback/_history.py +7 -4
- mindspore/train/callback/_lambda_callback.py +2 -2
- mindspore/train/callback/_landscape.py +0 -3
- mindspore/train/callback/_loss_monitor.py +2 -1
- mindspore/train/callback/_on_request_exit.py +6 -5
- mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
- mindspore/train/callback/_summary_collector.py +8 -13
- mindspore/train/callback/_time_monitor.py +2 -1
- mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +204 -105
- mindspore/train/data_sink.py +25 -2
- mindspore/train/dataset_helper.py +4 -5
- mindspore/train/loss_scale_manager.py +8 -7
- mindspore/train/metrics/accuracy.py +3 -3
- mindspore/train/metrics/confusion_matrix.py +9 -9
- mindspore/train/metrics/error.py +3 -3
- mindspore/train/metrics/hausdorff_distance.py +4 -4
- mindspore/train/metrics/mean_surface_distance.py +3 -3
- mindspore/train/metrics/metric.py +0 -12
- mindspore/train/metrics/occlusion_sensitivity.py +4 -2
- mindspore/train/metrics/precision.py +8 -6
- mindspore/train/metrics/recall.py +9 -9
- mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
- mindspore/train/mind_ir_pb2.py +19 -12
- mindspore/train/model.py +262 -127
- mindspore/train/serialization.py +246 -988
- mindspore/train/summary/_summary_adapter.py +2 -2
- mindspore/train/summary/summary_record.py +1 -1
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +4 -2
- mindspore/utils/hooks.py +81 -0
- mindspore/utils/runtime_execution_order_check.py +2 -0
- mindspore/utils/utils.py +138 -4
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.5.0.dist-info → mindspore-2.6.0.dist-info}/METADATA +2 -1
- {mindspore-2.5.0.dist-info → mindspore-2.6.0.dist-info}/RECORD +485 -440
- mindspore/_install_custom.py +0 -43
- mindspore/common/_register_for_adapter.py +0 -74
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -136
- mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
- mindspore/ops_generate/gen_constants.py +0 -190
- mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
- mindspore/ops_generate/ops_primitive_h_generator.py +0 -81
- /mindspore/ops_generate/{base_generator.py → common/base_generator.py} +0 -0
- {mindspore-2.5.0.dist-info → mindspore-2.6.0.dist-info}/WHEEL +0 -0
- {mindspore-2.5.0.dist-info → mindspore-2.6.0.dist-info}/entry_points.txt +0 -0
- {mindspore-2.5.0.dist-info → mindspore-2.6.0.dist-info}/top_level.txt +0 -0
mindspore/ops/function/nn_func.py

@@ -29,7 +29,7 @@ import mindspore.common.dtype as mstype
 from mindspore.ops.function.math_func import logsumexp, div
 from mindspore.ops.function.random_func import _get_seed, _set_prim_op_user_data
 from mindspore.common.tensor import Tensor
-from mindspore._c_expression import
+from mindspore._c_expression import TensorPy as Tensor_
 from mindspore.ops._primitive_cache import _get_cache_prim
 from mindspore import _checkparam as validator
 from mindspore.ops.composite.multitype_ops._constexpr_utils import raise_value_error
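For downstream code that imported the C++ binding class directly, the rename above is the visible 2.6.0 change. A compatibility sketch (illustrative, not from the diff; the 2.5.0 import is truncated in this view, so `Tensor` as the old binding name is an assumption):

    try:
        from mindspore._c_expression import TensorPy as Tensor_  # 2.6.0 binding name
    except ImportError:
        from mindspore._c_expression import Tensor as Tensor_    # assumed 2.5.0 name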
@@ -41,10 +41,11 @@ from mindspore.ops.operations.nn_ops import TripletMarginLoss
 from mindspore.ops.operations._sequence_ops import TupleToTensor, TensorToTuple, ListToTensor
 from mindspore.common.api import _function_forbid_reuse
 from mindspore.ops.auto_generate import log_softmax, dense, prelu, celu, fast_gelu, silu, elu, sigmoid, relu6, \
-    softmax_impl, swiglu, logsigmoid_op
+    softmax_impl, swiglu, logsigmoid_op, kl_div_op, divs_op
 from mindspore.ops.auto_generate import relu_op, inplace_relu_op
 from mindspore.ops.auto_generate import group_norm_op, rms_norm, add_rms_norm, layer_norm_ext_op, batch_norm_ext_op,\
     mse_loss_ext
+# 1
 from mindspore.ops.auto_generate import (reflection_pad_1d_op, reflection_pad_2d_op, add_layernorm_v2_op,
                                          reflection_pad_3d_op,  # pylint: disable=W0611
                                          replication_pad_1d_op, replication_pad_2d_op, replication_pad_3d_op,
@@ -53,16 +54,58 @@ from mindspore.ops.auto_generate import (reflection_pad_1d_op, reflection_pad_2d
                                          upsample_linear1d_op, upsample_bilinear2d_op, upsample_bicubic2d_op,
                                          upsample_trilinear3d_impl, fill_scalar_op, floor_op, nllloss_2d_op,
                                          masked_fill_op, masked_select, ones, flatten_ext, conv_transpose2d)
+# 2
+
+# 3
+
+# 4
+
+# 5
+
+# 6
+
+# 7
+
+# 8
+
+# 9
+
+# 10
+
+# 11
+
+# 12
+
+# 13
+
+# 14
+
+# 15
+from mindspore.ops.auto_generate import avg_pool3d_ext_op
+# 16
+
+# 17
+
+# 18
+
+# 19
+
+# 20
+
 from mindspore.ops.auto_generate.gen_ops_prim import embedding_op, MaxPoolWithIndices, \
     PromptFlashAttention, MaxPoolWithMask
-from mindspore.ops.auto_generate.gen_ops_prim import conv3d_ext_op, conv3d_padding_op, conv2d_ext_op,
+from mindspore.ops.auto_generate.gen_ops_prim import conv3d_ext_op, conv3d_padding_op, conv2d_ext_op, \
+    conv2d_padding_op, conv1d_ext_op, conv1d_padding_op, speed_fusion_attention_op
 from mindspore.common.generator import default_generator
 from mindspore.ops.auto_generate import hardshrink, hardsigmoid, hardswish
 from mindspore.ops.auto_generate import softshrink
+from mindspore.ops.auto_generate import soft_margin_loss
+from mindspore.ops.auto_generate import moe_token_permute, moe_token_unpermute
 from mindspore.ops.auto_generate import adaptive_avg_pool2d_ext_op
 from mindspore.ops.auto_generate.pyboost_inner_prim import nllloss_impl
+from mindspore.ops.auto_generate.pyboost_inner_prim import adaptive_max_pool2d_impl
 from mindspore.ops.function.array_func import gather_ext
-from mindspore.ops.operations.manually_defined import flash_attention_score
+from mindspore.ops.operations.manually_defined import flash_attention_score, fused_infer_attention_score
 
 abs_ = P.Abs()
 add_ = P.Add()
@@ -160,11 +203,11 @@ def adaptive_avg_pool2d(input, output_size):
     .. math::
 
         out\_shape = \begin{cases}
-        input\_shape[-2] + output\_size[1], & \text{if } output\_size text{ is (None, w);}\\
-        output\_size[0] + input\_shape[-1], & \text{if } output\_size text{ is (h, None);}\\
-        input\_shape[-2:], & \text{if } output\_size text{ is (None, None);}\\
-        (h, h), & \text{if } output\_size text{ is h;}\\
-        (h, w), & \text{if } output\_size text{ is (h, w)}
+        input\_shape[-2] + output\_size[1], & \text{if } output\_size \text{ is (None, w);}\\
+        output\_size[0] + input\_shape[-1], & \text{if } output\_size \text{ is (h, None);}\\
+        input\_shape[-2:], & \text{if } output\_size \text{ is (None, None);}\\
+        (h, h), & \text{if } output\_size \text{ is h;}\\
+        (h, w), & \text{if } output\_size \text{ is (h, w)}
         \end{cases}
 
     Raises:
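In the corrected cases above (the missing backslashes in `\text` broke the LaTeX rendering), a `None` entry in `output_size` keeps that input dimension unchanged. A minimal doctest-style check (illustrative, not part of the diff):

    >>> import mindspore
    >>> import numpy as np
    >>> from mindspore import Tensor, ops
    >>> x = Tensor(np.ones((1, 3, 8, 10)), mindspore.float32)
    >>> ops.adaptive_avg_pool2d(x, (None, 5)).shape  # H kept at 8, W pooled to 5
    (1, 3, 8, 5)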
@@ -406,13 +449,15 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
 
     Args:
         input_x (Tensor): Tensor of shape :math:`(N, C_{in}, L_{in})`.
-        kernel_size (int): The size of kernel window used to take the average value. Default: ``1`` .
-        stride (Union(int, tuple[int])): The distance of kernel moving. `stride` can either be an int
+        kernel_size (int, optional): The size of kernel window used to take the average value. Default: ``1`` .
+        stride (Union(int, tuple[int]), optional): The distance of kernel moving. `stride` can either be an int
             number or a tuple of one int number. Default: ``1`` .
-        padding (Union(int, tuple[int])): The pad value to be filled. `padding` can either be an integer
+        padding (Union(int, tuple[int]), optional): The pad value to be filled. `padding` can either be an integer
             or a tuple of one integer. Default: ``0`` .
-        ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape.
-
+        ceil_mode (bool, optional): If True, apply ceil instead of floor to compute the output shape.
+            Default: ``False``.
+        count_include_pad (bool, optional): If True, include the zero-padding in the averaging calculation.
+            Default: ``True`` .
 
     Returns:
         Tensor of shape :math:`(N, C_{out}, L_{out})`.
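The two flags documented above interact at the padded borders: `ceil_mode` changes how :math:`L_{out}` is rounded, while `count_include_pad` decides whether padded zeros enter the averaging divisor. A minimal sketch (inputs assumed, not from the diff):

    >>> import mindspore
    >>> import numpy as np
    >>> from mindspore import Tensor, ops
    >>> x = Tensor(np.ones((1, 1, 4)), mindspore.float32)
    >>> # padding=1 adds one zero at each end; with count_include_pad=True those
    >>> # zeros are averaged in, pulling the border outputs below 1.0
    >>> y_incl = ops.avg_pool1d(x, kernel_size=2, stride=2, padding=1, count_include_pad=True)
    >>> y_excl = ops.avg_pool1d(x, kernel_size=2, stride=2, padding=1, count_include_pad=False)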
@@ -786,6 +831,77 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
     return avg_pool_op(input_x)
 
 
+def avg_pool3d_ext(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True,
+                   divisor_override=None):
+    r"""
+    Applies a 3D average pooling over an input Tensor which can be regarded as a composition of
+    3D input planes. Typically the input is of shape :math:`(N, C, D_{in}, H_{in}, W_{in})` ,
+    outputs regional average in the :math:`(D_{in}, H_{in}, W_{in})` -dimension.
+    Given kernel size :math:`(kD, kH, kW)` and `stride` , the operation is as follows.
+
+    .. math::
+        \text{output}(N_i, C_j, d, h, w) = \frac{1}{kD * kH * kW} \sum_{l=0}^{kD-1} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1}
+        \text{input}(N_i, C_j, stride[0] \times d + l, stride[1] \times h + m, stride[2] \times w + n)
+
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Note:
+        This interface currently does not support Atlas A2 training series products.
+
+    Args:
+        input (Tensor): Tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
+        kernel_size (Union[int, tuple[int], list[int]]): The size of kernel used to take the average value.
+            Can be a single number or a tuple :math:`(kD, kH, kW)` .
+        stride (Union[int, tuple[int], list[int]], optional): The distance of kernel moving.
+            Can be a single number or a tuple :math:`(sD, sH, sW)` . Default: ``None``,
+            where its value is equal to `kernel_size`.
+        padding (Union[int, tuple[int], list[int]], optional): Implicit zero padding to be added on both sides.
+            Can be a single number or a tuple :math:`(padD, padH, padW)` . Default: ``0``.
+        ceil_mode (bool, optional): If True, apply ceil instead of floor to compute the output shape.
+            Default: ``False``.
+        count_include_pad (bool, optional): If True, include the zero-padding in the averaging calculation.
+            Default: ``True`` .
+        divisor_override (int, optional): If specified, it will be used as divisor in the averaging calculation,
+            otherwise size of pooling region will be used. Default: ``None``.
+
+    Returns:
+        Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(C, D_{out}, H_{out}, W_{out})`.
+
+        .. math::
+            \begin{array}{ll} \\
+                D_{out} = \frac{D_{in} + 2 \times padding[0] - kernel\_size[0]}{stride[0]} + 1 \\
+                H_{out} = \frac{H_{in} + 2 \times padding[1] - kernel\_size[1]}{stride[1]} + 1 \\
+                W_{out} = \frac{W_{in} + 2 \times padding[2] - kernel\_size[2]}{stride[2]} + 1
+            \end{array}
+
+    Raises:
+        TypeError: If `input` is not a Tensor.
+        TypeError: If `kernel_size` or `stride` is neither int nor tuple.
+        TypeError: If `ceil_mode` or `count_include_pad` is not a bool.
+        TypeError: If `divisor_override` is not an int or None.
+        ValueError: If the dimension of `input` is not equal to `4` or `5`.
+        ValueError: If `kernel_size` or `stride` is less than 1.
+        ValueError: If value of `padding` is less than `0`.
+        ValueError: If `kernel_size`, `padding` or `stride` is a tuple whose length is not equal to `1` or `3`.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore
+        >>> import numpy as np
+        >>> from mindspore import Tensor, ops
+        >>> input_x = Tensor(np.arange(1 * 2 * 2 * 2 * 3).reshape((1, 2, 2, 2, 3)), mindspore.float16)
+        >>> output = ops.avg_pool3d_ext(input_x, kernel_size=2, stride=1)
+        >>> print(output)
+        [[[[[ 5.  6.]]]
+          [[[17. 18.]]]]]
+    """
+    return avg_pool3d_ext_op(input, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)
+
+
 @constexpr
 def is_ascend_backend():
     """Check if the Ascend is used"""
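For context (not part of the diff): `divisor_override` replaces the window volume :math:`kD \cdot kH \cdot kW` as the denominator in the formula above. A hedged doctest-style sketch reusing the docstring's call path:

    >>> import mindspore
    >>> import numpy as np
    >>> from mindspore import Tensor, ops
    >>> x = Tensor(np.ones((1, 1, 2, 2, 2)), mindspore.float32)
    >>> # default divisor is the window volume 2*2*2 = 8, so averages of ones are 1.0;
    >>> # divisor_override=1 turns each output into the raw window sum (8.0 here)
    >>> avg = ops.avg_pool3d_ext(x, kernel_size=2)
    >>> summed = ops.avg_pool3d_ext(x, kernel_size=2, divisor_override=1)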
@@ -905,7 +1021,7 @@ def adaptive_max_pool2d(input, output_size, return_indices=False):
         \end{align}
 
     Note:
-
+        In KBK mode, `output_size` does not support mutable.
 
     Args:
         input (Tensor): A 3D or 4D tensor,
@@ -914,7 +1030,7 @@ def adaptive_max_pool2d(input, output_size, return_indices=False):
             or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None.
             If it is None, it means the output size is the same as the input size.
 
-        return_indices (bool): If `return_indices` is ``True`` , the indices of max value would be output.
+        return_indices (bool, optional): If `return_indices` is ``True`` , the indices of max value would be output.
             Default: ``False`` .
 
     Returns:
@@ -966,11 +1082,17 @@ def adaptive_max_pool2d(input, output_size, return_indices=False):
          [[8. 9.]]
          [[8. 9.]]]]
     """
+    output_size_ = None
     _check_adaptive_max_pool2d(return_indices)
-
-
-
-
+
+    if isinstance(output_size, int):
+        output_size_ = (output_size, output_size)
+    else:
+        output_size_ = tuple(-1 if val is None else val for val in output_size)
+
+    if return_indices:
+        return adaptive_max_pool2d_impl(input, output_size_)
+    return adaptive_max_pool2d_impl(input, output_size_)[0]
 
 
 def adaptive_max_pool3d(input, output_size, return_indices=False):
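The rewritten body above normalizes `output_size` before calling the pyboost kernel: an int ``h`` becomes ``(h, h)`` and each ``None`` entry becomes ``-1``, the kernel's sentinel for "keep this input dimension". A standalone sketch of just that normalization (illustrative, with a hypothetical helper name):

    def _normalize_output_size(output_size):
        # int h -> (h, h); None entries -> the kernel's -1 sentinel
        if isinstance(output_size, int):
            return (output_size, output_size)
        return tuple(-1 if val is None else val for val in output_size)

    assert _normalize_output_size(3) == (3, 3)
    assert _normalize_output_size((None, 5)) == (-1, 5)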
@@ -1445,7 +1567,7 @@ def dropout(input, p=0.5, training=True, seed=None):
         input (Tensor): The input Tensor of shape :math:`(*, N)`, with data type of float16, float32 or float64.
         p (float, optional): The dropping rate, between 0 and 1, e.g. p = 0.1,
             means dropping out 10% of input units. Default: ``0.5`` .
-        training (bool): Apply dropout if is True. Default: ``True``.
+        training (bool, optional): Apply dropout if is True. Default: ``True``.
         seed (int, optional): Seed is used as entropy source for Random number engines generating pseudo-random numbers.
             Default: ``None`` , which will be treated as ``0`` .
@@ -1623,7 +1745,7 @@ def dropout2d(input, p=0.5, training=True):
         input (Tensor): A `4D` tensor with shape :math:`(N, C, H, W)`, where `N` is the batch size, `C` is the number
             of channels, `H` is the feature height, and `W` is the feature width. The data type must be int8,
             int16, int32, int64, float16, float32 or float64.
-        p (float): The dropping probability of a channel
+        p (float): The dropping probability of a channel. The range is [0.0, 1.0], e.g. `p` = 0.8,
             which means dropping out 80% of channels. Default: ``0.5`` .
         training(bool): If `training` is True, applying dropout, otherwise, not applying. Default: ``True`` .
@@ -2035,6 +2157,75 @@ def kl_div(logits, labels, reduction='mean'):
     return _get_cache_prim(P.KLDivLoss)(reduction=reduction)(logits, labels)
 
 
+def kl_div_ext(input, target, reduction='mean', log_target=False):
+    r"""
+    Computes the Kullback-Leibler divergence between the `input` and the `target`.
+
+    For tensors of the same shape :math:`x` and :math:`y`,
+    the updating formulas of KLDivLoss algorithm are as follows,
+
+    .. math::
+        L(x, y) = y \cdot (\log y - x)
+
+    Then,
+
+    .. math::
+        \ell(x, y) = \begin{cases}
+        L(x, y), & \text{if reduction} = \text{'none';}\\
+        \operatorname{mean}(L(x, y)), & \text{if reduction} = \text{'mean';}\\
+        \operatorname{sum}(L(x, y)) / x.\operatorname{shape}[0], & \text{if reduction} = \text{'batchmean';}\\
+        \operatorname{sum}(L(x, y)), & \text{if reduction} = \text{'sum'.}
+        \end{cases}
+
+    where :math:`x` represents `input`, :math:`y` represents `target`, and :math:`\ell(x, y)` represents the output.
+
+    Note:
+        The output aligns with the mathematical definition of Kullback-Leibler divergence
+        only when `reduction` is set to ``'batchmean'``.
+
+    Args:
+        input (Tensor): The input Tensor. The data type must be float16, float32 or bfloat16 (only supported by
+            Atlas A2 training series products).
+        target (Tensor): The target Tensor which has the same type as `input`. The shapes of `target` and `input`
+            should be broadcastable.
+        reduction (str, optional): Specifies the reduction to be applied to the output. Default: ``'mean'``.
+        log_target (bool, optional): Specifies whether `target` is passed in the log space. Default: ``False``.
+
+    Returns:
+        Tensor, has the same dtype as `input`. If `reduction` is ``'none'``, then output has the shape as broadcast
+        result of the `input` and `target`. Otherwise, it is a scalar Tensor.
+
+    Raises:
+        TypeError: If neither `input` nor `target` is a Tensor.
+        TypeError: If dtype of `input` or `target` is not float16, float32 or bfloat16.
+        TypeError: If dtype of `target` is not the same as `input`.
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``, ``'batchmean'``.
+        ValueError: If shapes of `target` and `input` can not be broadcastable.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore as ms
+        >>> from mindspore import ops
+        >>> import numpy as np
+        >>> input = ms.Tensor(np.array([[0.5, 0.5], [0.4, 0.6]]), ms.float32)
+        >>> target = ms.Tensor(np.array([[0., 1.], [1., 0.]]), ms.float32)
+        >>> output = ops.kl_div_ext(input, target, reduction='mean', log_target=False)
+        >>> print(output)
+        -0.225
+    """
+    if reduction == 'batchmean':
+        reduced = kl_div_op(input, target, 'sum', log_target)
+    else:
+        reduced = kl_div_op(input, target, reduction, log_target)
+
+    if reduction == 'batchmean' and input.ndim != 0:
+        reduced = divs_op(reduced, input.shape[0])
+
+    return reduced
+
+
 @constexpr
 def _check_axis_in_range(axis, ndim):
     """Checks axes are with the bounds of ndim"""
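A worked check of the ``'batchmean'`` branch added above (illustrative, not part of the diff): with the docstring's inputs, :math:`L(x, y) = y \cdot (\log y - x)` is nonzero only where `target` is 1, giving :math:`-0.5` and :math:`-0.4`. So ``'sum'`` yields :math:`-0.9`, ``'mean'`` yields :math:`-0.225` (the value printed in the example), and ``'batchmean'`` divides the sum by the batch size ``input.shape[0] == 2``:

    >>> output = ops.kl_div_ext(input, target, reduction='batchmean')
    >>> # evaluates to -0.9 / 2 = -0.45 for the inputs above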
@@ -2094,33 +2285,22 @@ def _check_input_tensor(arg_name, *tensors):
 
 def flip(input, dims):
     """
-    Reverses
-
-    The shape of the tensor is preserved, but the elements are reordered.
+    Reverses elements in a tensor along the given dims.
 
     Args:
-        input (Tensor):
-        dims (Union[list[int], tuple[int]]):
-            Flipping is performed on all of the axes specified in the tuple,
-            If `dims` is a tuple of integers contains negative, it counts from the last to the first axis.
+        input (Tensor): The input tensor.
+        dims (Union[list[int], tuple[int]]): The dimension to flip.
 
     Returns:
-        Tensor
-
-    Raises:
-        TypeError: If the input is not a tensor.
-        ValueError: If `dims` is None.
-        ValueError: If `dims` is not a list/tuple of ints.
+        Tensor
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
         >>> import mindspore
-        >>>
-        >>>
-        >>> input = mindspore.Tensor(np.arange(1, 9).reshape((2, 2, 2)))
-        >>> output = ops.flip(input, (0, 2))
+        >>> input = mindspore.tensor(mindspore.ops.arange(1, 9).reshape((2, 2, 2)))
+        >>> output = mindspore.ops.flip(input, (0, 2))
         >>> print(output)
         [[[6 5]
           [8 7]]
@@ -2133,26 +2313,21 @@ def flip(input, dims):
 
 def flipud(input):
     """
-
+    Flip the input tensor in up/down direction.
 
     Args:
-        input (Tensor):
+        input (Tensor): The input tensor, the dimension must be at least 2.
 
     Returns:
-        Tensor
-
-    Raises:
-        TypeError: If the input is not a tensor.
+        Tensor
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
-        >>> import mindspore
-        >>>
-        >>>
-        >>> input = ms.Tensor(np.arange(1, 9).reshape((2, 2, 2)))
-        >>> output = ops.flipud(input)
+        >>> import mindspore
+        >>> input = mindspore.tensor(mindspore.ops.arange(1, 9).reshape((2, 2, 2)))
+        >>> output = mindspore.ops.flipud(input)
         >>> print(output)
         [[[5 6]
           [7 8]]
@@ -2164,26 +2339,21 @@ def flipud(input):
 
 def fliplr(input):
     """
-
+    Flip the input tensor in left/right direction.
 
     Args:
-        input (Tensor):
+        input (Tensor): The input tensor, the dimension must be at least 2.
 
     Returns:
-        Tensor
-
-    Raises:
-        TypeError: If the input is not a tensor.
+        Tensor
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
-        >>> import mindspore
-        >>>
-        >>>
-        >>> input = ms.Tensor(np.arange(1, 9).reshape((2, 2, 2)))
-        >>> output = ops.fliplr(input)
+        >>> import mindspore
+        >>> input = mindspore.tensor(mindspore.ops.arange(1, 9).reshape((2, 2, 2)))
+        >>> output = mindspore.ops.fliplr(input)
         >>> print(output)
         [[[3 4]
           [1 2]]
@@ -2195,29 +2365,33 @@ def fliplr(input):
 
 def is_floating_point(input):
     """
-
-    mindspore.float32, mindspore.float16.
+    If the data type of the tensor is a floating point data type, return True. Otherwise return False.
 
     Args:
         input (Tensor): The input Tensor.
 
     Returns:
-        Bool
+        Bool
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
-        >>> import mindspore
-        >>>
-        >>>
-        >>> x = ms.Tensor([1, 2, 3], ms.float32)
-        >>> y = ms.Tensor([1, 2, 3], ms.int64)
-        >>> output = ops.is_floating_point(x)
-        >>> output2 = ops.is_floating_point(y)
-        >>> print(output)
+        >>> import mindspore
+        >>> input = mindspore.tensor([False, 0j, 1, 2.1, 1+2j], mindspore.float64)
+        >>> mindspore.ops.is_floating_point(input)
         True
-        >>>
+        >>>
+        >>> input = mindspore.tensor([False, 0j, 1, 2.1, 1+2j], mindspore.float32)
+        >>> mindspore.ops.is_floating_point(input)
+        True
+        >>>
+        >>> input = mindspore.tensor([False, 0j, 1, 2.1, 1+2j], mindspore.float16)
+        >>> mindspore.ops.is_floating_point(input)
+        True
+        >>>
+        >>> input = mindspore.tensor([False, 0j, 1, 2.1, 1+2j], mindspore.int32)
+        >>> mindspore.ops.is_floating_point(input)
         False
     """
     return input.dtype in [mstype.float32, mstype.bfloat16, mstype.float16, mstype.float64]
@@ -2339,12 +2513,20 @@ def interpolate(input,
             If scale_factor is a tuple or list, its length should be the same as the number of dimensions in input
             after removing the first two dimensions N, C.
             One and only one of size and scale_factor can be set to None. Default: ``None`` .
-        mode (str): The sampling algorithm.
-            One of
-
-
+        mode (str, optional): The sampling algorithm. Default: ``"nearest"`` .
+            One of the following sampling methods can be used:
+
+            - 'nearest': the nearest neighbours interpolation.
+            - 'linear': Linear interpolation, 3D only.
+            - 'bilinear': Bilinear interpolation, 4D only.
+            - 'trilinear': Trilinear interpolation, 5D only.
+            - 'bicubic': Double trilinear interpolation, 4D only.
+            - 'area': area interpolation.
+            - 'nearest-exact': matches Scikit-Image and PIL nearest neighbours interpolation algorithms and fixes
+              known issues with `nearest`, for 3D and 4D.
 
-        align_corners (bool): Whether to use corner alignment for coordinate mapping.
+        align_corners (bool, optional): Whether to use corner alignment for coordinate mapping.
+            Assuming a transformation is
             applied to the input Tensor along the x-axis, the specific calculation formula is as follows:
 
         .. code-block::
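For context (not from the diff), the documented modes map onto input ranks: 'linear' expects 3D :math:`(N, C, W)` input, 'bilinear' expects 4D :math:`(N, C, H, W)`. A minimal doctest-style sketch using `size` with 'bilinear':

    >>> import mindspore
    >>> import numpy as np
    >>> from mindspore import Tensor, ops
    >>> x = Tensor(np.arange(12, dtype=np.float32).reshape(1, 1, 3, 4))
    >>> ops.interpolate(x, size=(6, 8), mode='bilinear').shape
    (1, 1, 6, 8)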
@@ -2361,9 +2543,10 @@ def interpolate(input,
 
             This is only valid for ``'linear'``, ``'bilinear'``, or ``'bicubic'`` modes. Default: ``False`` .
         recompute_scale_factor (bool, optional): Recalculate `scale_factor`.
-
-
-
+
+            - If True, the parameter `size` will be calculated using the value of the `scale_factor`,
+              and finally scaled using the value of `size`.
+            - If False, the value of `size` or `scale_factor` will be used for direct interpolation. Default: ``None`` .
 
     .. note::
         The 'nearest-exact' mode is the same as the nearest-neighbor interpolation algorithm used in
@@ -2722,7 +2905,7 @@ def interpolate_ext(input,
             the corresponding coordinate of the original
             data along the x-axis.
 
-            This is only valid for ``'linear'``, ``'bilinear'``, or ``'bicubic'`` modes. Default: ``
+            This is only valid for ``'linear'``, ``'bilinear'``, or ``'bicubic'`` modes. Default: ``None`` .
         recompute_scale_factor (bool, optional): Recalculate `scale_factor`.
             If True, the parameter `size` will be calculated using the value of the `scale_factor`,
             and finally scaled using the value of `size`.
@@ -2755,20 +2938,6 @@ def interpolate_ext(input,
     Returns:
         Tensor, sampled, whose dimensions and dtype are the same as `input`.
 
-    Shape:
-        - Input: :math:`(N, C, W_{in})`, :math:`(N, C, H_{in}, W_{in})` or :math:`(N, C, D_{in}, H_{in}, W_{in})`
-        - Output: :math:`(N, C, W_{out})`, :math:`(N, C, H_{out}, W_{out})`
-          or :math:`(N, C, D_{out}, H_{out}, W_{out})`, where
-
-        .. math::
-            D_{out} = \left\lfloor D_{in} \times \text{scale\_factor} \right\rfloor
-
-        .. math::
-            H_{out} = \left\lfloor H_{in} \times \text{scale\_factor} \right\rfloor
-
-        .. math::
-            W_{out} = \left\lfloor W_{in} \times \text{scale\_factor} \right\rfloor
-
     Raises:
         TypeError: `input` is not a Tensor.
         ValueError: Both `size` and `scale_factor` are not empty.
@@ -2929,58 +3098,6 @@ def softsign(x):
     return softsign_(x)
 
 
-def soft_margin_loss(input, target, reduction='mean'):
-    r"""
-    Calculate the soft margin loss of input and target.
-
-    Creates a criterion that optimizes a two-class classification
-    logistic loss between input tensor :math:`x` and target tensor :math:`y`
-    (containing 1 or -1).
-
-    .. math::
-        \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()}
-
-    where :math:`x.nelement()` is the number of elements of :math:`x`.
-
-    .. warning::
-        This is an experimental API that is subject to change or deletion.
-
-    Args:
-        input (Tensor): Predict data. Data type must be float16 or float32.
-        target (Tensor): Ground truth data, with the same type and shape as `input`.
-        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
-            ``'sum'`` . Default: ``'mean'`` .
-
-            - ``'none'``: no reduction will be applied.
-            - ``'mean'``: compute and return the mean of elements in the output.
-            - ``'sum'``: the output elements will be summed.
-
-    Outputs:
-        Tensor or Scalar. If `reduction` is ``'none'``, its shape is the same as `input`.
-        Otherwise, a scalar value will be returned.
-
-    Raises:
-        TypeError: If `input` or `target` is not a Tensor.
-        TypeError: If dtype of `input` or `target` is neither float16 nor float32.
-        ValueError: If shape of `input` is not the same as that of `target`.
-        ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
-
-    Supported Platforms:
-        ``Ascend`` ``GPU``
-
-    Examples:
-        >>> import mindspore
-        >>> import numpy as np
-        >>> from mindspore import Tensor, ops
-        >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
-        >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32)
-        >>> output = ops.soft_margin_loss(logits, labels)
-        >>> print(output)
-        0.6764238
-    """
-    soft_margin_loss_op = _get_cache_prim(P.SoftMarginLoss)(reduction=reduction)
-    output = soft_margin_loss_op(input, target)
-    return output
 
 
 def softmax(input, axis=-1, *, dtype=None):
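The handwritten `soft_margin_loss` removed above is superseded by the generated version imported earlier in this file (`from mindspore.ops.auto_generate import soft_margin_loss`), so the public call path is unchanged. A minimal check reusing the deleted docstring's own example (the printed value assumes the generated kernel matches the removed implementation):

    >>> import mindspore
    >>> import numpy as np
    >>> from mindspore import Tensor, ops
    >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
    >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32)
    >>> output = ops.soft_margin_loss(logits, labels)  # still ~0.6764238 as before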
@@ -3277,7 +3394,7 @@ def selu(input_x):
 
 def logsigmoid(x):
     r"""
-    Applies
+    Applies LogSigmoid activation element-wise. The input is a Tensor with any valid shape.
 
     Logsigmoid is defined as:
@@ -3911,8 +4028,10 @@ def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
|
|
|
3911
4028
|
|
|
3912
4029
|
Args:
|
|
3913
4030
|
input (Tensor): The input of rrelu is a Tensor of any dimension.
|
|
3914
|
-
lower (Union[int, float]): Slope of the activation function at data of `input`
|
|
3915
|
-
|
|
4031
|
+
lower (Union[int, float]): Slope of the activation function at data of `input` is less than 0.
|
|
4032
|
+
Default: ``1.0 / 8`` .
|
|
4033
|
+
upper (Union[int, float]): Slope of the activation function at data of `input` is less than 0.
|
|
4034
|
+
Default: ``1.0 / 3`` .
|
|
3916
4035
|
|
|
3917
4036
|
Returns:
|
|
3918
4037
|
Tensor, after rrelu, has the same type and shape as the `input`.
|
|
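Read with the corrected lower/upper wording above: each negative element is scaled by a slope drawn uniformly from [lower, upper]. A rough NumPy sketch of that behavior, assuming per-element sampling (the primitive's sampling granularity may differ):

import numpy as np

def rrelu_ref(x, lower=1.0 / 8, upper=1.0 / 3, rng=None):
    # Elements >= 0 pass through; elements < 0 are multiplied by a slope
    # drawn uniformly from [lower, upper].
    rng = np.random.default_rng() if rng is None else rng
    slope = rng.uniform(lower, upper, size=np.shape(x))
    return np.where(x >= 0, x, slope * x)

x = np.array([[-1.0, 4.0], [2.0, 0.0]], dtype=np.float32)
print(rrelu_ref(x))  # the -1.0 entry lands somewhere in [-1/3, -1/8]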
@@ -4174,7 +4293,7 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
|
|
|
4174
4293
|
N is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of
|
|
4175
4294
|
classes.
|
|
4176
4295
|
|
|
4177
|
-
If `reduction` is not ``None`` (default ``'mean'``), then
|
|
4296
|
+
If `reduction` is not ``'none'`` (default ``'mean'``), then
|
|
4178
4297
|
|
|
4179
4298
|
.. math::
|
|
4180
4299
|
|
|
@@ -4301,7 +4420,7 @@ def nll_loss_ext(input, target, weight=None, ignore_index=-100, reduction='mean'
|
|
|
4301
4420
|
:math:`N` is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index,
|
|
4302
4421
|
where :math:`C` is the number of classes.
|
|
4303
4422
|
|
|
4304
|
-
If `reduction` is not ``None`` (default ``'mean'``), then
|
|
4423
|
+
If `reduction` is not ``'none'`` (default ``'mean'``), then
|
|
4305
4424
|
|
|
4306
4425
|
.. math::
|
|
4307
4426
|
|
|
@@ -4353,7 +4472,7 @@ def nll_loss_ext(input, target, weight=None, ignore_index=-100, reduction='mean'
|
|
|
4353
4472
|
return _nllloss_nd(input, target, weight, ignore_index, reduction)
|
|
4354
4473
|
|
|
4355
4474
|
|
|
4356
|
-
def _nllloss_nd(input, target, weight=None,
|
|
4475
|
+
def _nllloss_nd(input, target, weight=None, ignore_index=-100, reduction='mean'):
|
|
4357
4476
|
"""nllloss_nd inner function"""
|
|
4358
4477
|
input_dim = input.ndim
|
|
4359
4478
|
class_dim = 0 if input_dim == 1 else 1
|
|
@@ -4366,9 +4485,9 @@ def _nllloss_nd(input, target, weight=None, ingore_index=-100, reduction='mean')
|
|
|
4366
4485
|
raise ValueError(f"input bacth_size should be equal to target batch_size, but got {input.shape[0]} and "
|
|
4367
4486
|
f"{target.shape[0]}")
|
|
4368
4487
|
if input_dim == 1 or input_dim == 2:
|
|
4369
|
-
return nllloss_impl(input, target, weight, reduction,
|
|
4488
|
+
return nllloss_impl(input, target, weight, reduction, ignore_index)[0]
|
|
4370
4489
|
if input_dim == 4:
|
|
4371
|
-
return nllloss_2d_op(input, target, weight, reduction,
|
|
4490
|
+
return nllloss_2d_op(input, target, weight, reduction, ignore_index)[0]
|
|
4372
4491
|
# input_dim==3 or input_dim>4
|
|
4373
4492
|
n = input.shape[0]
|
|
4374
4493
|
c = input.shape[1]
|
|
@@ -4382,8 +4501,8 @@ def _nllloss_nd(input, target, weight=None, ingore_index=-100, reduction='mean')
|
|
|
4382
4501
|
else:
|
|
4383
4502
|
target = target.view((n, 0, 0))
|
|
4384
4503
|
if reduction != 'none':
|
|
4385
|
-
return nllloss_2d_op(input, target, weight, reduction,
|
|
4386
|
-
ret = nllloss_2d_op(input, target, weight, reduction,
|
|
4504
|
+
return nllloss_2d_op(input, target, weight, reduction, ignore_index)[0]
|
|
4505
|
+
ret = nllloss_2d_op(input, target, weight, reduction, ignore_index)[0]
|
|
4387
4506
|
return ret.view(out_size)
|
|
4388
4507
|
|
|
4389
4508
|
|
|
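This hunk's fix is to thread ignore_index through every nllloss_impl/nllloss_2d_op call (the argument lists were previously truncated). The surrounding reshape logic, which folds all spatial dimensions so the 2-D kernel can be reused for 3-D and >4-D inputs, can be sketched with NumPy shapes (names illustrative):

import numpy as np

def nllloss_nd_shapes_ref(input, target):
    # Mirrors _nllloss_nd for input_dim == 3 or > 4: fold the spatial dims
    # into one trailing axis, apply a 2-D NLL kernel, then restore out_size.
    n, c = input.shape[0], input.shape[1]
    out_size = (n,) + input.shape[2:]
    input_2d = input.reshape(n, c, 1, -1)   # (N, C, 1, prod(spatial))
    target_2d = target.reshape(n, 1, -1)    # (N, 1, prod(spatial))
    return input_2d, target_2d, out_size

inp = np.zeros((2, 5, 3, 4, 6))             # N=2, C=5, three spatial dims
tgt = np.zeros((2, 3, 4, 6), dtype=np.int64)
i2, t2, out = nllloss_nd_shapes_ref(inp, tgt)
print(i2.shape, t2.shape, out)              # (2, 5, 1, 72) (2, 1, 72) (2, 3, 4, 6)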
@@ -4411,10 +4530,10 @@ def _cross_entropy_for_probabilities(input, target, weight, reduction, label_smo
|
|
|
4411
4530
|
raise ValueError(f"redution value {reduction} not valid.")
|
|
4412
4531
|
|
|
4413
4532
|
|
|
4414
|
-
def _cross_entropy_for_class_indices(input, target, weight,
|
|
4533
|
+
def _cross_entropy_for_class_indices(input, target, weight, ignore_index, reduction, label_smoothing, class_dim,
|
|
4415
4534
|
n_classes):
|
|
4416
4535
|
"""cross_entropy inner function for class indices"""
|
|
4417
|
-
nllloss = _nllloss_nd(input, target, weight,
|
|
4536
|
+
nllloss = _nllloss_nd(input, target, weight, ignore_index, reduction)
|
|
4418
4537
|
if label_smoothing > 0.0:
|
|
4419
4538
|
if weight is not None:
|
|
4420
4539
|
weight_ = weight
|
|
@@ -4428,7 +4547,7 @@ def _cross_entropy_for_class_indices(input, target, weight, ingore_index, reduct
|
|
|
4428
4547
|
smooth_loss = -loss.sum(class_dim)
|
|
4429
4548
|
else:
|
|
4430
4549
|
smooth_loss = -input.sum(class_dim)
|
|
4431
|
-
ignore_mask = ops.eq(target,
|
|
4550
|
+
ignore_mask = ops.eq(target, ignore_index)
|
|
4432
4551
|
smooth_loss = masked_fill_op(smooth_loss, ignore_mask, 0)
|
|
4433
4552
|
if reduction == "mean":
|
|
4434
4553
|
true_mask = ~ignore_mask
|
|
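The restored ignore_index matters for the smoothing branch above: positions equal to ignore_index are zeroed out of smooth_loss via masked_fill before the reduction. A compact NumPy sketch of the class-indices path (reduction handling simplified; not the package implementation):

import numpy as np

def smoothed_ce_ref(logp, target, ignore_index=-100, label_smoothing=0.1):
    # logp: (N, C) log-probabilities; target: (N,) class indices.
    n, c = logp.shape
    valid = target != ignore_index
    nll = -logp[np.arange(n), np.where(valid, target, 0)]
    nll = np.where(valid, nll, 0.0)
    smooth = np.where(valid, -logp.sum(axis=1), 0.0)  # masked smooth_loss
    loss = (1.0 - label_smoothing) * nll + (label_smoothing / c) * smooth
    return loss.sum() / max(valid.sum(), 1)           # mean over kept rows

logp = np.log(np.full((2, 4), 0.25))
print(smoothed_ce_ref(logp, np.array([1, -100])))     # ~1.3863; row 1 ignored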
@@ -4454,7 +4573,7 @@ def _cross_entropy_for_class_indices(input, target, weight, ingore_index, reduct
|
|
|
4454
4573
|
return nllloss
|
|
4455
4574
|
|
|
4456
4575
|
|
|
4457
|
-
def cross_entropy_ext(input, target, weight=None,
|
|
4576
|
+
def cross_entropy_ext(input, target, weight=None, ignore_index=-100, reduction='mean', label_smoothing=0.0):
|
|
4458
4577
|
r"""
|
|
4459
4578
|
The cross entropy loss between input and target.
|
|
4460
4579
|
|
|
@@ -4511,13 +4630,13 @@ def cross_entropy_ext(input, target, weight=None, ingore_index=-100, reduction='
|
|
|
4511
4630
|
Note:
|
|
4512
4631
|
Dynamic shape, dynamic rank and variable constant input are not supported in `strict graph mode
|
|
4513
4632
|
(jit_syntax_level=mindspore.STRICT)
|
|
4514
|
-
<https://www.mindspore.cn/
|
|
4633
|
+
<https://www.mindspore.cn/tutorials/en/master/compile/static_graph.html>`_.
|
|
4515
4634
|
|
|
4516
4635
|
Args:
|
|
4517
4636
|
input (Tensor): :math:`(N)` or :math:`(N, C)` where `C = number of classes` or :math:`(N, C, H, W)`
|
|
4518
4637
|
in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)`.
|
|
4519
|
-
`input` is expected to be log-probabilities, data type must be float16 or float32 or bfloat16
|
|
4520
|
-
by Atlas A2 training series products).
|
|
4638
|
+
`input` is expected to be log-probabilities, data type must be float16 or float32 or bfloat16
|
|
4639
|
+
(only supported by Atlas A2 training series products).
|
|
4521
4640
|
target (Tensor): For class indices, tensor of shape :math:`()`, :math:`(N)` or
|
|
4522
4641
|
:math:`(N, d_1, d_2, ..., d_K)` , data type must be int32 or int64. For probabilities, tensor of shape
|
|
4523
4642
|
:math:`(N,)` , :math:`(N, C)` or :math:`(N, C, d_1, d_2, ..., d_K)` , data type must be float16 or float32
|
|
@@ -4577,7 +4696,7 @@ def cross_entropy_ext(input, target, weight=None, ingore_index=-100, reduction='
|
|
|
4577
4696
|
return _cross_entropy_for_probabilities(input, target, weight, reduction, label_smoothing, class_dim,
|
|
4578
4697
|
n_classes)
|
|
4579
4698
|
# for class indices
|
|
4580
|
-
return _cross_entropy_for_class_indices(input, target, weight,
|
|
4699
|
+
return _cross_entropy_for_class_indices(input, target, weight, ignore_index, reduction, label_smoothing,
|
|
4581
4700
|
class_dim, n_classes)
|
|
4582
4701
|
|
|
4583
4702
|
|
|
@@ -4596,7 +4715,7 @@ def l1_loss(input, target, reduction='mean'):
|
|
|
4596
4715
|
|
|
4597
4716
|
where :math:`N` is the batch size.
|
|
4598
4717
|
|
|
4599
|
-
If `reduction` is ``'mean'`` or ``'sum'`` , then:
|
|
4718
|
+
If `reduction` is set to ``'mean'`` or ``'sum'`` , then:
|
|
4600
4719
|
|
|
4601
4720
|
.. math::
|
|
4602
4721
|
\ell(x, y) =
|
|
@@ -4801,8 +4920,8 @@ def leaky_relu(input, alpha=0.2):
|
|
|
4801
4920
|
|
|
4802
4921
|
Args:
|
|
4803
4922
|
input (Tensor): The input of leaky_relu is a Tensor of any dimension.
|
|
4804
|
-
alpha (Union[int, float]): Slope of the activation function when
|
|
4805
|
-
|
|
4923
|
+
alpha (Union[int, float], optional): Slope of the activation function when
|
|
4924
|
+
the element of `input` is less than 0. Default: ``0.2`` .
|
|
4806
4925
|
|
|
4807
4926
|
Returns:
|
|
4808
4927
|
Tensor, has the same type and shape as the `input`.
|
|
@@ -4834,37 +4953,25 @@ def leaky_relu(input, alpha=0.2):
|
|
|
4834
4953
|
|
|
4835
4954
|
def intopk(x1, x2, k):
|
|
4836
4955
|
r"""
|
|
4837
|
-
|
|
4956
|
+
Return whether the elements in the second input tensor exist among the top `k` elements of the first input tensor.
|
|
4838
4957
|
|
|
4839
4958
|
Args:
|
|
4840
|
-
x1 (Tensor):
|
|
4841
|
-
|
|
4842
|
-
|
|
4843
|
-
must be equal to the first dimension of `x1`. The values of `x2` can not be negative and
|
|
4844
|
-
must be equal to or less than index of x1's second dimension.
|
|
4845
|
-
k (int): Specifies the number of top elements to be used for computing precision along the last dimension.
|
|
4959
|
+
x1 (Tensor): The 2-D input tensor.
|
|
4960
|
+
x2 (Tensor): The 1-D input tensor, should satisfy :math:`x2.shape[0] = x1.shape[0]` .
|
|
4961
|
+
k (int): Top `k` elements.
|
|
4846
4962
|
|
|
4847
4963
|
Returns:
|
|
4848
|
-
|
|
4849
|
-
if the label in the first `k` predictions for sample `i` is in `x1`, then the value is True, otherwise False.
|
|
4850
|
-
|
|
4851
|
-
Raises:
|
|
4852
|
-
TypeError: If `k` is not an int.
|
|
4853
|
-
TypeError: If `x1` or `x2` is not a Tensor.
|
|
4854
|
-
TypeError: If dtype of `x1` is neither float16 nor float32.
|
|
4964
|
+
A 1-D tensor whose data type is bool, with the same shape as `x2`.
|
|
4855
4965
|
|
|
4856
4966
|
Supported Platforms:
|
|
4857
4967
|
``Ascend`` ``GPU`` ``CPU``
|
|
4858
4968
|
|
|
4859
4969
|
Examples:
|
|
4860
4970
|
>>> import mindspore
|
|
4861
|
-
>>>
|
|
4862
|
-
>>>
|
|
4863
|
-
>>>
|
|
4864
|
-
|
|
4865
|
-
>>> output = ops.intopk(x1, x2, 3)
|
|
4866
|
-
>>> print(output)
|
|
4867
|
-
[ True False]
|
|
4971
|
+
>>> x1 = mindspore.tensor([[1, 8, 5, 2, 7], [4, 9, 1, 3, 5]], mindspore.float32)
|
|
4972
|
+
>>> x2 = mindspore.tensor([1, 3], mindspore.int32)
|
|
4973
|
+
>>> mindspore.ops.intopk(x1, x2, 3)
|
|
4974
|
+
Tensor(shape=[2], dtype=Bool, value= [ True, False])
|
|
4868
4975
|
"""
|
|
4869
4976
|
_in_topk = _get_cache_prim(P.InTopK)(k)
|
|
4870
4977
|
return _in_topk(x1, x2)
|
|
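The rewritten summary reads, per row: for sample i the result is True when the column index x2[i] lands among the k largest entries of x1[i]. An argsort-based NumPy check (tie handling may differ from the InTopK primitive):

import numpy as np

def intopk_ref(x1, x2, k):
    # Indices of the k largest values in each row of x1.
    topk_idx = np.argsort(-x1, axis=1)[:, :k]
    return np.array([x2[i] in topk_idx[i] for i in range(x1.shape[0])])

x1 = np.array([[1, 8, 5, 2, 7], [4, 9, 1, 3, 5]], dtype=np.float32)
x2 = np.array([1, 3])
print(intopk_ref(x1, x2, 3))  # [ True False], matching the doctest above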
@@ -6121,6 +6228,127 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
|
|
|
6121
6228
|
return output
|
|
6122
6229
|
|
|
6123
6230
|
|
|
6231
|
+
def conv1d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
|
|
6232
|
+
r"""
|
|
6233
|
+
Applies a 1D convolution over an input tensor. The input tensor is typically
|
|
6234
|
+
of shape :math:`(N, C_{in}, L_{in})`,
|
|
6235
|
+
where :math:`N` is batch size, :math:`C` is channel number, :math:`L` is sequence length.
|
|
6236
|
+
|
|
6237
|
+
The output is calculated based on formula:
|
|
6238
|
+
|
|
6239
|
+
.. math::
|
|
6240
|
+
|
|
6241
|
+
\text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
|
|
6242
|
+
\sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
|
|
6243
|
+
|
|
6244
|
+
where :math:`bias` is the output channel bias, :math:`ccor` is
|
|
6245
|
+
the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
|
|
6246
|
+
:math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
|
|
6247
|
+
|
|
6248
|
+
- :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`,
|
|
6249
|
+
where :math:`N` is the batch size of the input.
|
|
6250
|
+
|
|
6251
|
+
- :math:`j` corresponds to the output channel, the range is :math:`[0, C_{out}-1]`,
|
|
6252
|
+
where :math:`C_{out}` is the number of
|
|
6253
|
+
output channels, which is also equal to the number of kernels.
|
|
6254
|
+
|
|
6255
|
+
- :math:`k` corresponds to the input channel, the range is :math:`[0, C_{in}-1]`,
|
|
6256
|
+
where :math:`C_{in}` is the number of
|
|
6257
|
+
input channels, which is also equal to the number of channels in the convolutional kernels.
|
|
6258
|
+
|
|
6259
|
+
Therefore, in the above formula, :math:`{bias}(C_{\text{out}_j})` represents the bias of the :math:`j`-th
|
|
6260
|
+
output channel, :math:`{weight}(C_{\text{out}_j}, k)` represents the slice of the :math:`j`-th convolutional
|
|
6261
|
+
kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
|
|
6262
|
+
channel in the :math:`i`-th batch of the input feature map.
|
|
6263
|
+
|
|
6264
|
+
The shape of the convolutional kernel is given by :math:`(\text{kernel_size})`,
|
|
6265
|
+
where :math:`\text{kernel_size}` is the length of the kernel.
|
|
6266
|
+
If we consider the input and output channels as well as the `groups` parameter, the complete kernel shape
|
|
6267
|
+
will be :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size})`,
|
|
6268
|
+
where `groups` is the number of groups dividing `x`'s input channel when applying groups convolution.
|
|
6269
|
+
|
|
6270
|
+
For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
|
|
6271
|
+
<http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
|
|
6272
|
+
|
|
6273
|
+
.. warning::
|
|
6274
|
+
This is an experimental API that is subject to change or deletion.
|
|
6275
|
+
|
|
6276
|
+
Args:
|
|
6277
|
+
input (Tensor): Tensor of shape :math:`(N, C_{in}, L_{in})` or :math:`(C_{in}, L_{in})`.
|
|
6278
|
+
weight (Tensor): Tensor of shape
|
|
6279
|
+
:math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size})`, then the size of kernel
|
|
6280
|
+
is :math:`(\text{kernel_size})`.
|
|
6281
|
+
bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
|
|
6282
|
+
When bias is ``None`` , zeros will be used. Default: ``None`` .
|
|
6283
|
+
stride (Union[int, tuple[int], list[int]], optional): The movement stride of the 1D convolution kernel.
|
|
6284
|
+
The data type is an integer or a tuple of one integer. Default: ``1`` .
|
|
6285
|
+
padding (Union[int, tuple[int], list[int], str], optional): The number of padding
|
|
6286
|
+
on the input.
|
|
6287
|
+
The data type is an integer or a tuple of one integer or string {`valid`, `same`}.
|
|
6288
|
+
The value should be greater than or equal to 0. Default: ``0`` .
|
|
6289
|
+
|
|
6290
|
+
- ``"same"``: Pad the input around its edges so that the shape of input and output
|
|
6291
|
+
are the same when `stride` is set to ``1``.
|
|
6292
|
+
The amount of padding is calculated by the operator internally. If the amount is even, it is
|
|
6293
|
+
uniformly distributed around the input, if it is odd, the excess amount goes to the right side.
|
|
6294
|
+
If this mode is set, `stride` must be 1.
|
|
6295
|
+
|
|
6296
|
+
- ``"valid"``: No padding is applied to the input, and the output returns the maximum
|
|
6297
|
+
possible length. Extra sequence elements that could not complete a full stride will
|
|
6298
|
+
be discarded.
|
|
6299
|
+
|
|
6300
|
+
dilation (Union[int, tuple[int], list[int]], optional): Specifies the dilation rate to use for
|
|
6301
|
+
dilated convolution. It can be a single int or a tuple of 1 integer.
|
|
6302
|
+
Assuming :math:`dilation=(d)`, the convolutional kernel samples the input with a
|
|
6303
|
+
spacing of :math:`d-1` elements in the length direction.
|
|
6304
|
+
Default: ``1`` .
|
|
6305
|
+
groups (int, optional): Splits filter into groups, `in_channels` and `out_channels` must be
|
|
6306
|
+
divisible by `groups`. If `groups` is equal to `in_channels` and `out_channels`,
|
|
6307
|
+
this 1D convolution layer can also be called a 1D depthwise convolution layer. Default: ``1`` .
|
|
6308
|
+
|
|
6309
|
+
- :math:`(C_{in} \text{ % } \text{groups} == 0)` , :math:`(C_{out} \text{ % } \text{groups} == 0)` ,
|
|
6310
|
+
:math:`(C_{out} >= \text{groups})` , :math:`(\text{kernel_size[1]} = C_{in} / \text{groups})` .
|
|
6311
|
+
|
|
6312
|
+
Returns:
|
|
6313
|
+
Tensor, the value that applied 1D convolution. The shape is :math:`(N, C_{out}, L_{out})`.
|
|
6314
|
+
To see how different pad modes affect the output shape, please refer to
|
|
6315
|
+
:class:`mindspore.mint.nn.Conv1d` for more details.
|
|
6316
|
+
|
|
6317
|
+
Raises:
|
|
6318
|
+
ValueError: If the arguments and the size of the input feature map do not satisfy the output formula, so that
|
|
6319
|
+
the size of the output feature map would not be positive.
|
|
6320
|
+
RuntimeError: On Ascend, due to the limitation of the L1 cache size of different NPU chips, if the input size or
|
|
6321
|
+
kernel size is too large, it may trigger an error.
|
|
6322
|
+
TypeError: If `in_channels`, `out_channels` or `groups` is not an int.
|
|
6323
|
+
TypeError: If `kernel_size`, `stride` or `dilation` is neither an int nor a tuple.
|
|
6324
|
+
ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
|
|
6325
|
+
ValueError: If `padding` is less than 0.
|
|
6326
|
+
ValueError: If `padding` is ``"same"`` and `stride` is not equal to 1.
|
|
6327
|
+
ValueError: If the input parameters do not satisfy the convolution output formula.
|
|
6328
|
+
ValueError: If the kernel size exceeds the size of the input feature map.
|
|
6329
|
+
ValueError: If the value of `padding` causes the calculation area to exceed the input size.
|
|
6330
|
+
|
|
6331
|
+
Supported Platforms:
|
|
6332
|
+
``Ascend``
|
|
6333
|
+
|
|
6334
|
+
Examples:
|
|
6335
|
+
>>> import mindspore
|
|
6336
|
+
>>> import numpy as np
|
|
6337
|
+
>>> from mindspore import Tensor, ops, mint
|
|
6338
|
+
>>> x = Tensor(np.ones([10, 32, 32]), mindspore.float32)
|
|
6339
|
+
>>> weight = Tensor(np.ones([32, 32, 3]), mindspore.float32)
|
|
6340
|
+
>>> output = mint.nn.functional.conv1d(x, weight)
|
|
6341
|
+
>>> print(output.shape)
|
|
6342
|
+
(10, 32, 30)
|
|
6343
|
+
"""
|
|
6344
|
+
if isinstance(padding, (int, tuple, list)):
|
|
6345
|
+
return conv1d_ext_op(input, weight, bias, stride, padding, dilation, groups)
|
|
6346
|
+
if isinstance(padding, str):
|
|
6347
|
+
return conv1d_padding_op(input, weight, bias, stride, padding, dilation, groups)
|
|
6348
|
+
raise TypeError(f"For conv1d, the parameter 'padding' must be a tuple/list " \
|
|
6349
|
+
f"or a string, but got {type(padding)}")
|
|
6350
|
+
|
|
6351
|
+
|
|
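The dispatch above sends integer/tuple padding and string padding ("same"/"valid") to two different primitives. The cross-correlation formula in the docstring, for groups=1 and integer padding, can be written directly in NumPy as a rough reference:

import numpy as np

def conv1d_ref(x, w, bias=None, stride=1, padding=0, dilation=1):
    # x: (N, C_in, L), w: (C_out, C_in, K); direct cross-correlation.
    n, c_in, l = x.shape
    c_out, _, k = w.shape
    x = np.pad(x, ((0, 0), (0, 0), (padding, padding)))
    span = dilation * (k - 1) + 1
    l_out = (x.shape[-1] - span) // stride + 1
    out = np.zeros((n, c_out, l_out))
    for t in range(l_out):
        win = x[:, :, t * stride:t * stride + span:dilation]  # (N, C_in, K)
        out[:, :, t] = np.einsum('nck,ock->no', win, w)
    return out if bias is None else out + bias[None, :, None]

print(conv1d_ref(np.ones((10, 32, 32)), np.ones((32, 32, 3))).shape)
# (10, 32, 30), matching the docstring example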
6124
6352
|
def _check_stride_when_same_mode(stride):
|
|
6125
6353
|
""" stride must be 1 when pad mode is same """
|
|
6126
6354
|
if isinstance(stride, int):
|
|
@@ -6220,10 +6448,10 @@ def conv2d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups
|
|
|
6220
6448
|
is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`.
|
|
6221
6449
|
bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
|
|
6222
6450
|
When bias is ``None`` , zeros will be used. Default: ``None`` .
|
|
6223
|
-
stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that
|
|
6224
|
-
the height and width of movement are both strides, or a tuple of two int numbers that
|
|
6451
|
+
stride (Union(int, tuple[int], list[int]), optional): The distance of kernel moving, an int number that
|
|
6452
|
+
applies to both the height and width of movement, or a tuple of two int numbers that
|
|
6225
6453
|
represent height and width of movement respectively. Default: ``1`` .
|
|
6226
|
-
padding (Union[int, tuple[int], str], optional): The number of padding
|
|
6454
|
+
padding (Union[int, tuple[int], list[int], str], optional): The number of padding
|
|
6227
6455
|
on the height and width directions of the input.
|
|
6228
6456
|
The data type is an integer or a tuple of two integers or string {`valid`, `same`}. If `padding` is an
|
|
6229
6457
|
integer, then `padding_{H}` and `padding_{W}` are all equal to `padding`.
|
|
@@ -6241,8 +6469,9 @@ def conv2d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups
|
|
|
6241
6469
|
possible height and width. Extra pixels that could not complete a full stride will
|
|
6242
6470
|
be discarded.
|
|
6243
6471
|
|
|
6244
|
-
dilation (Union(int, tuple[int]), optional): Gaps between kernel elements.The data type
|
|
6245
|
-
2 integers. Specifies the dilation rate to use for dilated convolution.
|
|
6472
|
+
dilation (Union(int, tuple[int], list[int]), optional): Gaps between kernel elements. The data type
|
|
6473
|
+
is int or a tuple of 2 integers. Specifies the dilation rate to use for dilated convolution.
|
|
6474
|
+
If set to be :math:`k > 1`,
|
|
6246
6475
|
there will be :math:`k - 1` pixels skipped for each sampling location. Its value must
|
|
6247
6476
|
be greater than or equal to 1 and bounded by the height and width of the input `x`. Default: ``1`` .
|
|
6248
6477
|
groups (int, optional): Splits `input` into groups. Default: ``1`` .
|
|
@@ -7031,39 +7260,8 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
|
|
|
7031
7260
|
|
|
7032
7261
|
Returns:
|
|
7033
7262
|
Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
|
|
7034
|
-
|
|
7035
|
-
`
|
|
7036
|
-
|
|
7037
|
-
.. math::
|
|
7038
|
-
\begin{array}{ll} \\
|
|
7039
|
-
D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\
|
|
7040
|
-
H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\
|
|
7041
|
-
W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
|
|
7042
|
-
\end{array}
|
|
7043
|
-
|
|
7044
|
-
`pad_mode` is ``"valid"``:
|
|
7045
|
-
|
|
7046
|
-
.. math::
|
|
7047
|
-
\begin{array}{ll} \\
|
|
7048
|
-
D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) }
|
|
7049
|
-
{\text{stride[0]}} + 1} \right \rfloor \\
|
|
7050
|
-
H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) }
|
|
7051
|
-
{\text{stride[1]}} + 1} \right \rfloor \\
|
|
7052
|
-
W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) }
|
|
7053
|
-
{\text{stride[2]}} + 1} \right \rfloor \\
|
|
7054
|
-
\end{array}
|
|
7055
|
-
|
|
7056
|
-
`pad_mode` is ``"pad"``:
|
|
7057
|
-
|
|
7058
|
-
.. math::
|
|
7059
|
-
\begin{array}{ll} \\
|
|
7060
|
-
D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times
|
|
7061
|
-
\text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
|
|
7062
|
-
H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times
|
|
7063
|
-
\text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
|
|
7064
|
-
W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times
|
|
7065
|
-
\text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\
|
|
7066
|
-
\end{array}
|
|
7263
|
+
To see how different pad modes affect the output shape, please refer to
|
|
7264
|
+
:class:`mindspore.nn.Conv3d` for more details.
|
|
7067
7265
|
|
|
7068
7266
|
Raises:
|
|
7069
7267
|
TypeError: If `out_channel` or `groups` is not an int.
|
|
@@ -7206,10 +7404,11 @@ def conv3d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups
|
|
|
7206
7404
|
kw)`, then the shape is :math:`(C_{out}, C_{in} / groups, kd, kh, kw)`.
|
|
7207
7405
|
bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
|
|
7208
7406
|
When bias is ``None`` , zeros will be used. Default: ``None`` .
|
|
7209
|
-
stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int
|
|
7210
|
-
the depth, the height and width of movement are both strides, or a
|
|
7407
|
+
stride (Union(int, tuple[int], list[int]), optional): The distance of kernel moving, an int
|
|
7408
|
+
number that applies to the depth, height and width of movement alike, or a
|
|
7409
|
+
tuple of triple int numbers that
|
|
7211
7410
|
represent the depth, height and width of movement respectively. Default: ``1`` .
|
|
7212
|
-
padding (Union(int, tuple[int], str), optional): Implicit paddings on both sides of the input `x`.
|
|
7411
|
+
padding (Union(int, tuple[int], list[int], str), optional): Implicit paddings on both sides of the input `x`.
|
|
7213
7412
|
Can be a string, one integer or a tuple/list with 3 integers.
|
|
7214
7413
|
If `padding` is a string, the optional values are ``"same"`` , ``"valid"``.
|
|
7215
7414
|
|
|
@@ -7224,7 +7423,8 @@ def conv3d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups
|
|
|
7224
7423
|
If `padding` is one integer, the paddings of head, tail, top, bottom, left and right are the same, equal to padding.
|
|
7225
7424
|
If `padding` is a tuple/list with 3 integers, the padding of head, tail, top, bottom,
|
|
7226
7425
|
left and right equal to pad[0], pad[0], pad[1], pad[1], pad[2] and pad[2] correspondingly. Default: ``0`` .
|
|
7227
|
-
dilation (Union[int, tuple[int]], optional): Controlling the space between the kernel points.
|
|
7426
|
+
dilation (Union[int, tuple[int], list[int]], optional): Controls the spacing between the kernel points.
|
|
7427
|
+
Default: ``1`` .
|
|
7228
7428
|
groups (int, optional): Splits `input` into groups. Default: ``1`` .
|
|
7229
7429
|
|
|
7230
7430
|
Returns:
|
|
@@ -7450,9 +7650,6 @@ def glu_ext(input, dim=-1):
|
|
|
7450
7650
|
Here :math:`\sigma` is the sigmoid function, and :math:`\otimes` is the Hadamard product.
|
|
7451
7651
|
See `Language Modeling with Gated Convolutional Networks <https://arxiv.org/abs/1612.08083>`_.
|
|
7452
7652
|
|
|
7453
|
-
.. warning::
|
|
7454
|
-
This is an experimental API that is subject to change or deletion.
|
|
7455
|
-
|
|
7456
7653
|
Args:
|
|
7457
7654
|
input (Tensor): Tensor to be calculated. Dtype is floating point and the shape
|
|
7458
7655
|
is :math:`(\ast_1, N, \ast_2)` where `*` means any number of additional dimensions. :math:`N`
|
|
@@ -7461,6 +7658,9 @@ def glu_ext(input, dim=-1):
|
|
|
7461
7658
|
dim (int, optional): The dimension to split the input `input`. The value range is `[-r, r)` where `r`
|
|
7462
7659
|
is the number of dimensions of `input`. Default: ``-1`` , the last dimension in `input`.
|
|
7463
7660
|
|
|
7661
|
+
Returns:
|
|
7662
|
+
Tensor, the same dtype as the input `input`. The shape is :math:`(\ast_1, M, \ast_2)` where :math:`M=N/2`.
|
|
7663
|
+
|
|
7464
7664
|
Raises:
|
|
7465
7665
|
TypeError: If `input` is not a Tensor or `dim` is not an int.
|
|
7466
7666
|
IndexError: If the value of `dim` is out of the range of `[-r, r)`, where `r` is the number
|
|
@@ -7472,9 +7672,9 @@ def glu_ext(input, dim=-1):
|
|
|
7472
7672
|
``Ascend`` ``CPU``
|
|
7473
7673
|
|
|
7474
7674
|
Examples:
|
|
7475
|
-
>>> from mindspore import Tensor,
|
|
7476
|
-
>>> input = Tensor([[0.1,0.2,0.3,0.4],[0.5,0.6,0.7,0.8]])
|
|
7477
|
-
>>> output =
|
|
7675
|
+
>>> from mindspore import Tensor, ops
|
|
7676
|
+
>>> input = Tensor([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]])
|
|
7677
|
+
>>> output = ops.function.nn_func.glu_ext(input)
|
|
7478
7678
|
>>> print(output)
|
|
7479
7679
|
[[0.05744425 0.11973753]
|
|
7480
7680
|
[0.33409387 0.41398472]]
|
|
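With the example fixed to import ops and call ops.function.nn_func.glu_ext, the operation it demonstrates is a split along dim followed by a sigmoid gate. A NumPy sketch reproducing the printed values:

import numpy as np

def glu_ref(x, dim=-1):
    # GLU(a, b) = a * sigmoid(b), where a and b are the two halves of x.
    a, b = np.split(x, 2, axis=dim)
    return a * (1.0 / (1.0 + np.exp(-b)))

x = np.array([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]])
print(glu_ref(x))
# [[0.05744425 0.11973753]
#  [0.33409387 0.41398472]]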
@@ -7642,7 +7842,8 @@ def multilabel_soft_margin_loss(input, target, weight=None, reduction='mean'):
|
|
|
7642
7842
|
Args:
|
|
7643
7843
|
input (Tensor): A tensor of shape :math:`(N, C)` , where N is batch size and C is number of classes.
|
|
7644
7844
|
target (Tensor): The label target Tensor which has the same shape as `input`.
|
|
7645
|
-
weight (Union[Tensor, int, float]): The manual rescaling weight given to each class.
|
|
7845
|
+
weight (Union[Tensor, int, float], optional): The manual rescaling weight given to each class.
|
|
7846
|
+
Default: ``None``.
|
|
7646
7847
|
reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
|
|
7647
7848
|
``'sum'`` . Default: ``'mean'`` .
|
|
7648
7849
|
|
|
@@ -7759,30 +7960,19 @@ def channel_shuffle(x, groups):
|
|
|
7759
7960
|
shape in the final output.
|
|
7760
7961
|
|
|
7761
7962
|
Args:
|
|
7762
|
-
x (Tensor):
|
|
7763
|
-
with float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 data type.
|
|
7963
|
+
x (Tensor): The input tensor.
|
|
7764
7964
|
groups (int): Number of groups to divide channels in.
|
|
7765
7965
|
|
|
7766
7966
|
Returns:
|
|
7767
|
-
|
|
7768
|
-
|
|
7769
|
-
Raises:
|
|
7770
|
-
TypeError: If data type of `x` is not one of the following:
|
|
7771
|
-
float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64.
|
|
7772
|
-
TypeError: If dim of `x` is < 4.
|
|
7773
|
-
TypeError: If `groups` is not a positive number.
|
|
7774
|
-
ValueError: If channel number of `x` is not divisible by `groups`.
|
|
7967
|
+
Tensor
|
|
7775
7968
|
|
|
7776
7969
|
Supported Platforms:
|
|
7777
7970
|
``Ascend`` ``CPU``
|
|
7778
7971
|
|
|
7779
7972
|
Examples:
|
|
7780
7973
|
>>> import mindspore
|
|
7781
|
-
>>>
|
|
7782
|
-
>>>
|
|
7783
|
-
>>> group = 2
|
|
7784
|
-
>>> x = Tensor(np.arange(1* 4 * 2 * 2).reshape(1, 4, 2, 2).astype(np.int16))
|
|
7785
|
-
>>> y = mindspore.ops.channel_shuffle(x, group)
|
|
7974
|
+
>>> x = mindspore.tensor(mindspore.ops.arange(0, 16, dtype=mindspore.int16).reshape(1, 4, 2, 2))
|
|
7975
|
+
>>> y = mindspore.ops.channel_shuffle(x, groups=2)
|
|
7786
7976
|
>>> print(y)
|
|
7787
7977
|
[[[[ 0 1]
|
|
7788
7978
|
[ 2 3]]
|
|
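The trimmed docstring leaves the mechanics implicit: channel shuffle reshapes (N, C, H, W) to (N, groups, C/groups, H, W), swaps the two channel axes, and flattens back. A NumPy sketch consistent with the example above:

import numpy as np

def channel_shuffle_ref(x, groups):
    n, c, h, w = x.shape
    return (x.reshape(n, groups, c // groups, h, w)
             .transpose(0, 2, 1, 3, 4)
             .reshape(n, c, h, w))

x = np.arange(16, dtype=np.int16).reshape(1, 4, 2, 2)
print(channel_shuffle_ref(x, 2)[0, :, 0, 0])  # [ 0  8  4 12]: channels interleaved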
@@ -8135,35 +8325,36 @@ def mse_loss(input, target, reduction='mean'):
|
|
|
8135
8325
|
|
|
8136
8326
|
def msort(input):
|
|
8137
8327
|
r"""
|
|
8138
|
-
|
|
8139
|
-
|
|
8140
|
-
ops.msort(t) is equivalent to ops.Sort(axis=0)(t)[0]. See also :class:`mindspore.ops.Sort()`.
|
|
8328
|
+
Return a tensor obtained by sorting the input tensor in ascending order along its first dimension.
|
|
8141
8329
|
|
|
8142
|
-
|
|
8143
|
-
|
|
8330
|
+
`ops.msort(input)` is equivalent to `ops.sort(input, axis=0)[0]`. See also :class:`mindspore.ops.Sort` for more
|
|
8331
|
+
details.
|
|
8144
8332
|
|
|
8145
8333
|
Args:
|
|
8146
|
-
input (Tensor): The input to sort
|
|
8334
|
+
input (Tensor): The input tensor to sort.
|
|
8147
8335
|
|
|
8148
8336
|
Returns:
|
|
8149
|
-
|
|
8150
|
-
|
|
8151
|
-
Raises:
|
|
8152
|
-
TypeError: If dtype of `input` is neither float16 nor float32.
|
|
8337
|
+
Tensor
|
|
8153
8338
|
|
|
8154
8339
|
Supported Platforms:
|
|
8155
8340
|
``Ascend`` ``GPU`` ``CPU``
|
|
8156
8341
|
|
|
8157
8342
|
Examples:
|
|
8158
|
-
>>> import mindspore
|
|
8159
|
-
>>>
|
|
8160
|
-
|
|
8161
|
-
|
|
8162
|
-
>>>
|
|
8163
|
-
|
|
8164
|
-
[[4
|
|
8165
|
-
[5
|
|
8166
|
-
[8
|
|
8343
|
+
>>> import mindspore
|
|
8344
|
+
>>> input = mindspore.tensor([[8, 2, 1],
|
|
8345
|
+
... [5, 9, 3],
|
|
8346
|
+
... [4, 6, 7]])
|
|
8347
|
+
>>> mindspore.ops.msort(input)
|
|
8348
|
+
Tensor(shape=[3, 3], dtype=Int64, value=
|
|
8349
|
+
[[4, 2, 1],
|
|
8350
|
+
[5, 6, 3],
|
|
8351
|
+
[8, 9, 7]])
|
|
8352
|
+
>>> # is equivalent to `ops.sort(axis=0)(input)[0]`
|
|
8353
|
+
>>> mindspore.ops.sort(input, axis=0)[0]
|
|
8354
|
+
Tensor(shape=[3, 3], dtype=Int64, value=
|
|
8355
|
+
[[4, 2, 1],
|
|
8356
|
+
[5, 6, 3],
|
|
8357
|
+
[8, 9, 7]])
|
|
8167
8358
|
"""
|
|
8168
8359
|
return ops.Sort(axis=0)(input)[0]
|
|
8169
8360
|
|
|
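The rewritten msort docstring and doctest can be cross-checked against NumPy, since sorting along the first dimension is exactly np.sort(..., axis=0):

import numpy as np

x = np.array([[8, 2, 1],
              [5, 9, 3],
              [4, 6, 7]])
print(np.sort(x, axis=0))  # same values as the ops.msort example above
# [[4 2 1]
#  [5 6 3]
#  [8 9 7]]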
@@ -9168,6 +9359,215 @@ def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, sca
|
|
|
9168
9359
|
return embedding_op(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq)
|
|
9169
9360
|
|
|
9170
9361
|
|
|
9362
|
+
def speed_fusion_attention(query, key, value, head_num, input_layout, *, pse=None, padding_mask=None, atten_mask=None,
|
|
9363
|
+
scale=1.0, keep_prob=1.0, pre_tokens=2147483647, next_tokens=2147483647, inner_precise=0,
|
|
9364
|
+
prefix=None, actual_seq_qlen=None, actual_seq_kvlen=None, sparse_mode=0,
|
|
9365
|
+
gen_mask_parallel=True, sync=False, pse_type=1, q_start_idx=None, kv_start_idx=None):
|
|
9366
|
+
r"""
|
|
9367
|
+
The interface is used for self-attention fusion computing.
|
|
9368
|
+
If `pse_type` is ``1`` , calculation formula is:
|
|
9369
|
+
|
|
9370
|
+
.. math::
|
|
9371
|
+
attention\_out = Dropout(Softmax(Mask(scale * (pse + query * key^{T}), atten\_mask)), keep\_prob) * value
|
|
9372
|
+
|
|
9373
|
+
If `pse_type` is other valid value, calculation formula is:
|
|
9374
|
+
|
|
9375
|
+
.. math::
|
|
9376
|
+
attention\_out = Dropout(Softmax(Mask(scale * (query * key^{T}) + pse, atten\_mask)), keep\_prob) * value
|
|
9377
|
+
|
|
9378
|
+
- B: Batch size. Value range 1 to 2k.
|
|
9379
|
+
- S1: Sequence length of query. Value range 1 to 512k.
|
|
9380
|
+
- S2: Sequence length of key and value. Value range 1 to 512k.
|
|
9381
|
+
- N1: Num heads of query. Value range 1 to 256.
|
|
9382
|
+
- N2: Num heads of key and value, and N2 must be a factor of N1.
|
|
9383
|
+
- D: Head size. The value must be a multiple of 16, with a maximum value of 512.
|
|
9384
|
+
- H1: Hidden size of query, which equals to N1 * D.
|
|
9385
|
+
- H2: Hidden size of key and value, which equals to N2 * D.
|
|
9386
|
+
|
|
9387
|
+
.. warning::
|
|
9388
|
+
- This is an experimental API that is subject to change or deletion.
|
|
9389
|
+
- Only support on Atlas A2 training series.
|
|
9390
|
+
|
|
9391
|
+
Note:
|
|
9392
|
+
This interface is not supported in `graph mode (mode=mindspore.GRAPH_MODE)
|
|
9393
|
+
<https://www.mindspore.cn/tutorials/en/master/compile/static_graph.html>`_.
|
|
9394
|
+
|
|
9395
|
+
Args:
|
|
9396
|
+
query (Tensor): The query tensor. Input tensor of shape :math:`(B, S1, H1)`,
|
|
9397
|
+
:math:`(B, N1, S1, D)`, :math:`(S1, B, H1)`, :math:`(B, S1, N1, D)` or :math:`(T1, N1, D)`.
|
|
9398
|
+
key (Tensor): The key tensor. Input tensor of shape :math:`(B, S2, H2)`,
|
|
9399
|
+
:math:`(B, N2, S2, D)`, :math:`(S2, B, H2)`, :math:`(B, S2, N2, D)` or :math:`(T2, N2, D)`.
|
|
9400
|
+
value (Tensor): The value tensor. Input tensor of shape :math:`(B, S2, H2)`,
|
|
9401
|
+
:math:`(B, N2, S2, D)`, :math:`(S2, B, H2)`, :math:`(B, S2, N2, D)` or :math:`(T2, N2, D)`.
|
|
9402
|
+
The `key` and `value` should have the same shape.
|
|
9403
|
+
head_num (int): The head num of query, equal to N1.
|
|
9404
|
+
input_layout (str): Specifies the layout of input `query`, `key` and `value`. The value can be ``"BSH"`` ,
|
|
9405
|
+
``"BNSD"`` , ``"SBH"`` , ``"BSND"`` or ``"TND"`` . ``"TND"`` is an experimental format.
|
|
9406
|
+
When `input_layout` is ``"TND"`` , the following restrictions must be met.
|
|
9407
|
+
There are two lists that represent the length of the input sequence: list_seq_q and list_seq_k. Each
|
|
9408
|
+
value in the list indicates the length of the sequence in the batch. For example, list_seq_q = [4, 2, 6],
|
|
9409
|
+
list_seq_k = [10, 3, 9]. The elements of the lists indicate S. T1 is sum(list_seq_q) = 12, T2 is
|
|
9410
|
+
sum(list_seq_k) = 22.
|
|
9411
|
+
max_seqlen_q = max(list_seq_q), max_seqlen_k = max(list_seq_k).
|
|
9412
|
+
qk_pointer = sum(list_seq_q * list_seq_k), which is the sum of the element-wise products.
|
|
9413
|
+
|
|
9414
|
+
- The lengths of the two lists are the same, and the size of each list is the batch size, which is less than or equal to 1024.
|
|
9415
|
+
- When `input_layout` is ``"TND"`` , `actual_seq_qlen` and `actual_seq_kvlen` must not be ``None`` .
|
|
9416
|
+
Otherwise, they are ``None`` .
|
|
9417
|
+
- The `actual_seq_qlen` and `actual_seq_kvlen` are the cumulative sums of the query and key/value sequence lengths, so they must
|
|
9418
|
+
be non-decreasing.
|
|
9419
|
+
- If `pse` is not ``None`` , list_seq_q and list_seq_k must be the same. The maximum value of list_seq_q and
|
|
9420
|
+
list_seq_k is greater than 1024. `pse` should be :math:`(B, N1, 1024, S2)` and
|
|
9421
|
+
:math:`(1, N1, 1024, S2)`, and S2 is equal to max_seqlen_k.
|
|
9422
|
+
- `atten_mask` must be a lower triangular matrix, so `sparse_mode` should be 2 or 3. The shape of
|
|
9423
|
+
`atten_mask` should be :math:`(2048, 2048)`.
|
|
9424
|
+
- Prefix is ``None`` .
|
|
9425
|
+
- `next_tokens` is 0, and `pre_tokens` is not less than max_seqlen_q.
|
|
9426
|
+
- When `sparse_mode` is 3, S1 of each batch should be less than or equal to S2.
|
|
9427
|
+
- 0 should not exist in list_seq_k.
|
|
9428
|
+
|
|
9429
|
+
Keyword Args:
|
|
9430
|
+
pse (Tensor, optional): The position embedding code, whose dtype is the same as `query`. Default: ``None`` .
|
|
9431
|
+
If S is greater than 1024 and the lower-triangle mask is used, pass in only the last 1024 rows of
|
|
9432
|
+
the lower triangle for memory optimization. Input tensor of shape :math:`(B, N1, S1, S2)`,
|
|
9433
|
+
:math:`(1, N1, S1, S2)`, :math:`(B, N1, 1024, S2)`, :math:`(1, N1, 1024, S2)`.
|
|
9434
|
+
|
|
9435
|
+
- ALiBi scenario: `pse` must meet the ALiBi rule, and `sparse_mode` is 2 or 3 for the lower triangle.
|
|
9436
|
+
In this scenario, `pse` is :math:`(B, N1, 1024, S2)`, :math:`(1, N1, 1024, S2)`.
|
|
9437
|
+
- Non-ALiBi scenario: `pse` is :math:`(B, N1, S1, S2)`, :math:`(1, N1, S1, S2)`.
|
|
9438
|
+
- The shape of `pse` should be :math:`(B, N1, 1024, S2)` and :math:`(1, N1, 1024, S2)` when `input_layout`
|
|
9439
|
+
is ``"TND"`` .
|
|
9440
|
+
- If `pse_type` is 2 or 3, dtype of `pse` must be float32, and shape of `pse` should be :math:`(B, N1)` or
|
|
9441
|
+
:math:`(N1,)`.
|
|
9442
|
+
|
|
9443
|
+
padding_mask (Tensor, optional): Reserved parameter. Not implemented yet. Default: ``None`` .
|
|
9444
|
+
atten_mask (Tensor, optional): The attention mask tensor. For each element, 0/False indicates retention and
|
|
9445
|
+
1/True indicates discard. Input tensor of shape :math:`(B, N1, S1, S2)`, :math:`(B, 1, S1, S2)`,
|
|
9446
|
+
:math:`(S1, S2)` or :math:`(2048, 2048)`. Default: ``None`` .
|
|
9447
|
+
|
|
9448
|
+
- In compression scenario, `sparse_mode` is 2, 3, or 4, `atten_mask` must be :math:`(2048, 2048)`.
|
|
9449
|
+
- When `sparse_mode` is 5, `atten_mask` must be :math:`(B, N1, S1, S2)`, :math:`(B, 1, S1, S2)`.
|
|
9450
|
+
- When `sparse_mode` is 0 and 1, `atten_mask` should be :math:`(B, N1, S1, S2)`, :math:`(B, 1, S1, S2)`,
|
|
9451
|
+
:math:`(S1, S2)`.
|
|
9452
|
+
|
|
9453
|
+
scale (float, optional): The scale factor of score. Generally, the value is 1.0 / (D ** 0.5). Default: ``1.0`` .
|
|
9454
|
+
keep_prob (float, optional): The keep probability of dropout. Value range is (0.0, 1.0]. Default: ``1.0`` .
|
|
9455
|
+
pre_tokens (int, optional): Parameter for sparse computation, represents how many tokens are counted forward.
|
|
9456
|
+
When `sparse_mode` is set to 1, 2, 3, or 5, this parameter does not take effect. Default: ``2147483647`` .
|
|
9457
|
+
next_tokens (int, optional): Parameter for sparse computation, represents how many tokens are counted backward.
|
|
9458
|
+
When `sparse_mode` is set to 1, 2, 3, or 5, this parameter does not take effect. Default: ``2147483647`` .
|
|
9459
|
+
The value of pre_tokens corresponds to S1, and the value of next_tokens corresponds to S2. They define the
|
|
9460
|
+
valid area on the `atten_mask` matrix. It must ensure that the band is not empty.
|
|
9461
|
+
The following values are not allowed:
|
|
9462
|
+
|
|
9463
|
+
- pre_tokens < 0 and next_tokens < 0.
|
|
9464
|
+
- (pre_tokens < 0 and next_tokens >= 0) and (next_tokens < abs(pre_tokens) or abs(pre_tokens) >= S2).
|
|
9465
|
+
- (pre_tokens >= 0 and next_tokens < 0) and (abs(next_tokens) > pre_tokens or abs(next_tokens) >= S1).
|
|
9466
|
+
|
|
9467
|
+
inner_precise (int, optional): The parameter is reserved and not implemented yet. Default: ``0`` .
|
|
9468
|
+
prefix (Union[tuple[int], list[int]], optional): N value of each Batch in the prefix sparse calculation
|
|
9469
|
+
scenario. Input tensor of shape :math:`(B,)`, where the max value of B is 32. Not ``None`` only when `sparse_mode` is 5.
|
|
9470
|
+
If S1 > S2, N ranges from 0 to S2. If S1 <= S2, N ranges from S2 - S1 to S2. Default: ``None`` .
|
|
9471
|
+
actual_seq_qlen (Union[tuple[int], list[int]], optional): Size of query corresponding to each batch, array
|
|
9472
|
+
with increasing values and the last value equal to T1. Default: ``None`` .
|
|
9473
|
+
actual_seq_kvlen (Union[tuple[int], list[int]], optional): Size of key and value corresponding to each batch,
|
|
9474
|
+
array with increasing values and the last value equal to T2. Default: ``None`` .
|
|
9475
|
+
sparse_mode (int, optional): Indicates sparse mode. Default ``0`` .
|
|
9476
|
+
|
|
9477
|
+
- 0: Indicates the defaultMask mode. If `atten_mask` is not passed, the mask operation is not performed,
|
|
9478
|
+
and preTokens and nextTokens(internally assigned as INT_MAX) are ignored. If passed in, the full
|
|
9479
|
+
`atten_mask` matrix (S1 * S2) needs to be passed in, indicating that the part between preTokens and
|
|
9480
|
+
nextTokens needs to be calculated.
|
|
9481
|
+
- 1: Represents allMask, that is, passing in the complete `atten_mask` matrix.
|
|
9482
|
+
- 2: Represents the leftUpCausal mode, corresponding to the lower triangle scenario divided by the left
|
|
9483
|
+
vertex, and the optimized `atten_mask` matrix (2048*2048) is required.
|
|
9484
|
+
- 3: Represents the rightDownCausal mode, corresponding to the lower triangle scenario divided by the lower
|
|
9485
|
+
right vertex, and the optimized `atten_mask` matrix (2048*2048) is required.
|
|
9486
|
+
- 4: Represents the band scenario, that is, counting only the part between preTokens and nextTokens, and the
|
|
9487
|
+
optimized `atten_mask` matrix (2048*2048) is required.
|
|
9488
|
+
- 5: Represents the prefix scenario, that is, on the basis of rightDownCausal, a matrix with length S1 and
|
|
9489
|
+
width N is added to the left side. The value of N is obtained by the new input prefix, and the N value
|
|
9490
|
+
of each Batch axis is different. Currently not enabled.
|
|
9491
|
+
- 6: Represents the global scenario. Currently not enabled.
|
|
9492
|
+
- 7: Represents the dilated scenario. Currently not enabled.
|
|
9493
|
+
- 8: Represents the block_local scenario. Currently not enabled.
|
|
9494
|
+
|
|
9495
|
+
gen_mask_parallel (bool, optional): Debug parameter, a switch to control dropout_gen_mask execution method.
|
|
9496
|
+
If ``True`` , dropout_gen_mask is executed in parallel. If ``False`` , execution is serial.
|
|
9497
|
+
Not implemented yet. Default: ``True`` .
|
|
9498
|
+
sync (bool, optional): Debug parameter, a switch to control dropout_gen_mask execution method.
|
|
9499
|
+
If ``True`` , dropout_gen_mask is executed synchronously. If ``False`` , execution is asynchronous.
|
|
9500
|
+
Not implemented yet. Default: ``False`` .
|
|
9501
|
+
pse_type (int, optional): Indicates how to use `pse`. Default ``1`` .
|
|
9502
|
+
|
|
9503
|
+
- 0: `pse` is passed from outside, and the calculation process is to first mul `scale` and then add `pse`.
|
|
9504
|
+
- 1: `pse` is passed from outside, and the calculation process is to add `pse` first and then mul `scale`.
|
|
9505
|
+
- 2: `pse` is generated internally and generates standard alibi position information. The internally
|
|
9506
|
+
generated alibi matrix's row 0 is aligned with the upper left corner of :math:`query * key^{T}`.
|
|
9507
|
+
- 3: `pse` is generated internally; the generated alibi position information is the standard alibi
|
|
9508
|
+
with a square root applied. The internally generated alibi matrix's row 0 is aligned with
|
|
9509
|
+
the upper left corner of :math:`query * key^{T}`.
|
|
9510
|
+
|
|
9511
|
+
q_start_idx (Union[tuple[int], list[int]], optional): Int array with length 1. Default: ``None`` .
|
|
9512
|
+
When pse_type is configured as ``2`` or ``3`` , it indicates the number of cells that the internally
|
|
9513
|
+
generated alibi code is offset in the S1 direction. A positive number indicates that 0 moves diagonally
|
|
9514
|
+
upward.
|
|
9515
|
+
kv_start_idx (Union[tuple[int], list[int]], optional): Int array with length 1. Default: ``None`` .
|
|
9516
|
+
When pse_type is configured as ``2`` or ``3`` , it indicates the number of cells that the internally
|
|
9517
|
+
generated alibi code is offset in the S2 direction. A positive number indicates that 0 moves diagonally
|
|
9518
|
+
upward.
|
|
9519
|
+
|
|
9520
|
+
Returns:
|
|
9521
|
+
A tuple of tensors containing `attention_out`, `softmax_max`, `softmax_sum`, `softmax_out`, `seed`, `offset`
|
|
9522
|
+
and `numels` .
|
|
9523
|
+
|
|
9524
|
+
- `attention_out` is the output of attention; its shape and data type are the same as `query`.
|
|
9525
|
+
- `softmax_max` is the max intermediate result calculated by Softmax, used for grad calculation.
|
|
9526
|
+
- `softmax_sum` is the sum intermediate result calculated by Softmax, used for grad calculation.
|
|
9527
|
+
- `softmax_out` is a reserved parameter.
|
|
9528
|
+
- `seed` is generated seed, used for Dropout.
|
|
9529
|
+
- `offset` is generated offset, used for Dropout.
|
|
9530
|
+
- `numels` is the length of generated dropout_mask.
|
|
9531
|
+
|
|
9532
|
+
Raises:
|
|
9533
|
+
TypeError: `query`, `key` and `value` don't have the same dtype.
|
|
9534
|
+
TypeError: Dtype of `atten_mask` is not bool or uint8.
|
|
9535
|
+
TypeError: `scale` or `keep_prob` is not a float number.
|
|
9536
|
+
TypeError: `input_layout` is not a string.
|
|
9537
|
+
TypeError: `head_num` is not an int.
|
|
9538
|
+
TypeError: `sparse_mode` is not an int.
|
|
9539
|
+
TypeError: `pse` is not Tensor type.
|
|
9540
|
+
TypeError: `padding_mask` is not Tensor type.
|
|
9541
|
+
TypeError: `atten_mask` is not Tensor type.
|
|
9542
|
+
TypeError: `pse_type` is not an int.
|
|
9543
|
+
ValueError: `input_layout` is a string but not valid.
|
|
9544
|
+
ValueError: The specified value of `sparse_mode` is invalid.
|
|
9545
|
+
ValueError: The specified value of `pse_type` is invalid.
|
|
9546
|
+
|
|
9547
|
+
Supported Platforms:
|
|
9548
|
+
``Ascend``
|
|
9549
|
+
|
|
9550
|
+
Examples:
|
|
9551
|
+
>>> import mindspore
|
|
9552
|
+
>>> import mindspore.common.dtype as mstype
|
|
9553
|
+
>>> import numpy as np
|
|
9554
|
+
>>> from mindspore import ops, Tensor
|
|
9555
|
+
>>> query = Tensor(np.ones([2, 4, 64]), dtype=mstype.float16)
|
|
9556
|
+
>>> key = Tensor(np.ones([2, 4, 64]), dtype=mstype.float16)
|
|
9557
|
+
>>> value = Tensor(np.ones([2, 4, 64]), dtype=mstype.float16)
|
|
9558
|
+
>>> head_num = 4
|
|
9559
|
+
>>> input_layout = "BSH"
|
|
9560
|
+
>>> output = ops.speed_fusion_attention(query, key, value, head_num, input_layout)
|
|
9561
|
+
>>> print(output[0].shape)
|
|
9562
|
+
(2, 4, 64)
|
|
9563
|
+
"""
|
|
9564
|
+
seed, offset = default_generator._step(generator_step_) # pylint: disable=protected-access
|
|
9565
|
+
return speed_fusion_attention_op(query, key, value, head_num, input_layout, seed, offset, pse, padding_mask,
|
|
9566
|
+
atten_mask, scale, keep_prob, pre_tokens, next_tokens, inner_precise, prefix,
|
|
9567
|
+
actual_seq_qlen, actual_seq_kvlen, sparse_mode, gen_mask_parallel, sync, pse_type,
|
|
9568
|
+
q_start_idx, kv_start_idx)
|
|
9569
|
+
|
|
9570
|
+
|
|
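Stripped of dropout, layout variants and the fused kernel's side outputs, the pse_type=1 formula documented above is ordinary scaled-dot-product attention. A NumPy reference in "BNSD" layout (illustrative only; it ignores keep_prob, sparse modes and the softmax_max/softmax_sum outputs):

import numpy as np

def attention_ref(q, k, v, scale=1.0, pse=None, atten_mask=None):
    # q, k, v: (B, N, S, D); pse_type=1 adds pse before scaling.
    scores = q @ k.transpose(0, 1, 3, 2)              # (B, N, S1, S2)
    if pse is not None:
        scores = scores + pse
    scores = scale * scores
    if atten_mask is not None:
        scores = np.where(atten_mask, -1e30, scores)  # 1/True means discard
    scores = scores - scores.max(axis=-1, keepdims=True)
    p = np.exp(scores)
    p /= p.sum(axis=-1, keepdims=True)
    return p @ v

b, n, s, d = 2, 4, 4, 16
q = k = v = np.ones((b, n, s, d), dtype=np.float32)
print(attention_ref(q, k, v, scale=d ** -0.5).shape)  # (2, 4, 4, 16)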
9171
9571
|
__all__ = [
|
|
9172
9572
|
'adaptive_avg_pool1d',
|
|
9173
9573
|
'adaptive_avg_pool2d',
|
|
@@ -9198,6 +9598,7 @@ __all__ = [
|
|
|
9198
9598
|
'fast_gelu',
|
|
9199
9599
|
'fractional_max_pool2d',
|
|
9200
9600
|
'fractional_max_pool3d',
|
|
9601
|
+
'speed_fusion_attention',
|
|
9201
9602
|
'pixel_shuffle',
|
|
9202
9603
|
'pixel_unshuffle',
|
|
9203
9604
|
'hardshrink',
|
|
@@ -9205,6 +9606,7 @@ __all__ = [
|
|
|
9205
9606
|
'incre_flash_attention',
|
|
9206
9607
|
'prompt_flash_attention',
|
|
9207
9608
|
'flash_attention_score',
|
|
9609
|
+
'fused_infer_attention_score',
|
|
9208
9610
|
'flip',
|
|
9209
9611
|
'fliplr',
|
|
9210
9612
|
'flipud',
|
|
@@ -9225,7 +9627,6 @@ __all__ = [
|
|
|
9225
9627
|
'softplus',
|
|
9226
9628
|
'selu',
|
|
9227
9629
|
'silu',
|
|
9228
|
-
'soft_margin_loss',
|
|
9229
9630
|
'softmax',
|
|
9230
9631
|
'softmin',
|
|
9231
9632
|
'pdist',
|
|
@@ -9247,6 +9648,7 @@ __all__ = [
|
|
|
9247
9648
|
'conv2d',
|
|
9248
9649
|
'conv_transpose2d',
|
|
9249
9650
|
'sigmoid',
|
|
9651
|
+
'soft_margin_loss',
|
|
9250
9652
|
'logsigmoid',
|
|
9251
9653
|
'relu',
|
|
9252
9654
|
'relu6',
|
|
@@ -9264,6 +9666,8 @@ __all__ = [
|
|
|
9264
9666
|
'gaussian_nll_loss',
|
|
9265
9667
|
'lp_pool1d',
|
|
9266
9668
|
'lp_pool2d',
|
|
9669
|
+
'moe_token_permute',
|
|
9670
|
+
'moe_token_unpermute',
|
|
9267
9671
|
'max_unpool1d',
|
|
9268
9672
|
'max_unpool2d',
|
|
9269
9673
|
'max_unpool3d',
|