mindspore 2.7.0rc1-cp310-cp310-win_amd64.whl → 2.7.1-cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (370)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +5 -2
  3. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +2 -2
  7. mindspore/_extends/builtin_operations.py +3 -3
  8. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  9. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  10. mindspore/_extends/parse/__init__.py +3 -3
  11. mindspore/_extends/parse/compile_config.py +24 -1
  12. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -3
  13. mindspore/_extends/parse/parser.py +28 -22
  14. mindspore/_extends/parse/resources.py +1 -1
  15. mindspore/_extends/parse/standard_method.py +23 -2
  16. mindspore/_extends/parse/trope.py +2 -1
  17. mindspore/_extends/pijit/pijit_func_white_list.py +9 -27
  18. mindspore/amp.py +0 -18
  19. mindspore/avcodec-59.dll +0 -0
  20. mindspore/avdevice-59.dll +0 -0
  21. mindspore/avfilter-8.dll +0 -0
  22. mindspore/avformat-59.dll +0 -0
  23. mindspore/avutil-57.dll +0 -0
  24. mindspore/boost/base.py +29 -2
  25. mindspore/common/__init__.py +18 -12
  26. mindspore/common/_decorator.py +3 -2
  27. mindspore/common/_grad_function.py +3 -1
  28. mindspore/common/_tensor_cpp_method.py +1 -1
  29. mindspore/common/_tensor_docs.py +371 -96
  30. mindspore/common/_utils.py +7 -43
  31. mindspore/common/api.py +434 -135
  32. mindspore/common/dtype.py +98 -57
  33. mindspore/common/dump.py +7 -108
  34. mindspore/common/dynamic_shape/__init__.py +0 -0
  35. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +15 -23
  36. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  37. mindspore/common/file_system.py +59 -9
  38. mindspore/common/hook_handle.py +82 -3
  39. mindspore/common/jit_config.py +5 -1
  40. mindspore/common/jit_trace.py +27 -12
  41. mindspore/common/lazy_inline.py +5 -3
  42. mindspore/common/np_dtype.py +3 -3
  43. mindspore/common/parameter.py +17 -127
  44. mindspore/common/recompute.py +4 -13
  45. mindspore/common/tensor.py +50 -217
  46. mindspore/communication/_comm_helper.py +11 -1
  47. mindspore/communication/comm_func.py +138 -4
  48. mindspore/communication/management.py +85 -1
  49. mindspore/config/op_info.config +0 -15
  50. mindspore/context.py +20 -106
  51. mindspore/dataset/__init__.py +1 -1
  52. mindspore/dataset/audio/transforms.py +1 -1
  53. mindspore/dataset/core/config.py +35 -1
  54. mindspore/dataset/engine/datasets.py +338 -319
  55. mindspore/dataset/engine/datasets_user_defined.py +38 -22
  56. mindspore/dataset/engine/datasets_vision.py +1 -1
  57. mindspore/dataset/engine/validators.py +1 -15
  58. mindspore/dataset/transforms/c_transforms.py +2 -2
  59. mindspore/dataset/transforms/transforms.py +3 -3
  60. mindspore/dataset/vision/__init__.py +1 -1
  61. mindspore/dataset/vision/py_transforms.py +8 -8
  62. mindspore/dataset/vision/transforms.py +17 -5
  63. mindspore/dataset/vision/utils.py +632 -21
  64. mindspore/device_context/ascend/op_tuning.py +35 -1
  65. mindspore/dnnl.dll +0 -0
  66. mindspore/{profiler/common/validator → graph}/__init__.py +9 -1
  67. mindspore/graph/custom_pass.py +55 -0
  68. mindspore/include/api/cell.h +28 -4
  69. mindspore/include/api/cfg.h +24 -7
  70. mindspore/include/api/context.h +1 -0
  71. mindspore/include/api/delegate.h +0 -2
  72. mindspore/include/api/dual_abi_helper.h +100 -19
  73. mindspore/include/api/graph.h +14 -1
  74. mindspore/include/api/kernel.h +16 -3
  75. mindspore/include/api/kernel_api.h +9 -1
  76. mindspore/include/api/metrics/accuracy.h +9 -0
  77. mindspore/include/api/model.h +5 -1
  78. mindspore/include/api/model_group.h +4 -0
  79. mindspore/include/api/model_parallel_runner.h +2 -0
  80. mindspore/include/api/status.h +48 -10
  81. mindspore/include/api/types.h +6 -1
  82. mindspore/include/dataset/constants.h +9 -0
  83. mindspore/include/dataset/execute.h +2 -2
  84. mindspore/jpeg62.dll +0 -0
  85. mindspore/mindrecord/__init__.py +3 -3
  86. mindspore/mindrecord/common/exceptions.py +1 -0
  87. mindspore/mindrecord/config.py +1 -1
  88. mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
  89. mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
  90. mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
  91. mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
  92. mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
  93. mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
  94. mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
  95. mindspore/mindrecord/filereader.py +4 -4
  96. mindspore/mindrecord/filewriter.py +5 -5
  97. mindspore/mindrecord/mindpage.py +2 -2
  98. mindspore/mindrecord/tools/cifar10.py +4 -3
  99. mindspore/mindrecord/tools/cifar100.py +1 -1
  100. mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
  101. mindspore/mindrecord/tools/cifar10_to_mr.py +6 -6
  102. mindspore/mindrecord/tools/csv_to_mr.py +1 -1
  103. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  104. mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
  105. mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
  106. mindspore/mindspore_backend_common.dll +0 -0
  107. mindspore/mindspore_backend_manager.dll +0 -0
  108. mindspore/mindspore_cluster.dll +0 -0
  109. mindspore/mindspore_common.dll +0 -0
  110. mindspore/mindspore_core.dll +0 -0
  111. mindspore/mindspore_cpu.dll +0 -0
  112. mindspore/mindspore_dump.dll +0 -0
  113. mindspore/mindspore_frontend.dll +0 -0
  114. mindspore/mindspore_glog.dll +0 -0
  115. mindspore/mindspore_hardware_abstract.dll +0 -0
  116. mindspore/mindspore_memory_pool.dll +0 -0
  117. mindspore/mindspore_ms_backend.dll +0 -0
  118. mindspore/mindspore_ops.dll +0 -0
  119. mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
  120. mindspore/mindspore_profiler.dll +0 -0
  121. mindspore/mindspore_pyboost.dll +0 -0
  122. mindspore/mindspore_pynative.dll +0 -0
  123. mindspore/mindspore_runtime_pipeline.dll +0 -0
  124. mindspore/mindspore_runtime_utils.dll +0 -0
  125. mindspore/mindspore_tools.dll +0 -0
  126. mindspore/mint/__init__.py +15 -10
  127. mindspore/mint/distributed/__init__.py +4 -0
  128. mindspore/mint/distributed/distributed.py +392 -69
  129. mindspore/mint/nn/__init__.py +2 -16
  130. mindspore/mint/nn/functional.py +4 -110
  131. mindspore/mint/nn/layer/__init__.py +0 -2
  132. mindspore/mint/nn/layer/_functions.py +1 -2
  133. mindspore/mint/nn/layer/activation.py +0 -6
  134. mindspore/mint/nn/layer/basic.py +0 -47
  135. mindspore/mint/nn/layer/conv.py +10 -10
  136. mindspore/mint/nn/layer/normalization.py +11 -16
  137. mindspore/mint/nn/layer/pooling.py +0 -4
  138. mindspore/nn/__init__.py +1 -3
  139. mindspore/nn/cell.py +231 -239
  140. mindspore/nn/layer/activation.py +4 -2
  141. mindspore/nn/layer/basic.py +56 -14
  142. mindspore/nn/layer/container.py +16 -0
  143. mindspore/nn/layer/embedding.py +4 -169
  144. mindspore/nn/layer/image.py +1 -1
  145. mindspore/nn/layer/normalization.py +2 -1
  146. mindspore/nn/layer/thor_layer.py +4 -85
  147. mindspore/nn/optim/ada_grad.py +0 -1
  148. mindspore/nn/optim/adafactor.py +0 -1
  149. mindspore/nn/optim/adam.py +32 -127
  150. mindspore/nn/optim/adamax.py +0 -1
  151. mindspore/nn/optim/asgd.py +0 -1
  152. mindspore/nn/optim/ftrl.py +8 -102
  153. mindspore/nn/optim/lamb.py +1 -4
  154. mindspore/nn/optim/lars.py +0 -3
  155. mindspore/nn/optim/lazyadam.py +25 -218
  156. mindspore/nn/optim/momentum.py +5 -43
  157. mindspore/nn/optim/optimizer.py +6 -55
  158. mindspore/nn/optim/proximal_ada_grad.py +0 -1
  159. mindspore/nn/optim/rmsprop.py +0 -1
  160. mindspore/nn/optim/rprop.py +0 -1
  161. mindspore/nn/optim/sgd.py +0 -1
  162. mindspore/nn/optim/tft_wrapper.py +2 -4
  163. mindspore/nn/optim/thor.py +0 -2
  164. mindspore/nn/probability/bijector/bijector.py +7 -8
  165. mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
  166. mindspore/nn/probability/bijector/power_transform.py +20 -21
  167. mindspore/nn/probability/bijector/scalar_affine.py +5 -5
  168. mindspore/nn/probability/bijector/softplus.py +13 -14
  169. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  170. mindspore/nn/wrap/cell_wrapper.py +39 -5
  171. mindspore/nn/wrap/grad_reducer.py +4 -89
  172. mindspore/numpy/array_creations.py +4 -4
  173. mindspore/numpy/fft.py +9 -9
  174. mindspore/numpy/utils_const.py +1 -1
  175. mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
  176. mindspore/onnx/onnx_export.py +137 -0
  177. mindspore/opencv_core4110.dll +0 -0
  178. mindspore/opencv_imgcodecs4110.dll +0 -0
  179. mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
  180. mindspore/ops/__init__.py +2 -0
  181. mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
  182. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  183. mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
  184. mindspore/ops/_op_impl/cpu/__init__.py +1 -5
  185. mindspore/ops/_op_impl/cpu/{buffer_append.py → joinedstr_op.py} +8 -8
  186. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +28 -24
  187. mindspore/ops/auto_generate/gen_extend_func.py +6 -11
  188. mindspore/ops/auto_generate/gen_ops_def.py +385 -154
  189. mindspore/ops/auto_generate/gen_ops_prim.py +5676 -5167
  190. mindspore/ops/communication.py +97 -0
  191. mindspore/ops/composite/__init__.py +5 -2
  192. mindspore/ops/composite/base.py +16 -2
  193. mindspore/ops/composite/multitype_ops/__init__.py +3 -1
  194. mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
  195. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  196. mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
  197. mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
  198. mindspore/ops/function/__init__.py +2 -0
  199. mindspore/ops/function/array_func.py +24 -18
  200. mindspore/ops/function/comm_func.py +3883 -0
  201. mindspore/ops/function/debug_func.py +7 -6
  202. mindspore/ops/function/grad/grad_func.py +4 -12
  203. mindspore/ops/function/math_func.py +89 -86
  204. mindspore/ops/function/nn_func.py +92 -313
  205. mindspore/ops/function/random_func.py +9 -18
  206. mindspore/ops/functional.py +4 -1
  207. mindspore/ops/functional_overload.py +377 -30
  208. mindspore/ops/operations/__init__.py +2 -5
  209. mindspore/ops/operations/_custom_ops_utils.py +7 -9
  210. mindspore/ops/operations/_inner_ops.py +12 -50
  211. mindspore/ops/operations/_rl_inner_ops.py +0 -933
  212. mindspore/ops/operations/array_ops.py +5 -50
  213. mindspore/ops/operations/comm_ops.py +95 -17
  214. mindspore/ops/operations/custom_ops.py +237 -22
  215. mindspore/ops/operations/debug_ops.py +33 -35
  216. mindspore/ops/operations/manually_defined/ops_def.py +39 -318
  217. mindspore/ops/operations/math_ops.py +5 -5
  218. mindspore/ops/operations/nn_ops.py +3 -3
  219. mindspore/ops/operations/sparse_ops.py +0 -83
  220. mindspore/ops/primitive.py +4 -27
  221. mindspore/ops/tensor_method.py +88 -10
  222. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
  223. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
  224. mindspore/ops_generate/api/functions_cc_generator.py +53 -4
  225. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
  226. mindspore/ops_generate/common/gen_constants.py +11 -10
  227. mindspore/ops_generate/common/op_proto.py +18 -1
  228. mindspore/ops_generate/common/template.py +102 -245
  229. mindspore/ops_generate/common/template_utils.py +212 -0
  230. mindspore/ops_generate/gen_custom_ops.py +69 -0
  231. mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
  232. mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
  233. mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
  234. mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
  235. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
  236. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
  237. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
  238. mindspore/ops_generate/pyboost/gen_pyboost_func.py +0 -16
  239. mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
  240. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
  241. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
  242. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
  243. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
  244. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
  245. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
  246. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
  247. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
  248. mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
  249. mindspore/ops_generate/resources/yaml_loader.py +13 -0
  250. mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
  251. mindspore/parallel/_auto_parallel_context.py +5 -15
  252. mindspore/parallel/_cell_wrapper.py +1 -1
  253. mindspore/parallel/_parallel_serialization.py +4 -6
  254. mindspore/parallel/_ps_context.py +2 -2
  255. mindspore/parallel/_utils.py +34 -17
  256. mindspore/parallel/auto_parallel.py +23 -9
  257. mindspore/parallel/checkpoint_transform.py +20 -2
  258. mindspore/parallel/cluster/process_entity/_api.py +28 -33
  259. mindspore/parallel/cluster/process_entity/_utils.py +9 -5
  260. mindspore/parallel/cluster/run.py +5 -3
  261. mindspore/{experimental/llm_boost/ascend_native → parallel/distributed}/__init__.py +21 -22
  262. mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
  263. mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
  264. mindspore/parallel/function/reshard_func.py +6 -5
  265. mindspore/parallel/nn/parallel_cell_wrapper.py +40 -3
  266. mindspore/parallel/nn/parallel_grad_reducer.py +0 -8
  267. mindspore/parallel/shard.py +7 -21
  268. mindspore/parallel/strategy.py +336 -0
  269. mindspore/parallel/transform_safetensors.py +127 -20
  270. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +13 -9
  271. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +1 -1
  272. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
  273. mindspore/profiler/common/constant.py +5 -0
  274. mindspore/profiler/common/file_manager.py +9 -0
  275. mindspore/profiler/common/msprof_cmd_tool.py +40 -4
  276. mindspore/profiler/common/path_manager.py +65 -24
  277. mindspore/profiler/common/profiler_context.py +27 -14
  278. mindspore/profiler/common/profiler_info.py +3 -3
  279. mindspore/profiler/common/profiler_meta_data.py +1 -0
  280. mindspore/profiler/common/profiler_op_analyse.py +10 -6
  281. mindspore/profiler/common/profiler_path_manager.py +13 -0
  282. mindspore/profiler/common/util.py +30 -3
  283. mindspore/profiler/dynamic_profiler.py +91 -46
  284. mindspore/profiler/envprofiler.py +30 -5
  285. mindspore/profiler/experimental_config.py +18 -2
  286. mindspore/profiler/platform/cpu_profiler.py +10 -4
  287. mindspore/profiler/platform/npu_profiler.py +34 -7
  288. mindspore/profiler/profiler.py +193 -145
  289. mindspore/profiler/profiler_action_controller.py +1 -1
  290. mindspore/profiler/profiler_interface.py +2 -2
  291. mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
  292. mindspore/run_check/_check_version.py +108 -24
  293. mindspore/runtime/__init__.py +9 -6
  294. mindspore/runtime/executor.py +35 -0
  295. mindspore/runtime/memory.py +113 -0
  296. mindspore/runtime/thread_bind_core.py +1 -1
  297. mindspore/swresample-4.dll +0 -0
  298. mindspore/swscale-6.dll +0 -0
  299. mindspore/tinyxml2.dll +0 -0
  300. mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
  301. mindspore/tools/data_dump.py +130 -0
  302. mindspore/tools/sdc_detect.py +91 -0
  303. mindspore/tools/stress_detect.py +63 -0
  304. mindspore/train/__init__.py +6 -6
  305. mindspore/train/_utils.py +8 -21
  306. mindspore/train/amp.py +6 -7
  307. mindspore/train/callback/_callback.py +2 -1
  308. mindspore/train/callback/_checkpoint.py +1 -17
  309. mindspore/train/callback/_flops_collector.py +10 -6
  310. mindspore/train/callback/_train_fault_tolerance.py +72 -25
  311. mindspore/train/data_sink.py +5 -9
  312. mindspore/train/dataset_helper.py +5 -5
  313. mindspore/train/model.py +41 -230
  314. mindspore/train/serialization.py +160 -401
  315. mindspore/train/train_thor/model_thor.py +2 -2
  316. mindspore/turbojpeg.dll +0 -0
  317. mindspore/utils/__init__.py +6 -3
  318. mindspore/utils/dlpack.py +92 -0
  319. mindspore/utils/dryrun.py +1 -1
  320. mindspore/utils/runtime_execution_order_check.py +10 -0
  321. mindspore/utils/sdc_detect.py +14 -12
  322. mindspore/utils/stress_detect.py +43 -0
  323. mindspore/utils/utils.py +152 -16
  324. mindspore/version.py +1 -1
  325. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
  326. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/RECORD +330 -344
  327. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  328. mindspore/communication/_hccl_management.py +0 -297
  329. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -207
  330. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
  331. mindspore/experimental/llm_boost/atb/__init__.py +0 -23
  332. mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
  333. mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
  334. mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
  335. mindspore/experimental/llm_boost/register.py +0 -130
  336. mindspore/experimental/llm_boost/utils.py +0 -31
  337. mindspore/include/OWNERS +0 -7
  338. mindspore/mindspore_cpu_res_manager.dll +0 -0
  339. mindspore/mindspore_ops_kernel_common.dll +0 -0
  340. mindspore/mindspore_res_manager.dll +0 -0
  341. mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
  342. mindspore/nn/reinforcement/_batch_read_write.py +0 -142
  343. mindspore/nn/reinforcement/_tensors_queue.py +0 -152
  344. mindspore/nn/reinforcement/tensor_array.py +0 -145
  345. mindspore/opencv_core452.dll +0 -0
  346. mindspore/opencv_imgcodecs452.dll +0 -0
  347. mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
  348. mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
  349. mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
  350. mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
  351. mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
  352. mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
  353. mindspore/ops/operations/_tensor_array.py +0 -359
  354. mindspore/ops/operations/rl_ops.py +0 -288
  355. mindspore/parallel/_offload_context.py +0 -275
  356. mindspore/parallel/_recovery_context.py +0 -115
  357. mindspore/parallel/_transformer/__init__.py +0 -35
  358. mindspore/parallel/_transformer/layers.py +0 -765
  359. mindspore/parallel/_transformer/loss.py +0 -251
  360. mindspore/parallel/_transformer/moe.py +0 -693
  361. mindspore/parallel/_transformer/op_parallel_config.py +0 -222
  362. mindspore/parallel/_transformer/transformer.py +0 -3124
  363. mindspore/parallel/mpi/_mpi_config.py +0 -116
  364. mindspore/profiler/common/validator/validate_path.py +0 -84
  365. mindspore/train/memory_profiling_pb2.py +0 -298
  366. mindspore/utils/hooks.py +0 -81
  367. /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  368. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
  369. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
  370. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
@@ -250,11 +250,11 @@ def add_ext(input, other, alpha=1):
250
250
  input (Union[Tensor, number.Number, bool]): The first input is a number.Number or
251
251
  a bool or a tensor whose data type is
252
252
  `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_ or
253
- `bool_ <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
253
+ `bool <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
254
254
  other (Union[Tensor, number.Number, bool]): The second input, is a number.Number or
255
255
  a bool or a tensor whose data type is
256
256
  `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_ or
257
- `bool_ <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
257
+ `bool <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
258
258
  alpha (number.Number): A scaling factor applied to `other`, default 1.
259
259
 
260
260
  Returns:
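For intuition about the `alpha` argument documented in the add_ext hunk above: it is described as a scaling factor applied to `other`, i.e. the result equals input + alpha * other. A minimal NumPy sketch of that arithmetic (illustration only, not the operator's implementation):

import numpy as np

x = np.array([1.0, 2.0, 3.0], dtype=np.float32)
y = np.array([10.0, 10.0, 10.0], dtype=np.float32)
alpha = 0.5

# add_ext(input, other, alpha) adds `other` scaled by `alpha`
print(x + alpha * y)  # [6. 7. 8.]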
@@ -312,7 +312,7 @@ def add(input, other):
312
312
 
313
313
  Note:
314
314
  - The two inputs can not be bool type at the same time,
315
- [True, Tensor(True, bool\_), Tensor(np.array([True]), bool\_)] are all considered bool type.
315
+ [True, Tensor(True), Tensor(np.array([True]))] are all considered bool type.
316
316
  - Support broadcast, support implicit type conversion and type promotion.
317
317
  - When the input is a tensor, the dimension should be greater than or equal to 1.
318
318
 
@@ -1558,9 +1558,6 @@ def clone(input):
1558
1558
  r"""
1559
1559
  Returns a copy of the input tensor.
1560
1560
 
1561
- .. warning::
1562
- This is an experimental API that is subject to change or deletion.
1563
-
1564
1561
  Note:
1565
1562
  This function is differentiable, and gradients will flow back directly from the calculation
1566
1563
  result of the function to the `input`.
@@ -1939,9 +1936,6 @@ def count_nonzero(input, dim=None):
1939
1936
  r"""
1940
1937
  Count the number of non-zero elements in the Tensor `input` on a given dimension `dim`. If no dim is specified then all non-zeros in the tensor are counted.
1941
1938
 
1942
- .. warning::
1943
- This is an experimental API that is subject to change or deletion.
1944
-
1945
1939
  Args:
1946
1940
  input (Tensor): Input data is used to count non-zero numbers. With shape
1947
1941
  :math:`(*)` where :math:`*` means, any number of additional dimensions.
@@ -1985,6 +1979,112 @@ def count_nonzero(input, dim=None):
1985
1979
  return count_nonzero_op(input, dim)
1986
1980
 
1987
1981
 
1982
+ def cross_entropy_loss_grad(grad_loss, log_prob, target, weight=None, grad_zloss=None, lse_for_zloss=None, reduction='mean', ignore_index=-100, label_smoothing=0.0, lse_square_scale_for_zloss=0.0):
1983
+ r"""
1984
+
1985
+ """
1986
+ return cross_entropy_loss_grad_op(grad_loss, log_prob, target, weight, grad_zloss, lse_for_zloss, reduction, ignore_index, label_smoothing, lse_square_scale_for_zloss)
1987
+
1988
+
1989
+ def cross_entropy_loss(input, target, weight=None, reduction='mean', ignore_index=-100, label_smoothing=0.0, lse_square_scale_for_zloss=0.0, return_zloss=False):
1990
+ r"""
1991
+ Computes the cross entropy loss between input and target.
1992
+
1993
+ Assume the number of classes :math:`C` in the range :math:`[0, C)`,
1994
+ the loss with reduction=none can be described as:
1995
+
1996
+ .. math::
1997
+
1998
+ \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
1999
+ l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
2000
+ \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
2001
+
2002
+ where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, :math:`N` is the batch size,
2003
+ :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.
2004
+
2005
+ If `reduction` is not ``None`` (default ``'mean'`` ), then
2006
+
2007
+ .. math::
2008
+
2009
+ \ell(x, y) = \begin{cases}
2010
+ \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
2011
+ \text{if reduction} = \text{'mean',}\\
2012
+ \sum_{n=1}^N l_n, &
2013
+ \text{if reduction} = \text{'sum'.}
2014
+ \end{cases}
2015
+
2016
+ .. warning::
2017
+ This is an experimental API that is subject to change or deletion.
2018
+
2019
+ Inputs:
2020
+ - **input** (Tensor) - Tensor of shape :math:`(N, C)` where `C = number of classes`, data type must be bfloat16, float16 or float32.
2021
+ - **target** (Tensor) - For class indices, tensor of shape :math:`(N)`, data type must be int64. The value must be in range [0, C).
2022
+ - **weight** (Tensor, optional) - A rescaling weight applied to the loss of each batch element.
2023
+ If not None, the shape is :math:`(C,)`, data type must be float32. Default: ``None`` .
2024
+ - **reduction** (str, optional) - Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
2025
+ ``'sum'`` . Default: ``'mean'`` .
2026
+
2027
+ - ``'none'``: no reduction will be applied.
2028
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
2029
+ - ``'sum'``: the output elements will be summed.
2030
+
2031
+ - **ignore_index** (int, optional) - Specifies a target value that is ignored and does not contribute to the input
2032
+ gradient. When set to negative values, no target value is ignored. It should be int64.
2033
+ Default: ``-100`` .
2034
+ - **label_smoothing** (float, optional) - Label smoothing values, a regularization tool used to prevent the model
2035
+ from overfitting when calculating Loss. This value must be 0.0 currently. Default: ``0.0`` .
2036
+ - **lse_square_scale_for_zloss** (float, optional) - The value range is [0.0, 1.0), not enabled for now, can only be 0.0. Default: ``0.0`` .
2037
+ - **return_zloss** (bool, optional) - Not enabled for now, can only be ``False``. Default: ``False`` .
2038
+
2039
+ Outputs:
2040
+ A tuple consisting of 4 Tensors.
2041
+
2042
+ - **loss** (Tensor) - loss between `input` and `target`, the dtype is the same as `input`.
2043
+
2044
+ - If `reduction` is ``'none'`` , the shape is :math:`(N,)` .
2045
+ - If `reduction` is ``'sum'`` or ``'mean'``, the shape is :math:`(1,)` .
2046
+
2047
+ - **log_prob** (Tensor) - the shape is :math:`(N, C)` with the same dtype as `input`.
2048
+ - **zloss** (Tensor) - the shape is :math:`(N,)` if `return_zloss` is True, or the shape is :math:`(0,)` with the same dtype as `input`. This parameter is disabled for now.
2049
+ - **lse_for_zloss** (Tensor) - the shape is :math:`(N,)` if `lse_square_scale_for_zloss` is not 0.0, or the shape is :math:`(0,)` with the same dtype as `input`. This parameter is disabled for now.
2050
+
2051
+
2052
+ Raises:
2053
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
2054
+ TypeError: If `input`, `target` or `weight` is not a Tensor.
2055
+
2056
+ Supported Platforms:
2057
+ ``Ascend``
2058
+
2059
+ Examples:
2060
+ >>> import mindspore
2061
+ >>> import numpy as np
2062
+ >>> from mindspore import Tensor, nn, ops
2063
+ >>>
2064
+ >>>
2065
+ >>> class Net(nn.Cell):
2066
+ ... def __init__(self):
2067
+ ... super(Net, self).__init__()
2068
+ ... self.cross_entropy_loss = ops.auto_generate.CrossEntropyLoss()
2069
+ ...
2070
+ ... def construct(self, input, target, weight):
2071
+ ... result = self.cross_entropy_loss(input, target, weight)
2072
+ ... return result
2073
+ ...
2074
+ >>>
2075
+ >>> net = Net()
2076
+ >>> input = Tensor(np.array([[0.2, 0.7, 0.1], [0.2, 0.7, 0.1]]), mindspore.float32)
2077
+ >>> target = Tensor(np.array([0, 1]), mindspore.int64)
2078
+ >>> weight = Tensor(np.array([1, 0.5, 0.5]), mindspore.float32)
2079
+ >>> output = net(input, target, weight)
2080
+ >>> print(output[:2])
2081
+ (Tensor(shape=[1], dtype=Float32, value= [ 1.10128295e+00]), Tensor(shape=[2, 3], dtype=Float32, value=
2082
+ [[-1.26794958e+00, -7.67949641e-01, -1.36794960e+00],
2083
+ [-1.26794958e+00, -7.67949641e-01, -1.36794960e+00]]))
2084
+ """
2085
+ return cross_entropy_loss_op(input, target, weight, reduction, ignore_index, label_smoothing, lse_square_scale_for_zloss, return_zloss)
2086
+
2087
+
1988
2088
  def cummax(input, axis):
1989
2089
  r"""
1990
2090
  Return the cumulative maximum values and their indices along the given axis of the tensor.
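As a cross-check of the ``'mean'`` reduction formula and the example output in the cross_entropy_loss docstring added above, the following NumPy sketch reproduces the printed loss value (illustrative only; it is not how the fused Ascend kernel computes it):

import numpy as np

logits = np.array([[0.2, 0.7, 0.1], [0.2, 0.7, 0.1]], dtype=np.float32)
target = np.array([0, 1])
weight = np.array([1.0, 0.5, 0.5], dtype=np.float32)

# log-softmax over the class dimension
log_prob = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
# per-sample weighted negative log-likelihood: l_n = -w_{y_n} * log_prob[n, y_n]
per_sample = -weight[target] * log_prob[np.arange(len(target)), target]
# 'mean' reduction divides by the sum of the selected weights
loss = per_sample.sum() / weight[target].sum()
print(round(float(loss), 5))  # 1.10128, matching the docstring example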
@@ -2162,6 +2262,13 @@ def dense(input, weight, bias=None):
2162
2262
  return dense_op(input, weight, bias)
2163
2263
 
2164
2264
 
2265
+ def dequant_swiglu_quant(x, weight_scale, activation_scale, bias=None, quant_scale=None, quant_offset=None, group_index=None, activate_left=False, quant_mode='static'):
2266
+ r"""
2267
+
2268
+ """
2269
+ return dequant_swiglu_quant_op(x, weight_scale, activation_scale, bias, quant_scale, quant_offset, group_index, activate_left, quant_mode)
2270
+
2271
+
2165
2272
  def diagonal(input, offset=0, dim1=0, dim2=1):
2166
2273
  r"""
2167
2274
  Returns diagonals of the input tensor along specified dimension.
@@ -2330,9 +2437,6 @@ def dot(input, other):
2330
2437
  r"""
2331
2438
  Computes the dot product of two 1D tensor.
2332
2439
 
2333
- .. warning::
2334
- This is an experimental API that is subject to change or deletion.
2335
-
2336
2440
  Args:
2337
2441
  input (Tensor): The first input in the dot product, must be 1D.
2338
2442
  other (Tensor): The second input in the dot product, must be 1D.
@@ -2467,104 +2571,6 @@ def elu(input_x, alpha=1.0):
2467
2571
  return elu_op(input_x)
2468
2572
 
2469
2573
 
2470
- def embedding_apply_adam_w(var_handle, beta1_power, beta2_power, lr, weight_decay, beta1, beta2, epsilon, grad, keys, max_grad_norm, global_step, embedding_dim, ams_grad=(0,), mask_zero=(0,), padding_key=(0,), padding_key_mask=(1,), completion_key=(0,), completion_key_mask=(1,), _embedding_dim=1, _max_key_num=1):
2471
- r"""
2472
-
2473
- """
2474
- return embedding_apply_adam_w_op(var_handle, beta1_power, beta2_power, lr, weight_decay, beta1, beta2, epsilon, grad, keys, max_grad_norm, global_step, embedding_dim, ams_grad, mask_zero, padding_key, padding_key_mask, completion_key, completion_key_mask, _embedding_dim, _max_key_num)
2475
-
2476
-
2477
- def embedding_apply_adam(var_handle, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, keys, global_step, embedding_dim, mask_zero=(0,), padding_key=(0,), padding_key_mask=(1,), completion_key=(0,), completion_key_mask=(1,), _embedding_dim=1, _max_key_num=1):
2478
- r"""
2479
-
2480
- """
2481
- return embedding_apply_adam_op(var_handle, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, keys, global_step, embedding_dim, mask_zero, padding_key, padding_key_mask, completion_key, completion_key_mask, _embedding_dim, _max_key_num)
2482
-
2483
-
2484
- def embedding_apply_ada_grad(var_handle, lr, grad, keys, global_step, embedding_dim, mask_zero=(0,), padding_key=(0,), padding_key_mask=(1,), completion_key=(0,), completion_key_mask=(1,), _embedding_dim=1, _max_key_num=1):
2485
- r"""
2486
-
2487
- """
2488
- return embedding_apply_ada_grad_op(var_handle, lr, grad, keys, global_step, embedding_dim, mask_zero, padding_key, padding_key_mask, completion_key, completion_key_mask, _embedding_dim, _max_key_num)
2489
-
2490
-
2491
- def embedding_apply_ftrl(var_handle, lr, lr_power, lambda1, lambda2, grad, keys, global_step, embedding_dim, mask_zero=(0,), padding_key=(0,), padding_key_mask=(1,), completion_key=(0,), completion_key_mask=(1,), _embedding_dim=1, _max_key_num=1):
2492
- r"""
2493
-
2494
- """
2495
- return embedding_apply_ftrl_op(var_handle, lr, lr_power, lambda1, lambda2, grad, keys, global_step, embedding_dim, mask_zero, padding_key, padding_key_mask, completion_key, completion_key_mask, _embedding_dim, _max_key_num)
2496
-
2497
-
2498
- def embedding_apply_rmsprop(var_handle, lr, rho, momentum, epsilon, grad, keys, global_step, embedding_dim, mask_zero=(0,), padding_key=(0,), padding_key_mask=(1,), completion_key=(0,), completion_key_mask=(1,), _embedding_dim=1, _max_key_num=1):
2499
- r"""
2500
-
2501
- """
2502
- return embedding_apply_rmsprop_op(var_handle, lr, rho, momentum, epsilon, grad, keys, global_step, embedding_dim, mask_zero, padding_key, padding_key_mask, completion_key, completion_key_mask, _embedding_dim, _max_key_num)
2503
-
2504
-
2505
- def embedding_apply_sgd(var_handle, lr, grad, keys, global_step, embedding_dim, mask_zero=(0,), padding_key=(0,), padding_key_mask=(1,), completion_key=(0,), completion_key_mask=(1,), _embedding_dim=1, _max_key_num=1):
2506
- r"""
2507
-
2508
- """
2509
- return embedding_apply_sgd_op(var_handle, lr, grad, keys, global_step, embedding_dim, mask_zero, padding_key, padding_key_mask, completion_key, completion_key_mask, _embedding_dim, _max_key_num)
2510
-
2511
-
2512
- def embedding_feature_mapping_export(file_path, table_name, global_step, values, embedding_dim, feature_id, offset_id):
2513
- r"""
2514
-
2515
- """
2516
- return embedding_feature_mapping_export_op(file_path, table_name, global_step, values, embedding_dim, feature_id, offset_id)
2517
-
2518
-
2519
- def embedding_feature_mapping_file_size(file_path, table_name, global_step, embedding_dim, only_offset_flag=True):
2520
- r"""
2521
-
2522
- """
2523
- return embedding_feature_mapping_file_size_op(file_path, table_name, global_step, embedding_dim, only_offset_flag)
2524
-
2525
-
2526
- def embedding_feature_mapping_find(table_name, feature_size, num=1):
2527
- r"""
2528
-
2529
- """
2530
- return embedding_feature_mapping_find_op(table_name, feature_size, num)
2531
-
2532
-
2533
- def embedding_feature_mapping_import(file_path, teble_name, feature_size, global_step, embedding_dim, only_offset_flag=True, num=1):
2534
- r"""
2535
-
2536
- """
2537
- return embedding_feature_mapping_import_op(file_path, teble_name, feature_size, global_step, embedding_dim, only_offset_flag, num)
2538
-
2539
-
2540
- def embedding_feature_mapping_insert(table_name, num, feature_id, offset_id):
2541
- r"""
2542
-
2543
- """
2544
- return embedding_feature_mapping_insert_op(table_name, num, feature_id, offset_id)
2545
-
2546
-
2547
- def embedding_feature_mapping_table_size(table_name):
2548
- r"""
2549
-
2550
- """
2551
- return embedding_feature_mapping_table_size_op(table_name)
2552
-
2553
-
2554
- def embedding_feature_mapping_v2(table_name, feature_id, table_total_size, table_actual_size):
2555
- r"""
2556
-
2557
- """
2558
- return embedding_feature_mapping_v2_op(table_name, feature_id, table_total_size, table_actual_size)
2559
-
2560
-
2561
- def embedding_table_evict(var_handle, global_step, steps_to_live=0):
2562
- r"""
2563
-
2564
- """
2565
- return embedding_table_evict_op(var_handle, global_step, steps_to_live)
2566
-
2567
-
2568
2574
  def equal(input, other):
2569
2575
  r"""
2570
2576
  Compute the equivalence of the two inputs element-wise.
@@ -3415,6 +3421,43 @@ def floor(input):
3415
3421
  return floor_op(input)
3416
3422
 
3417
3423
 
3424
+ def format_cast(input, acl_format):
3425
+ r"""
3426
+ Change tensor format.
3427
+
3428
+ .. warning::
3429
+ FormatCast will not work in the GE backend; the original input will be returned.
3430
+
3431
+ Args:
3432
+ input (Tensor): The input tensor.
3433
+ acl_format (int): The enum value of the ACL format. Valid values are listed below:
3434
+ - ``0`` NCHW
3435
+ - ``1`` NHWC
3436
+ - ``2`` ND
3437
+ - ``3`` NC1HWC0
3438
+ - ``4`` FRACTAL_Z
3439
+ - ``27`` NDHWC
3440
+ - ``29`` FRACTAL_NZ
3441
+ - ``30`` NCDHW
3442
+ - ``32`` NDC1HWC0
3443
+ - ``33`` FRACTAL_Z_3D
3444
+
3445
+ Returns:
3446
+ Tensor
3447
+
3448
+ Supported Platforms:
3449
+ ``Ascend``
3450
+
3451
+ Examples:
3452
+ >>> import mindspore
3453
+ >>> input = mindspore.ops.randn((2, 3, 4, 5))
3454
+ >>> output = mindspore.ops.format_cast(input, 2)
3455
+ >>> print(output.shape)
3456
+ (2, 3, 4, 5)
3457
+ """
3458
+ return format_cast_op(input, acl_format)
3459
+
3460
+
3418
3461
  def frac_ext(input):
3419
3462
  r"""
3420
3463
  Calculates the fractional part of each element in the input.
@@ -3526,7 +3569,7 @@ def gather(input_params, input_indices, axis, batch_dims=0):
3526
3569
  - The value of input_indices must be in the range of `[0, input_param.shape[axis])`.
3527
3570
  On CPU and GPU, an error is raised if an out of bound indice is found. On Ascend, the results may be
3528
3571
  undefined.
3529
- - The data type of input_params cannot be `mindspore.bool_` .
3572
+ - The data type of input_params cannot be `mindspore.bool` .
3530
3573
  - The shape of returned tensor is :math:`input\_params.shape[:axis] + input\_indices.shape[batch\_dims:] + input\_params.shape[axis + 1:]` .
3531
3574
 
3532
3575
  Args:
@@ -3910,7 +3953,6 @@ def histc_ext(input, bins=100, min=0, max=0):
3910
3953
  Elements lower than min or higher than max are ignored.
3911
3954
 
3912
3955
  .. warning::
3913
- This is an experimental API that is subject to change or deletion.
3914
3956
  If input is int64, valid values fit within int32; exceeding this may cause precision errors.
3915
3957
 
3916
3958
  Args:
@@ -4622,7 +4664,7 @@ def index(input, indices):
4622
4664
  [2 6 5]
4623
4665
  >>> input2 = Tensor(np.arange(4 * 3 * 3).reshape(4, 3, 3), mindspore.int32)
4624
4666
  >>> indices3 = Tensor(np.array([1, 0]), mindspore.int32)
4625
- >>> indices4 = Tensor(np.array([1, 1, 0]), mindspore.bool_)
4667
+ >>> indices4 = Tensor(np.array([1, 1, 0]), mindspore.bool)
4626
4668
  >>> output2 = ops.auto_generate.index(input2, [indices3, indices4])
4627
4669
  >>> print(output2)
4628
4670
  [[ 9 10 11]
@@ -4698,6 +4740,20 @@ def inplace_add_ext(input, other, alpha=1):
4698
4740
  return inplace_add_ext_op(input, other, alpha)
4699
4741
 
4700
4742
 
4743
+ def inplace_bernoulli_scalar(input, p, seed, offset):
4744
+ r"""
4745
+
4746
+ """
4747
+ return inplace_bernoulli_scalar_op(input, p, seed, offset)
4748
+
4749
+
4750
+ def inplace_bernoulli_tensor(input, p, seed, offset):
4751
+ r"""
4752
+
4753
+ """
4754
+ return inplace_bernoulli_tensor_op(input, p, seed, offset)
4755
+
4756
+
4701
4757
  def inplace_clamp_scalar(input, min=None, max=None):
4702
4758
  r"""
4703
4759
 
@@ -4712,11 +4768,11 @@ def inplace_clamp_tensor(input, min=None, max=None):
4712
4768
  return inplace_clamp_tensor_op(input, min, max)
4713
4769
 
4714
4770
 
4715
- def inplace_copy(input, src):
4771
+ def inplace_copy(input, src, non_blocking=False):
4716
4772
  r"""
4717
4773
 
4718
4774
  """
4719
- return inplace_copy_op(input, src)
4775
+ return inplace_copy_op(input, src, non_blocking)
4720
4776
 
4721
4777
 
4722
4778
  def divmod_scalar_(input, other, rounding_mode=None):
@@ -5064,6 +5120,25 @@ def inplace_scatter_add(input, dim, index, src):
5064
5120
  return inplace_scatter_add_op(input, dim, index, src)
5065
5121
 
5066
5122
 
5123
+ def inplace_sigmoid(input):
5124
+ r"""
5125
+ sigmoid_() -> Tensor
5126
+
5127
+ In-place version of sigmoid().
5128
+
5129
+ .. warning::
5130
+ Only supports Ascend.
5131
+ """
5132
+ return inplace_sigmoid_op(input)
5133
+
5134
+
5135
+ def inplace_sign(input):
5136
+ r"""
5137
+
5138
+ """
5139
+ return inplace_sign_op(input)
5140
+
5141
+
5067
5142
  def inplace_silu(input):
5068
5143
  r"""
5069
5144
  Computes Sigmoid Linear Unit of input element-wise. The SiLU function is defined as:
@@ -5380,7 +5455,7 @@ def isinf(input):
5380
5455
  Return a boolean tensor indicating which elements are +/- infinity.
5381
5456
 
5382
5457
  .. warning::
5383
- - This is an experimental API that is subject to change.
5458
+ - This is an experimental API that is subject to change or deletion.
5384
5459
  - For Ascend, it is only supported on platforms above Atlas A2.
5385
5460
 
5386
5461
  Args:
@@ -5491,6 +5566,13 @@ def kthvalue(input, k, dim=-1, keepdim=False):
5491
5566
  return kthvalue_op(input, k, dim, keepdim)
5492
5567
 
5493
5568
 
5569
+ def kv_scale_cache(key_scale, value_scale, key_value_scale_cache, batch_valid_length, cache_mode):
5570
+ r"""
5571
+
5572
+ """
5573
+ return kv_scale_cache_op(key_scale, value_scale, key_value_scale_cache, batch_valid_length, cache_mode)
5574
+
5575
+
5494
5576
  def l1_loss_ext(input, target, reduction='mean'):
5495
5577
  r"""
5496
5578
  Calculate the mean absolute error between the `input` value and the `target` value.
@@ -6146,7 +6228,7 @@ def masked_fill(input_x, mask, value):
6146
6228
  Examples:
6147
6229
  >>> import mindspore
6148
6230
  >>> input_x = mindspore.tensor([1., 2., 3., 4.], mindspore.float32)
6149
- >>> mask = mindspore.tensor([True, True, False, True], mindspore.bool_)
6231
+ >>> mask = mindspore.tensor([True, True, False, True], mindspore.bool)
6150
6232
  >>> output = mindspore.ops.masked_fill(input_x, mask, 0.5)
6151
6233
  >>> print(output)
6152
6234
  [0.5 0.5 3. 0.5]
@@ -6154,6 +6236,13 @@ def masked_fill(input_x, mask, value):
6154
6236
  return masked_fill_op(input_x, mask, value)
6155
6237
 
6156
6238
 
6239
+ def masked_scatter(input, mask, source):
6240
+ r"""
6241
+
6242
+ """
6243
+ return masked_scatter_op(input, mask, source)
6244
+
6245
+
6157
6246
  def masked_select(input, mask):
6158
6247
  r"""
6159
6248
  Return a new 1-D tensor which indexes the `input` tensor according to the boolean `mask`.
@@ -6173,7 +6262,7 @@ def masked_select(input, mask):
6173
6262
  Examples:
6174
6263
  >>> import mindspore
6175
6264
  >>> x = mindspore.tensor([1, 2, 3, 4], mindspore.int64)
6176
- >>> mask = mindspore.tensor([1, 0, 1, 0], mindspore.bool_)
6265
+ >>> mask = mindspore.tensor([1, 0, 1, 0], mindspore.bool)
6177
6266
  >>> output = mindspore.ops.masked_select(x, mask)
6178
6267
  >>> print(output)
6179
6268
  [1 3]
@@ -6550,6 +6639,20 @@ def mish_ext(input):
6550
6639
  return mish_ext_op(input)
6551
6640
 
6552
6641
 
6642
+ def mla(query, q_rope, kv_cache, k_rope, block_tables, attn_mask=None, deq_scale_qk=None, deq_scale_pv=None, q_seq_lens=None, context_lens=None, head_num=32, scale_value=0.0, kv_head_num=1, mask_mode='MASK_NONE', is_ring=0):
6643
+ r"""
6644
+
6645
+ """
6646
+ return mla_op(query, q_rope, kv_cache, k_rope, block_tables, attn_mask, deq_scale_qk, deq_scale_pv, q_seq_lens, context_lens, head_num, scale_value, kv_head_num, mask_mode, is_ring)
6647
+
6648
+
6649
+ def mla_preprocess(input1, gamma1, beta1, quant_scale1, quant_offset1, wdqkv, bias1, gamma2, beta2, quant_scale2, quant_offset2, gamma3, sin1, cos1, sin2, cos2, key_cache, slot_mapping, wuq, bias2, slot_wuk, de_scale1, de_scale2, ctkv_scale, qnope_scale, krope_cache, param_cache_mode=0):
6650
+ r"""
6651
+
6652
+ """
6653
+ return mla_preprocess_op(input1, gamma1, beta1, quant_scale1, quant_offset1, wdqkv, bias1, gamma2, beta2, quant_scale2, quant_offset2, gamma3, sin1, cos1, sin2, cos2, key_cache, slot_mapping, wuq, bias2, slot_wuk, de_scale1, de_scale2, ctkv_scale, qnope_scale, krope_cache, param_cache_mode)
6654
+
6655
+
6553
6656
  def mm_ext(input, mat2):
6554
6657
  r"""
6555
6658
  Returns the matrix product of two arrays.
@@ -6978,7 +7081,7 @@ def mul(input, other):
6978
7081
  - When the two inputs have different shapes,
6979
7082
  they must be able to broadcast to a common shape.
6980
7083
  - The two inputs can not be bool type at the same time,
6981
- [True, Tensor(True, bool\_), Tensor(np.array([True]), bool\_)] are all considered bool type.
7084
+ [True, Tensor(True), Tensor(np.array([True]))] are all considered bool type.
6982
7085
  - Support implicit type conversion and type promotion.
6983
7086
 
6984
7087
  Args:
@@ -7223,11 +7326,18 @@ def nextafter(input, other):
7223
7326
 
7224
7327
  Examples:
7225
7328
  >>> import mindspore
7226
- >>> input = mindspore.tensor([0.0], mindspore.float32)
7227
- >>> other = mindspore.tensor([0.1], mindspore.float32)
7329
+ >>> import numpy as np
7330
+ >>> eps = np.finfo(np.float32).eps
7331
+ >>> input = mindspore.tensor([1.0], mindspore.float32)
7332
+ >>> other = mindspore.tensor([2.0], mindspore.float32)
7228
7333
  >>> output = mindspore.ops.nextafter(input, other)
7229
- >>> print(output)
7230
- [1.e-45]
7334
+ >>> print(output == eps + 1)
7335
+ [ True]
7336
+ >>> input = mindspore.tensor([1.0, 2.0], mindspore.float32)
7337
+ >>> other = mindspore.tensor([2.0, 1.0], mindspore.float32)
7338
+ >>> output = mindspore.ops.nextafter(input, other)
7339
+ >>> print(output == mindspore.tensor([eps + 1, 2 - eps], mindspore.float32))
7340
+ [ True True]
7231
7341
  """
7232
7342
  return next_after_op(input, other)
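The float32 neighbor relationships used in the updated nextafter example can be checked directly with NumPy's np.nextafter (shown only to motivate the expected values in the new doctest):

import numpy as np

eps = np.finfo(np.float32).eps
one = np.float32(1.0)
two = np.float32(2.0)
print(np.nextafter(one, two) == one + eps)  # True: the next float32 above 1.0 is 1.0 + eps
print(np.nextafter(two, one) == two - eps)  # True: the next float32 below 2.0 is 2.0 - eps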
7233
7343
 
@@ -7281,9 +7391,6 @@ def outer_ext(input, vec2):
7281
7391
  Return outer product of `input` and `vec2`. If `input` is a vector of size :math:`n`
7282
7392
  and `vec2` is a vector of size :math:`m` , then output must be a matrix of shape :math:`(n, m)` .
7283
7393
 
7284
- .. warning::
7285
- This is an experimental API that is subject to change or deletion.
7286
-
7287
7394
  .. note::
7288
7395
  This function does not broadcast.
7289
7396
 
@@ -7374,8 +7481,10 @@ def prelu(input, weight):
7374
7481
  :align: center
7375
7482
 
7376
7483
  .. note::
7377
- Channel dim is the 2nd dim of input. When input has dims < 2, then there is
7378
- no channel dim and the number of channels = 1.
7484
+ - Channel dim is the 2nd dim of input. When input has dims < 2, then there is
7485
+ no channel dim and the number of channels = 1.
7486
+ - In GE mode, the rank of the input tensor must be greater than 1;
7487
+ otherwise, an error will be triggered.
7379
7488
 
7380
7489
  Args:
7381
7490
  input (Tensor): The input Tensor of the activation function.
@@ -7528,12 +7637,13 @@ def range(start, end, step, maxlen=1000000):
7528
7637
  Returns a tensor with a step length of `step` in the interval [ `start` , `end` ).
7529
7638
 
7530
7639
  .. note::
7531
- The types of all 3 inputs must be all integers or floating-point numbers.
7640
+ - The types of all 3 inputs must be all integers or floating-point numbers.
7641
+ - When the input is a tensor, the tensor must contain only one element, whose dtype is Number.
7532
7642
 
7533
7643
  Args:
7534
- start (number): The start value of the interval.
7535
- end (number): The end value of the interval.
7536
- step (number): The interval between each value.
7644
+ start (Union[Number, Tensor]): The start value of the interval.
7645
+ end (Union[Number, Tensor]): The end value of the interval.
7646
+ step (Union[Number, Tensor]): The interval between each value.
7537
7647
  maxlen (int, optional): Memory that can fit `maxlen` many elements
7538
7648
  will be allocated for the output. Optional, must be positive. Default: 1000000.
7539
7649
  If the output has more than `maxlen` elements, a runtime error will occur.
@@ -8020,6 +8130,78 @@ def rfft(input, n=None, dim=-1, norm=None):
8020
8130
  return rfft_op(input, n, dim, norm)
8021
8131
 
8022
8132
 
8133
+ def ring_attention_update(prev_attn_out, prev_softmax_max, prev_softmax_sum, cur_attn_out, cur_softmax_max, cur_softmax_sum, actual_seq_qlen=None, layout='SBH'):
8134
+ r"""
8135
+ The RingAttentionUpdate operator updates the output of two FlashAttention operations based on their respective softmax max and softmax sum values.
8136
+
8137
+ - S: Sequence length
8138
+ - B: Batch dimension
8139
+ - H: Hidden layer size, equals to N * D
8140
+ - T: time, equals to B*S
8141
+ - N: Number of attention heads
8142
+ - D: Head dimension
8143
+
8144
+ .. warning::
8145
+ - It is only supported on Atlas A2 Training Series Products.
8146
+ - This is an experimental API that is subject to change or deletion.
8147
+ - When `layout` is ``"TND"``, the last dimension of `prev_attn_out` must be a multiple of 64.
8148
+ - When `layout` is ``"TND"``, `actual_seq_qlen` is mandatory.
8149
+ - When `layout` is ``"TND"``, N * D must satisfy the constraint:
8150
+ :math:`(\text{AlignUp}(N*D, 64)*(DataSize*6+8))+(\text{AlignUp}(N*8, 64)*56) <= 192*1024`.
8151
+ :math:`DataSize` is 4 bytes when `prev_attn_out` dtype is float32, 2 bytes when dtype is float16 / bfloat16.
8152
+ - When `layout` is ``"TND"``, if `actual_seq_qlen` is not a non-decreasing sequence from 0 to T, the result is undefined.
8153
+
8154
+ Args:
8155
+ prev_attn_out (Tensor): Output of the first FlashAttention operation. The dtype is float16, float32, bfloat16.
8156
+ The shape is :math:`(S, B, H)` or :math:`(T, N, D)`.
8157
+ prev_softmax_max (Tensor): The max values from the first FlashAttention softmax computation. The dtype float32.
8158
+ The shape is :math:`(B, N, S, 8)` or :math:`(T, N, 8)`. The last dimension contains 8 identical values, which must be positive.
8159
+ prev_softmax_sum (Tensor): The sum values from the first FlashAttention softmax computation.
8160
+ It has the same shape and dtype as `prev_softmax_max`.
8161
+ cur_attn_out (Tensor): Output of the second FlashAttention operation. It has the same shape and dtype as `prev_attn_out`.
8162
+ cur_softmax_max (Tensor): The max values from the second FlashAttention softmax computation. It has the same shape and dtype as `prev_softmax_max`.
8163
+ cur_softmax_sum (Tensor): The sum values from the second FlashAttention softmax computation. It has the same shape and dtype as `prev_softmax_max`.
8164
+ actual_seq_qlen (Tensor, optional): Cumulative sequence length, starting from 0. Required if `layout` is ``"TND"``. Does not take effect if `layout` is ``"SBH"``.
8165
+ The tensor must be 1D and contain non-decreasing integer values starting from 0 to T. Default: ``None``.
8166
+ layout (str, optional): Indicates the input layout, currently support ``"TND"`` and ``"SBH"``. Default: ``"SBH"``.
8167
+
8168
+ Returns:
8169
+ tuple (Tensor), tuple of 3 tensors.
8170
+
8171
+ - **attn_out** (Tensor) - The updated attention out, with the same shape and dtype as `prev_attn_out`.
8172
+ - **softmax_max** (Tensor) - The updated softmax max values, with the same shape and dtype as `prev_softmax_max`.
8173
+ - **softmax_sum** (Tensor) - The updated softmax sum values, with the same shape and dtype as `prev_softmax_max`.
8174
+
8175
+ Raises:
8176
+ RuntimeError: If `layout` is ``"TND"``, and `prev_attn_out`'s last dimension is not aligned to 64.
8177
+ RuntimeError: If `layout` is ``"TND"``, and `actual_seq_qlen` is not provided.
8178
+ RuntimeError: If `layout` is ``"TND"``, and `actual_seq_qlen` is not a non-decreasing sequence from 0 to T.
8179
+ RuntimeError: If `layout` is ``"TND"``, and `prev_attn_out` exceeds the size constraints.
8180
+
8181
+ Supported Platforms:
8182
+ ``Ascend``
8183
+
8184
+ Examples:
8185
+ >>> import numpy as np
8186
+ >>> import mindspore
8187
+ >>> from mindspore import Tensor, ops
8188
+ >>> np.random.seed(123)
8189
+ >>> S, B, H, N = 4, 6, 16, 8
8190
+ >>> prev_attn_out = np.random.uniform(-1.0, 1.0, size=(S, B, H)).astype(np.float32)
8191
+ >>> prev_softmax_max = np.random.uniform(-1.0, 1.0, size=(B, N, S, 8)).astype(np.float32)
8192
+ >>> prev_softmax_sum = np.random.uniform(-1.0, 1.0, size=(B, N, S, 8)).astype(np.float32)
8193
+ >>> cur_attn_out = np.random.uniform(-1.0, 1.0, size=(S, B, H)).astype(np.float32)
8194
+ >>> cur_softmax_max = np.random.uniform(-1.0, 1.0, size=(B, N, S, 8)).astype(np.float32)
8195
+ >>> cur_softmax_sum = np.random.uniform(-1.0, 1.0, size=(B, N, S, 8)).astype(np.float32)
8196
+ >>> inputs_np = [prev_attn_out, prev_softmax_max, prev_softmax_sum, cur_attn_out, cur_softmax_max, cur_softmax_sum]
8197
+ >>> inputs_ms = [Tensor(item) for item in inputs_np]
8198
+ >>> out = ops.ring_attention_update(*inputs_ms)
8199
+ >>> print(out[0].shape)
8200
+ (4, 6, 16)
8201
+ """
8202
+ return ring_attention_update_op(prev_attn_out, prev_softmax_max, prev_softmax_sum, cur_attn_out, cur_softmax_max, cur_softmax_sum, actual_seq_qlen, layout)
8203
+
8204
+
8023
8205
  def rms_norm(x, gamma, epsilon=1e-6):
8024
8206
  r"""
8025
8207
  The RmsNorm(Root Mean Square Layer Normalization) operator is a normalization operation. Compared to
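For background on the ring_attention_update hunk above: operators of this kind conventionally merge two partial FlashAttention results with a log-sum-exp rescaling. The docstring does not spell out the formula, so the sketch below shows only the standard merge for a single (attn_out, softmax_max, softmax_sum) triple, with the layout and per-head shape handling omitted; the function name and the exact semantics are assumptions, not taken from the source:

import numpy as np

def merge_partial_attention(o1, m1, s1, o2, m2, s2):
    # Standard log-sum-exp merge of two partial softmax-attention results:
    # rescale each partial output by its share of the combined softmax sum.
    m = np.maximum(m1, m2)
    a1 = s1 * np.exp(m1 - m)
    a2 = s2 * np.exp(m2 - m)
    s = a1 + a2
    o = (a1 * o1 + a2 * o2) / s
    return o, m, s

o1, m1, s1 = np.array([0.3]), np.array([1.0]), np.array([2.0])
o2, m2, s2 = np.array([0.7]), np.array([0.5]), np.array([1.5])
print(merge_partial_attention(o1, m1, s1, o2, m2, s2))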
@@ -8215,7 +8397,7 @@ def scalar_cast(input_x, input_y):
8215
8397
 
8216
8398
  Args:
8217
8399
  input_x (scalar): The input scalar. Only constant value is allowed.
8218
- input_y (mindspore.dtype): The type to be cast. Only constant value is allowed. And the value should only be mindspore.int64, mindspore.float64, or mindspore.bool_.
8400
+ input_y (mindspore.dtype): The type to be cast. Only constant value is allowed. And the value should only be mindspore.int64, mindspore.float64, or mindspore.bool.
8219
8401
 
8220
8402
  Returns:
8221
8403
  Scalar. The type is the same as the python type corresponding to `input_y`.
@@ -8725,6 +8907,58 @@ def sin(input):
8725
8907
  return sin_op(input)
8726
8908
 
8727
8909
 
8910
+ def smooth_l1_loss(prediction, target, beta=1.0, reduction='none'):
8911
+ r"""
8912
+ Calculate the smooth L1 loss, and the L1 loss function has robustness.
8913
+
8914
+ Refer to :func:`mindspore.ops.smooth_l1_loss` for more details.
8915
+
8916
+ .. warning::
8917
+ This API has poor performance on CPU, and it is recommended to run it on Ascend or GPU.
8918
+
8919
+ Args:
8920
+ beta (number, optional): A parameter used to control the point where the function will change between
8921
+ L1 loss and L2 loss. Default: ``1.0`` .
8922
+
8923
+ - Ascend: The value should be equal to or greater than zero.
8924
+ - CPU/GPU: The value should be greater than zero.
8925
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
8926
+ ``'sum'`` . Default: ``'none'`` .
8927
+
8928
+ - ``'none'``: no reduction will be applied.
8929
+ - ``'mean'``: compute and return the mean of elements in the output.
8930
+ - ``'sum'``: the output elements will be summed.
8931
+
8932
+ Inputs:
8933
+ - **logits** (Tensor) - Input Tensor of any dimension. Supported dtypes:
8934
+
8935
+ - Ascend: float16, float32, bfloat16.
8936
+ - CPU/GPU: float16, float32, float64.
8937
+ - **labels** (Tensor) - Ground truth data.
8938
+
8939
+ - CPU/Ascend: has the same shape as `logits`; `logits` and `labels` comply with the implicit type conversion rules to make the data types consistent.
8940
+ - GPU: has the same shape and dtype as the `logits`.
8941
+
8942
+ Outputs:
8943
+ Tensor, if `reduction` is ``'none'``, then output is a tensor with the same shape as `logits`. Otherwise the shape of output tensor is :math:`()`.
8944
+
8945
+ Supported Platforms:
8946
+ ``Ascend`` ``GPU`` ``CPU``
8947
+
8948
+ Examples:
8949
+ >>> import mindspore
8950
+ >>> import numpy as np
8951
+ >>> from mindspore import Tensor, ops
8952
+ >>> loss = ops.SmoothL1Loss()
8953
+ >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
8954
+ >>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32)
8955
+ >>> output = loss(logits, labels)
8956
+ >>> print(output)
8957
+ [0. 0. 0.5]
8958
+ """
8959
+ return smooth_l1_loss_impl(prediction, target, beta, reduction)
8960
+
8961
+
8728
8962
  def softplus_ext(input, beta=1, threshold=20):
8729
8963
  r"""
8730
8964
  Applies softplus function to `input` element-wise.
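The smooth_l1_loss example above (beta=1.0, reduction='none') can be reproduced with the textbook smooth-L1 definition, 0.5*d^2/beta for |d| < beta and |d| - 0.5*beta otherwise. The NumPy sketch below only illustrates the expected values under that standard form (the docstring defers the exact definition to mindspore.ops.smooth_l1_loss):

import numpy as np

logits = np.array([1.0, 2.0, 3.0], dtype=np.float32)
labels = np.array([1.0, 2.0, 2.0], dtype=np.float32)
beta = 1.0

d = np.abs(logits - labels)
loss = np.where(d < beta, 0.5 * d * d / beta, d - 0.5 * beta)
print(loss)  # prints [0.  0.  0.5], matching the docstring example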
@@ -9029,14 +9263,13 @@ def stack_ext(tensors, dim=0):
9029
9263
  :math:`(x_1, x_2, ..., x_{dim}, N, x_{dim+1}, ..., x_R)`.
9030
9264
 
9031
9265
  Args:
9032
- tensors (Union[tuple, list]): A Tuple or list of Tensor objects with the same shape and type.
9266
+ tensors (Union[tuple, list]): A Tuple or list of Tensor objects with the same shape.
9033
9267
  dim (int, optional): Dimension to stack. The range is [-(R+1), R+1). Default: ``0`` .
9034
9268
 
9035
9269
  Returns:
9036
- Tensor. A stacked Tensor with the same type as `tensors`.
9270
+ A stacked Tensor.
9037
9271
 
9038
9272
  Raises:
9039
- TypeError: If the data types of elements in `tensors` are not the same.
9040
9273
  ValueError: If `dim` is out of the range [-(R+1), R+1);
9041
9274
  or if the shapes of elements in `tensors` are not the same.
9042
9275
 
@@ -9184,11 +9417,11 @@ def sub_ext(input, other, alpha=1):
9184
9417
  input (Union[Tensor, number.Number, bool]): The first input is a number.Number or
9185
9418
  a bool or a tensor whose data type is
9186
9419
  `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_ or
9187
- `bool_ <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
9420
+ `bool <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
9188
9421
  other (Union[Tensor, number.Number, bool]): The second input, is a number.Number or
9189
9422
  a bool or a tensor whose data type is
9190
9423
  `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_ or
9191
- `bool_ <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
9424
+ `bool <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
9192
9425
  alpha (number.Number): A scaling factor applied to `other`, default 1.
9193
9426
 
9194
9427
  Returns:
@@ -9233,7 +9466,7 @@ def sub(input, other):
9233
9466
  Note:
9234
9467
  - When the two inputs have different shapes, they must be able to broadcast to a common shape.
9235
9468
  - The two inputs can not be bool type at the same time,
9236
- [True, Tensor(True, bool\_), Tensor(np.array([True]), bool\_)] are all considered bool type.
9469
+ [True, Tensor(True), Tensor(np.array([True]))] are all considered bool type.
9237
9470
  - Support implicit type conversion and type promotion.
9238
9471
 
9239
9472
  Args:
@@ -9672,9 +9905,6 @@ def transpose_ext_view(input, dim0, dim1):
9672
9905
  r"""
9673
9906
  Interchange two axes of a tensor.
9674
9907
 
9675
- .. warning::
9676
- This is an experimental API that is subject to change or deletion.
9677
-
9678
9908
  Args:
9679
9909
  input(Tensor): Input tensor.
9680
9910
  dim0 (int): First axis.
@@ -9702,17 +9932,17 @@ def transpose_ext_view(input, dim0, dim1):
9702
9932
  return transpose_ext_view_op(input, dim0, dim1)
9703
9933
 
9704
9934
 
9705
- def transpose(input, input_perm):
9935
+ def transpose(input, dims):
9706
9936
  r"""
9707
9937
  Transpose dimensions of the input tensor according to input permutation.
9708
9938
 
9709
9939
  Note:
9710
- On GPU and CPU, if the value of `input_perm` is negative, its actual value is `input_perm[i] + rank(input)`.
9711
- Negative value of `input_perm` is not supported on Ascend.
9940
+ On GPU and CPU, if the value of `dims` is negative, its actual value is `dims[i] + rank(input)`.
9941
+ Negative value of `dims` is not supported on Ascend.
9712
9942
 
9713
9943
  Args:
9714
9944
  input (Tensor): The input tensor.
9715
- input_perm (tuple[int]): Specify the new axis ordering.
9945
+ dims (Union[tuple[int], list[int]]): Specify the new axis ordering.
9716
9946
 
9717
9947
  Returns:
9718
9948
  Tensor
@@ -9732,7 +9962,7 @@ def transpose(input, input_perm):
9732
9962
  [ 8. 11.]
9733
9963
  [ 9. 12.]]]
9734
9964
  """
9735
- return transpose_op(input, input_perm)
9965
+ return transpose_op(input, dims)
9736
9966
 
9737
9967
 
9738
9968
  def transpose_view(input, input_perm):
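A minimal illustration of the renamed `dims` argument in the transpose hunk above. Only the parameter name changed, so passing the permutation positionally works with both the old and the new signature, and per the new docstring a list is accepted as well as a tuple:

import mindspore

x = mindspore.tensor([[1., 2., 3.], [4., 5., 6.]], mindspore.float32)
y = mindspore.ops.transpose(x, (1, 0))  # permutation given positionally; a list like [1, 0] also works
print(y.shape)  # (3, 2)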
@@ -9846,9 +10076,6 @@ def triu(input, diagonal=0):
9846
10076
  r"""
9847
10077
  Zero the input tensor below the diagonal specified.
9848
10078
 
9849
- .. warning::
9850
- This is an experimental API that is subject to change or deletion.
9851
-
9852
10079
  Args:
9853
10080
  input (Tensor): The input tensor.
9854
10081
  diagonal (int, optional): The diagonal specified of 2-D tensor. Default ``0`` represents the main diagonal.
@@ -10283,7 +10510,7 @@ def grouped_matmul_v2(x, weight, bias=None, scale=None, offset=None, antiquant_s
10283
10510
  return grouped_matmul_v2_op(x, weight, bias, scale, offset, antiquant_scale, antiquant_offset, group_list, split_item, group_type)
10284
10511
 
10285
10512
 
10286
- def grouped_matmul_v4(x, weight, bias=None, scale=None, offset=None, antiquant_scale=None, antiquant_offset=None, pre_token_scale=None, group_list=None, activation_input=None, activation_quant_scale=None, activation_quant_offset=None, split_item=0, group_type=-1, group_list_type=0, act_type=0):
10513
+ def grouped_matmul_v4(x, weight, bias=None, scale=None, offset=None, antiquant_scale=None, antiquant_offset=None, pre_token_scale=None, group_list=None, activation_input=None, activation_quant_scale=None, activation_quant_offset=None, split_item=0, group_type=-1, group_list_type=0, act_type=0, output_dtype=None):
10287
10514
  r"""
10288
10515
  Group calculation matmul.
10289
10516
 
@@ -10298,8 +10525,10 @@ def grouped_matmul_v4(x, weight, bias=None, scale=None, offset=None, antiquant_s
10298
10525
  y_i = x_i\times (weight_i + antiquant\_offset_i) * antiquant\_scale_i + bias_i
10299
10526
 
10300
10527
  .. note::
10301
- Only when `bias` , `scale` , `offset` , `antiquant_scale` and `antiquant_offset` are all None, `group_type` is 0,
10302
- and `split_item` is 3, the reverse derivative is supported.
10528
+ - Only when `bias` , `scale` , `offset` , `antiquant_scale` and `antiquant_offset` are all None, `group_type` is 0,
10529
+ and `split_item` is 3, the reverse derivative is supported.
10530
+ - When `x` type is int8 and `weight` type is int4, the `scale` should be of the uint64 data type,
10531
+ but its memory needs to be arranged in float32 format.
10303
10532
 
10304
10533
  ** Per-Token-Quant **
10305
10534
 
@@ -10339,6 +10568,8 @@ def grouped_matmul_v4(x, weight, bias=None, scale=None, offset=None, antiquant_s
10339
10568
  as the cumsum of grouping size in each group, and 1 represents the positions as the grouping size in
10340
10569
  each group. Default: ``0``.
10341
10570
  act_type (int): Activation function type. Currently not supported. Default: ``0``.
10571
+ output_dtype (mindspore.dtype): Specifies the output data type, currently taking effect only when input x is int8 and weight is int4.
10572
+ If None is passed in, bfloat16 will be used by default. Default: ``None``.
10342
10573
 
10343
10574
 
10344
10575
  Parameter limitations 1
@@ -10429,7 +10660,7 @@ def grouped_matmul_v4(x, weight, bias=None, scale=None, offset=None, antiquant_s
10429
10660
  [108 112]
10430
10661
  [108 112]]
10431
10662
  """
10432
- return grouped_matmul_v4_op(x, weight, bias, scale, offset, antiquant_scale, antiquant_offset, pre_token_scale, group_list, activation_input, activation_quant_scale, activation_quant_offset, split_item, group_type, group_list_type, act_type)
10663
+ return grouped_matmul_v4_op(x, weight, bias, scale, offset, antiquant_scale, antiquant_offset, pre_token_scale, group_list, activation_input, activation_quant_scale, activation_quant_offset, split_item, group_type, group_list_type, act_type, output_dtype)
10433
10664
 
10434
10665
 
10435
10666
  def kv_cache_scatter_update(var, indices, updates, axis, reduce='none'):