mindspore 2.6.0rc1-cp310-cp310-win_amd64.whl → 2.7.0rc1-cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +1 -1
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +40 -9
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
- mindspore/_extends/parse/parser.py +37 -62
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +43 -13
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/amp.py +4 -4
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +27 -2
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +77 -16
- mindspore/common/api.py +238 -113
- mindspore/common/dtype.py +21 -11
- mindspore/common/dump.py +10 -15
- mindspore/common/generator.py +5 -3
- mindspore/common/hook_handle.py +11 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/parameter.py +26 -12
- mindspore/common/recompute.py +3 -3
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +81 -81
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +58 -40
- mindspore/dataset/core/config.py +3 -3
- mindspore/dataset/engine/datasets.py +20 -7
- mindspore/dataset/engine/datasets_user_defined.py +33 -3
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +7 -3
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -7
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +27 -7
- mindspore/include/api/cell.h +37 -1
- mindspore/include/api/delegate.h +10 -0
- mindspore/include/api/model.h +3 -0
- mindspore/include/api/types.h +2 -2
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +60 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +6 -46
- mindspore/mint/distributed/__init__.py +1 -0
- mindspore/mint/distributed/distributed.py +212 -9
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +164 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +137 -101
- mindspore/mint/nn/layer/normalization.py +8 -22
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/cell.py +328 -502
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +32 -34
- mindspore/nn/layer/basic.py +67 -64
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +117 -110
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +37 -39
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +3 -3
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -0
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +34 -37
- mindspore/nn/wrap/grad_reducer.py +37 -37
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +5 -5
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +5 -5
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_vmap/vmap_array_ops.py +31 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +42 -11
- mindspore/ops/auto_generate/gen_extend_func.py +23 -141
- mindspore/ops/auto_generate/gen_ops_def.py +727 -321
- mindspore/ops/auto_generate/gen_ops_prim.py +1721 -984
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +8 -4
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +3 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +9 -96
- mindspore/ops/function/debug_func.py +4 -3
- mindspore/ops/function/grad/grad_func.py +1 -1
- mindspore/ops/function/math_func.py +33 -540
- mindspore/ops/function/nn_func.py +28 -74
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +44 -5
- mindspore/ops/function/vmap_func.py +2 -1
- mindspore/ops/functional.py +2 -3
- mindspore/ops/functional_overload.py +571 -6
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +16 -11
- mindspore/ops/operations/_custom_ops_utils.py +689 -34
- mindspore/ops/operations/_inner_ops.py +3 -6
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +2 -2
- mindspore/ops/operations/comm_ops.py +185 -26
- mindspore/ops/operations/custom_ops.py +294 -174
- mindspore/ops/operations/debug_ops.py +59 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +15 -16
- mindspore/ops/operations/math_ops.py +3 -4
- mindspore/ops/operations/nn_ops.py +7 -39
- mindspore/ops/primitive.py +6 -10
- mindspore/ops/tensor_method.py +47 -8
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +8 -3
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +11 -8
- mindspore/parallel/_cell_wrapper.py +113 -45
- mindspore/parallel/_parallel_serialization.py +1 -1
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +13 -8
- mindspore/parallel/auto_parallel.py +14 -7
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +11 -7
- mindspore/parallel/cluster/process_entity/_api.py +84 -48
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +43 -4
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +6 -7
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
- mindspore/parallel/shard.py +3 -4
- mindspore/parallel/transform_safetensors.py +463 -174
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/profiler_context.py +25 -27
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_op_analyse.py +235 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +305 -314
- mindspore/profiler/envprofiler.py +12 -7
- mindspore/profiler/experimental_config.py +96 -6
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/npu_profiler.py +29 -19
- mindspore/profiler/profiler.py +35 -19
- mindspore/profiler/profiler_action_controller.py +64 -76
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +5 -5
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +60 -45
- mindspore/runtime/memory.py +30 -32
- mindspore/runtime/thread_bind_core.py +298 -164
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +14 -4
- mindspore/train/amp.py +43 -20
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_checkpoint.py +3 -6
- mindspore/train/callback/_flops_collector.py +1 -1
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +97 -16
- mindspore/train/data_sink.py +11 -2
- mindspore/train/dataset_helper.py +9 -0
- mindspore/train/model.py +135 -55
- mindspore/train/serialization.py +133 -111
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +163 -77
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +6 -9
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +356 -394
- mindspore/_deprecated/jit.py +0 -198
- mindspore/experimental/es/__init__.py +0 -22
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
@@ -201,9 +201,9 @@ def adaptive_avg_pool1d(input, output_size):

  Examples:
  >>> import mindspore
- >>> from mindspore import Tensor,
+ >>> from mindspore import Tensor, ops
  >>> input = Tensor([[2,3],[3,4]],dtype=mindspore.float16)
- >>> output =
+ >>> output = ops.auto_generate.adaptive_avg_pool1d(input, 3)
  >>> print(output)
  [[2. 2.5 3. ]
  [3. 3.5 4. ]]
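The completed doctest output above, [[2. 2.5 3. ] [3. 3.5 4. ]], is consistent with the usual adaptive-average-pooling windowing rule (output bin i over an input of length L and output size S averages input[floor(i*L/S) : ceil((i+1)*L/S)]). The NumPy sketch below only illustrates that assumed rule; it is not MindSpore's kernel:

import numpy as np

def adaptive_avg_pool1d_ref(x, output_size):
    # Assumed windowing rule for adaptive average pooling (illustrative only).
    length = x.shape[-1]
    cols = []
    for i in range(output_size):
        start = (i * length) // output_size
        end = -(-((i + 1) * length) // output_size)  # ceiling division
        cols.append(x[..., start:end].mean(axis=-1))
    return np.stack(cols, axis=-1)

print(adaptive_avg_pool1d_ref(np.array([[2., 3.], [3., 4.]]), 3))
# [[2.  2.5 3. ]
#  [3.  3.5 4. ]]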
@@ -442,8 +442,7 @@ def apply_rotary_pos_emb_(query, key, cos, sin, position_ids, cos_format=0):
  r"""

  """
-
- return apply_rotary_pos_emb_op(query, key, cos, sin, position_ids)
+ return apply_rotary_pos_emb_impl(query, key, cos, sin, position_ids, cos_format)


  def argmax_ext(input, dim=None, keepdim=False):

@@ -527,9 +526,9 @@ def argmin_ext(input, dim=None, keepdim=False):
  Examples:
  >>> import numpy as np
  >>> from mindspore import Tensor
- >>> from mindspore import
+ >>> from mindspore import ops
  >>> x = Tensor(np.array([[1, 20, 5], [67, 8, 9], [130, 24, 15]]).astype(np.float32))
- >>> output =
+ >>> output = ops.auto_generate.argmin_ext(x, dim=-1)
  >>> print(output)
  [0 1 2]
  """

@@ -566,14 +565,13 @@ def argsort_ext(input, dim=-1, descending=False, stable=False):
  Examples:
  >>> import mindspore
  >>> import numpy as np
- >>> from mindspore import Tensor
- >>> import mindspore.mint as mint
+ >>> from mindspore import Tensor, ops
  >>> x = Tensor(np.array([[8, 2, 1], [5, 9, 3], [4, 6, 7]]), mindspore.float16)
- >>> sort =
+ >>> sort = ops.auto_generate.argsort_ext(x)
  >>> print(sort)
  [[2 1 0]
-
-
+ [2 0 1]
+ [0 1 2]]
  """
  return argsort_op(input, dim, descending, stable)

@@ -819,7 +817,7 @@ def atan2_ext(input, other):
  >>> from mindspore import Tensor, ops
  >>> input = Tensor(np.array([0, 1]), mindspore.float32)
  >>> other = Tensor(np.array([1, 1]), mindspore.float32)
- >>> output =
+ >>> output = ops.auto_generate.atan2_ext(input, other)
  >>> print(output)
  [0. 0.7853982]
  """

@@ -979,9 +977,9 @@ def avg_pool1d_ext(input, kernel_size, stride=None, padding=0, ceil_mode=False,
  Examples:
  >>> import mindspore
  >>> import numpy as np
- >>> from mindspore import Tensor,
+ >>> from mindspore import Tensor, ops
  >>> input_x = Tensor(np.random.randint(0, 10, [1, 3, 6]), mindspore.float32)
- >>> output =
+ >>> output = ops.auto_generate.avg_pool1d_ext(input_x, kernel_size=6, stride=1)
  >>> print(output.shape)
  (1, 3, 1)
  """

@@ -1086,14 +1084,14 @@ def bincount_ext(input, weights=None, minlength=0):
  ``Ascend``

  Examples:
- >>> from mindspore import
- >>> print(
+ >>> from mindspore import ops, Tensor
+ >>> print(ops.auto_generate.bincount_ext(Tensor(np.arange(5))))
  [1 1 1 1 1]
- >>> print(
+ >>> print(ops.auto_generate.bincount_ext(Tensor(np.array([0, 1, 1, 3, 2, 1, 7]))))
  [1 3 1 1 0 0 0 1]
  >>> w = Tensor(np.array([0.3, 0.5, 0.2, 0.7, 1., -0.6])) # weights
  >>> x = Tensor(np.array([0, 1, 1, 2, 2, 2]))
- >>> print(
+ >>> print(ops.auto_generate.bincount_ext(x, weights=w, minlength=5))
  [0.3 0.7 1.1 0. 0. ]
  """
  return bincount_ext_op(input, weights, minlength)
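The new bincount_ext doctest values can be cross-checked with NumPy's np.bincount, which accepts the same weights and minlength arguments; this only verifies the printed numbers, not the MindSpore kernel:

import numpy as np

x = np.array([0, 1, 1, 2, 2, 2])
w = np.array([0.3, 0.5, 0.2, 0.7, 1., -0.6])
print(np.bincount(np.array([0, 1, 1, 3, 2, 1, 7])))   # [1 3 1 1 0 0 0 1]
print(np.bincount(x, weights=w, minlength=5))         # [0.3 0.7 1.1 0.  0. ]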
@@ -1184,7 +1182,7 @@ def broadcast_to(input, shape):

  Args:
  input (Tensor): The input tensor.
- shape (tuple): The target shape.
+ shape (tuple[int]): The target shape.

  Returns:
  Tensor
@@ -1209,6 +1207,84 @@ def broadcast_to(input, shape):
  """
  return broadcast_to_impl(input, shape)

+
+ def broadcast_to_view(input, shape):
+ r"""
+ Broadcasts input tensor to a given shape. The dim of input shape must be smaller
+ than or equal to that of target shape. Suppose input shape is :math:`(x_1, x_2, ..., x_m)`,
+ target shape is :math:`(*, y_1, y_2, ..., y_m)`, where :math:`*` means any additional dimension.
+ The broadcast rules are as follows:
+
+ Compare the value of :math:`x_m` and :math:`y_m`, :math:`x_{m-1}` and :math:`y_{m-1}`, ...,
+ :math:`x_1` and :math:`y_1` consecutively and
+ decide whether these shapes are broadcastable and what the broadcast result is.
+
+ If the value pairs at a specific dim are equal, then that value goes right into that dim of output shape.
+ With an input shape :math:`(2, 3)`, target shape :math:`(2, 3)`, the inferred output shape is :math:`(2, 3)`.
+
+ If the value pairs are unequal, there are three cases:
+
+ Case 1: If the value of the target shape in the dimension is -1, the value of the
+ output shape in the dimension is the value of the corresponding input shape in the dimension.
+ With an input shape :math:`(3, 3)`, target
+ shape :math:`(-1, 3)`, the output shape is :math:`(3, 3)`.
+
+ Case 2: If the value of target shape in the dimension is not -1, but the corresponding
+ value in the input shape is 1, then the corresponding value of the output shape
+ is that of the target shape. With an input shape :math:`(1, 3)`, target
+ shape :math:`(8, 3)`, the output shape is :math:`(8, 3)`.
+
+ Case 3: If the corresponding values of the two shapes do not satisfy the above cases,
+ it means that broadcasting from the input shape to the target shape is not supported.
+
+ So far we got the last m dims of the outshape, now focus on the first :math:`*` dims, there are
+ two cases:
+
+ If the first :math:`*` dims of output shape does not have -1 in it, then fill the input
+ shape with ones until their length are the same, and then refer to
+ Case 2 mentioned above to calculate the output shape. With target shape :math:`(3, 1, 4, 1, 5, 9)`,
+ input shape :math:`(1, 5, 9)`, the filled input shape will be :math:`(1, 1, 1, 1, 5, 9)` and thus the
+ output shape is :math:`(3, 1, 4, 1, 5, 9)`.
+
+ If the first :math:`*` dims of output shape have -1 in it, it implies this -1 is corresponding to
+ a non-existing dim so they're not broadcastable. With target shape :math:`(3, -1, 4, 1, 5, 9)`,
+ input shape :math:`(1, 5, 9)`, instead of operating the dim-filling process first, it raises errors directly.
+
+ Args:
+ input (Tensor): The input Tensor.
+ shape (tuple): The target shape to broadcast. Can be fully specified, or have -1 in one position
+ where it will be substituted by the input tensor's shape in that position, see example.
+
+ Returns:
+ Tensor, with the given `shape` and the same data type as `input`.
+
+ Raises:
+ TypeError: If `shape` is not a tuple.
+ ValueError: If the target and input shapes are incompatible, or if a -1 in the target shape is in an invalid
+ location.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> import numpy as np
+ >>> from mindspore import Tensor
+ >>> from mindspore.ops.auto_generate import BroadcastToView
+ >>> shape = (2, 3)
+ >>> x = Tensor(np.array([1, 2, 3]).astype(np.float32))
+ >>> output = BroadcastToView()(x, shape)
+ >>> print(output)
+ [[1. 2. 3.]
+ [1. 2. 3.]]
+ >>> shape = (-1, 2)
+ >>> x = Tensor(np.array([[1], [2]]).astype(np.float32))
+ >>> output = BroadcastToView()(x, shape)
+ >>> print(output)
+ [[1. 1.]
+ [2. 2.]]
+ """
+ return broadcast_to_view_op(input, shape)
+
  cast_op=Cast()

  def cast(input, dtype):
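The broadcast rules laid out in the new broadcast_to_view docstring (including the -1 placeholder) can be condensed into a small shape-inference sketch. This is an illustrative reading of the documented rules written for this summary, not code taken from the MindSpore sources:

def infer_broadcast_shape(in_shape, target):
    # Right-align the input shape against the target, as the docstring describes.
    pad = len(target) - len(in_shape)
    if pad < 0:
        raise ValueError("input rank must not exceed target rank")
    if any(t == -1 for t in target[:pad]):
        # A -1 matched against a dimension the input does not have is an error.
        raise ValueError("-1 matched against a non-existing input dim")
    padded = (1,) * pad + tuple(in_shape)
    out = []
    for x, y in zip(padded, target):
        if y == -1:                 # Case 1: -1 keeps the input dimension.
            out.append(x)
        elif x == y or x == 1:      # equal dims, or Case 2: 1 broadcasts up.
            out.append(y)
        else:                       # Case 3: incompatible shapes.
            raise ValueError(f"cannot broadcast {x} to {y}")
    return tuple(out)

print(infer_broadcast_shape((3,), (2, 3)))                    # (2, 3)
print(infer_broadcast_shape((2, 1), (-1, 2)))                 # (2, 2)
print(infer_broadcast_shape((1, 5, 9), (3, 1, 4, 1, 5, 9)))   # (3, 1, 4, 1, 5, 9)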
@@ -1750,7 +1826,7 @@ def correlate(a, v, pad_mode='valid'):

  Note:
  - `correlate` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `correlate` is not supported on Windows platform yet.

  Args:

@@ -1960,7 +2036,7 @@ def cummin_ext(input, dim):
  \end{array}

  .. note::
-
+ GE backend is not supported in Ascend.

  Args:
  input (Tensor): The input Tensor, The dimension must be greater than 0.

@@ -2040,61 +2116,6 @@ def cumsum_ext(input, dim, dtype=None):
  return cumsum_ext_op(input, dim, dtype)


- def decoder_k_v_cache(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len):
- r"""
- The DecoderKVCache is used for decoding the KVCache of transformer network.
-
- Args:
- cache (Tensor): The cahe tensor with data type of int8, uint8, int16, uint16, float16, float32 and int32.
- When format is BHSD, cache tensor of shape
- :math:`(batch\_size, num\_head, max\_seq\_length, size\_pre\_head)`.
- When format is BSD, cache tensor of shape
- :math:`(batch\_size, max\_seq\_length, hidden\_size)`.
- update (Tensor]): The tensor which is used to update the cache tensor. Same data type as cache tensor.
- When format is BHSD, update tensor of shape
- :math:`(batch\_size, num\_head, update\_seq\_length, size\_pre\_head)`.
- When format is BSD, update tensor of shape
- :math:`(batch\_size, update\_seq\_length, hidden\_size)`.
- valid_seq_len (Tensor): The valid_seq_len tensor with data type of int64.
- Valid_seq_len tensor of shape :math:`(batch\_size)`.
- batch_index (Tensor): The batch_index tensor with data type of int64.
- Batch_index tensor of shape :math:`(batch\_size)`. Indicate that which batch of cache tensor is going to be update. Not abel for now.
- seq_len_axis (Tensor): The seq_len_axis indicate which axis is seq_eln, set to '1' or '2'. Not able for now.
- new_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
- New_max_seq_len tensor of shape :math:`(1)`.
- Indicate that user want to change the shape of cache tensor from
- :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)`. to
- :math:`(batch\_size * max\_seq\_length / new\_max\_seq\_length, num_head, new\_max\_seq\_length, hidden\_size)`.
- to update the cache tensor. This will not real change the shape of `cache` tensor.
- cur_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
- Cur_max_seq_len tensor of shape :math:`(1)`. Keep the current seq_len of cache tensor. Not abel for now.
-
- Outputs:
- With same data type and same shape as `cache` tensor.
-
- Supported Platforms:
- ``Ascend``
-
- Examples:
- >>> from mindspore.ops.operations import _inner_ops
- >>> b = 4
- >>> h = 40
- >>> max_s = 1024
- >>> s = 1
- >>> d = 128
- >>> cache = Tensor(np.random.randn(b, h, max_s, d).astype(np.float16))
- >>> update = Tensor(np.random.randn(b, h, s, d).astype(np.float16))
- >>> valid_seq_len = Tensor(np.random.randint(-1, s, size=b).astype(np.int64))
- >>> batch_index = Tensor(np.random.choice(np.arange(-1, b), size=b, replace=False).astype(np.int64))
- >>> new_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
- >>> cur_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
- >>> decoder_kv_cache = _inner_ops.DecoderKVCache()
- >>> output = decoder_kv_cache(cache, update, valid_seq_len, batch_index, Tensor(2), new_max_seq_len, cur_max_seq_len)
- >>> print(cache)
- """
- return decoder_k_v_cache_op(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len)
-
-
  def dense(input, weight, bias=None):
  r"""
  Applies the dense connected operation to the `input`. The dense function is defined as:

@@ -2233,9 +2254,9 @@ def diag_ext(input, diagonal=0):
  ``Ascend``

  Examples:
- >>> from mindspore import Tensor,
+ >>> from mindspore import Tensor, ops
  >>> input = Tensor([1, 2, 3, 4]).astype('int32')
- >>> output =
+ >>> output = ops.auto_generate.diag_ext(input)
  >>> print(output)
  [[1 0 0 0]
  [0 2 0 0]

@@ -2331,10 +2352,10 @@ def dot(input, other):

  Examples:
  >>> import mindspore
- >>> from mindspore import Tensor,
+ >>> from mindspore import Tensor, ops
  >>> x = Tensor([2.0, 3.0], mindspore.float32)
  >>> y = Tensor([2.0, 1.0], mindspore.float32)
- >>> output =
+ >>> output = ops.auto_generate.dot(x, y)
  >>> print(output)
  7.0
  >>> print(output.dtype)
@@ -2781,6 +2802,46 @@ def expand_dims(input_x, axis):
  return expand_dims_op(input_x, axis)


+ def expand_dims_view(input, dim):
+ r"""
+ Adds an additional dimension to `input_x` at the given axis, the dimension
+ of `input_x` should be greater than or equal to 1.
+
+ Note:
+ If the specified axis is a negative number, the index is counted
+ backward from the end and starts at 1.
+
+ Args:
+ input_x (Tensor): The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
+ axis (int): Specifies the dimension index at which to expand
+ the shape of `input_x`. The value of axis must be in the range
+ `[-input_x.ndim-1, input_x.ndim]`. Only constant value is allowed.
+
+ Returns:
+ Tensor, the shape of tensor is :math:`(1, x_1, x_2, ..., x_R)` if the
+ value of `axis` is 0. It has the same data type as `input_x`.
+
+ Raises:
+ TypeError: If `axis` is not an int.
+ ValueError: If `axis` is not in the valid range :math:`[-a.ndim-1, a.ndim]`.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> import mindspore
+ >>> import numpy as np
+ >>> from mindspore import Tensor, ops
+ >>> from mindspore.ops.auto_generate import ExpandDimsView
+ >>> input_tensor = Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32)
+ >>> output = ExpandDimsView()(input_tensor, 0)
+ >>> print(output)
+ [[[2. 2.]
+ [2. 2.]]]
+ """
+ return expand_dims_view_op(input, dim)
+
+
  def expm1(input):
  r"""
  Compute exponential of the input tensor, then minus 1, element-wise.
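Shape-wise, the expand_dims_view added here describes the same transformation as numpy.expand_dims; a short NumPy analogue of the doctest follows (the on-device view semantics are MindSpore-specific and not modelled here):

import numpy as np

x = np.array([[2., 2.], [2., 2.]], dtype=np.float32)
print(np.expand_dims(x, 0).shape)    # (1, 2, 2), matching the ExpandDimsView example
print(np.expand_dims(x, -1).shape)   # (2, 2, 1): negative axes count back from the end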
@@ -2936,7 +2997,7 @@ def fft2(input, s=None, dim=(-2, -1), norm=None):

  Note:
  - `fft2` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `fft2` is not supported on Windows platform yet.

  Args:

@@ -3000,7 +3061,7 @@ def fftfreq(n, d=1.0, dtype=None):

  Note:
  - `fftfreq` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `fftfreq` is not supported on Windows platform yet.

  Args:

@@ -3033,7 +3094,7 @@ def fftn(input, s=None, dim=None, norm=None):

  Note:
  - `fftn` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `fftn` is not supported on Windows platform yet.

  Args:

@@ -3093,7 +3154,7 @@ def fftshift(input, dim=None):

  Note:
  - `fftshift` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `fftshift` is not supported on Windows platform yet.

  Args:

@@ -3129,7 +3190,7 @@ def fft(input, n=None, dim=-1, norm=None):

  Note:
  - `fft` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `fft` is not supported on Windows platform yet.

  Args:

@@ -3388,6 +3449,13 @@ def frac_ext(input):
  return frac_op(input)


+ def fused_add_topk_div(x, add_num, group_num, group_topk, n, k, activate_type=0, is_norm=True, scale=2.5, mapping_num=None, mapping_table=None, enable_expert_mapping=False):
+ r"""
+
+ """
+ return fused_add_topk_div_op(x, add_num, group_num, group_topk, n, k, activate_type, is_norm, scale, mapping_num, mapping_table, enable_expert_mapping)
+
+
  def gather_d(x, dim, index):
  r"""
  Gathers elements along an axis specified by dim.
@@ -3579,20 +3647,6 @@ def geqrf(input):
  return geqrf_op(input)


- def gmm_backward(grad, x, weight, group_list=None):
- r"""
-
- """
- return gmm_backward_op(grad, x, weight, group_list)
-
-
- def gmm_v2_backward(grad, x, weight, group_list=None, group_list_type=0):
- r"""
-
- """
- return gmm_v2_backward_op(grad, x, weight, group_list, group_list_type)
-
-
  def greater_equal(input, other):
  r"""
  Compute the value of :math:`input >= other` element-wise.

@@ -3675,7 +3729,7 @@ def hfft2(input, s=None, dim=(-2, -1), norm=None):

  Note:
  - `hfft2` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `hfft2` is not supported on Windows platform yet.

  Args:

@@ -3736,7 +3790,7 @@ def hfftn(input, s=None, dim=None, norm=None):

  Note:
  - `hfftn` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `hfftn` is not supported on Windows platform yet.

  Args:

@@ -3797,7 +3851,7 @@ def hfft(input, n=None, dim=-1, norm=None):

  Note:
  - `hfft` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `hfft` is not supported on Windows platform yet.

  Args:

@@ -4058,7 +4112,7 @@ def ifft2(input, s=None, dim=(-2, -1), norm=None):

  Note:
  - `ifft2` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `ifft2` is not supported on Windows platform yet.

  Args:

@@ -4118,7 +4172,7 @@ def ifftn(input, s=None, dim=None, norm=None):

  Note:
  - `ifftn` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `ifftn` is not supported on Windows platform yet.

  Args:

@@ -4178,7 +4232,7 @@ def ifftshift(input, dim=None):

  Note:
  - `ifftshift` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `ifftshift` is not supported on Windows platform yet.

  Args:

@@ -4214,7 +4268,7 @@ def ifft(input, n=None, dim=-1, norm=None):

  Note:
  - `ifft` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `ifft` is not supported on Windows platform yet.

  Args:

@@ -4270,7 +4324,7 @@ def ihfft2(input, s=None, dim=(-2, -1), norm=None):

  Note:
  - `ihfft2` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `ihfft2` is not supported on Windows platform yet.

  Args:

@@ -4331,7 +4385,7 @@ def ihfftn(input, s=None, dim=None, norm=None):

  Note:
  - `ihfftn` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `ihfftn` is not supported on Windows platform yet.

  Args:

@@ -4392,7 +4446,7 @@ def ihfft(input, n=None, dim=-1, norm=None):

  Note:
  - `ihfft` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `ihfft` is not supported on Windows platform yet.

  Args:
@@ -4513,56 +4567,6 @@ def unfold_ext(input, kernel_size, dilation=1, padding=0, stride=1):
  return im2col_ext_op(input, kernel_size, dilation, padding, stride)


- def index_add_ext(input, dim, index, source, alpha=1):
- r"""
- Accumulate the elements of `alpha` times `source` into the `input` by adding to the index in the order given in `index`. For example, if ``dim == 0`` , ``index[i] == j`` , and ``alpha = -1`` , then the `i` th row of `source` is subtracted from the `j` th row of `input` . The `dim` th dimension of `source` must have the same size as the length of `index` , and all other dimensions must match `input`, or an error will be raised. For a 3-D tensor, the output is defined as follows:
-
- .. math::
- \begin{array}{ll}
- input[index[i],\ :,\ :]\ +=\ alpha * source[i,\ :,\ :] \qquad \#if\ dim == 0 \\
- input[:,\ \ index[i],\ :]\ +=\ alpha * source[:,\ \ i,\ :] \qquad \#if\ dim == 1 \\
- input[:,\ :,\ \ index[i]]\ +=\ alpha * source[:,\ :,\ \ i] \qquad\#if\ dim == 2 \\
- \end{array}
-
- .. warning::
- This is an experimental API that is subject to change or deletion.
-
- Args:
- input (Tensor): The input Tensor.
- dim (int): The dimension along which to index.
- index (Tensor): Add the value of "input Tensor" and `source` along the dimension of the `dim` according to the specified index value, with data type int32. The `index` must be 1D with the same size as the size of `source` in the `dim` dimension. The values of `index` should be in [0, b), where the b is the size of "input Tensor" in the `dim` dimension.
- source (Tensor): The input tensor with the value to add. Must have same data type as "input Tensor". The shape must be the same as "input Tensor" except the `dim` th dimension.
- alpha (number, optional): The scalar multiplier for source. Default: ``1``.
-
- Returns:
- Tensor, has the same shape and dtype as `input`.
-
- Raises:
- TypeError: If neither `index` nor `source` is a Tensor.
- ValueError: If the value of `dim` is out of the dimension range of `source` shape.
- ValueError: If `index` rank is not the same as `source` rank.
- ValueError: If shape of `index` is not 1D or size of `index` is not equal to dimension of source[dim].
- ValueError: If the shape of `source` is not the same as that of `input` except the `dim` axis.
-
- Supported Platforms:
- ``Ascend``
-
- Examples:
- >>> import numpy as np
- >>> import mindspore
- >>> from mindspore import Tensor, ops
- >>> x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), mindspore.float32)
- >>> index = Tensor(np.array([0, 2]), mindspore.int32)
- >>> y = Tensor(np.array([[0.5, 1.0], [1.0, 1.5], [2.0, 2.5]]), mindspore.float32)
- >>> output = ops.auto_generate.index_add_ext(x, 1, index, y, alpha=1)
- >>> print(output)
- [[ 1.5 2. 4. ]
- [ 5. 5. 7.5]
- [ 9. 8. 11.5]]
- """
- return index_add_ext_op(input, dim, index, source, alpha)
-
-
  def index_fill_scalar(input, dim, index, value):
  r"""

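For reference, the example in the removed index_add_ext docstring follows directly from the formula it quoted (for dim == 1, input[:, index[i]] += alpha * source[:, i]); a plain NumPy check of those numbers:

import numpy as np

x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.float32)
index = np.array([0, 2])
y = np.array([[0.5, 1.0], [1.0, 1.5], [2.0, 2.5]], dtype=np.float32)
# Columns index[i] of x accumulate alpha * column i of y (alpha = 1 here).
np.add.at(x, (slice(None), index), 1 * y)
print(x)
# [[ 1.5  2.   4. ]
#  [ 5.   5.   7.5]
#  [ 9.   8.  11.5]]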
@@ -4673,6 +4677,13 @@ def index_select_ext(input, dim, index):
  return index_select_op(input, dim, index)


+ def inner_moe_token_unpermute(permuted_tokens, sorted_indices, probs=None, padded_mode=False, restore_shape=None):
+ r"""
+
+ """
+ return inner_moe_token_unpermute_op(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)
+
+
  def inplace_adds_ext(input, other, alpha=1):
  r"""


@@ -4903,9 +4914,9 @@ def inplace_hardtanh(input, min_val=-1, max_val=1):

  Examples:
  >>> import mindspore
- >>> from mindspore import Tensor,
+ >>> from mindspore import Tensor, ops
  >>> x = Tensor([-1, -2, 0, 2, 1], mindspore.float16)
- >>>
+ >>> ops.auto_generate.inplace_hardtanh(x, min_val=-1.0, max_val=1.0)
  >>> print(x)
  [-1. -1. 0. 1. 1.]
  """
@@ -4980,6 +4991,51 @@ def masked_fill_tensor_(input, mask, value):
  return inplace_masked_fill_tensor_op(input, mask, value)


+ def matmul_add_(x, weight, C):
+ r"""
+ Fusion Operator of Transpose, Matmul, and InplaceAdd.
+
+ .. warning::
+ - This is an experimental API that is subject to change or deletion.
+ - This API is only supported in Atlas A2 training series for now.
+ - This API is only supported on GRAPH mode.
+
+ Args:
+ x (Tensor): Matrix A in matrix multiplication, with shape :math:`(k, m)` or :math:`(batch, k, m)`,
+ whose type should be float16 or bfloat16.
+ weight (Tensor): Matrix B in matrix multiplication, with shape :math:`(k, n)` or :math:`(batch, k, n)`,
+ whose type should be float16 or bfloat16.
+ C (Tensor): A Tensor acting as both input and output, with type of float32.
+ It's shape should be :math:`(m, n)` or :math:`(batch, m, n)`.
+
+ Returns:
+ Tensor, has the same shape and data type as `C`.
+
+ Raises:
+ TypeError: If the dtype of `weight` is not the same as `x`.
+ ValueError: If the ranks of `x` , `weight` and `C` are not the same.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> import mindspore
+ >>> import numpy as np
+ >>> from mindspore import Tensor, ops, nn, context
+ >>> context.set_context(mode=context.GRAPH_MODE, jit_config={"jit_level": "O0"})
+ >>> class Net(nn.Cell):
+ ...     def construct(self, x, weight, C):
+ ...         return ops.auto_generate.inplace_matmul_add_op(x, weight, C)
+ >>> x = Tensor(np.random.randn(10, 20), mindspore.float16)
+ >>> weight = Tensor(np.random.randn(10, 8), mindspore.float16)
+ >>> C = Tensor(np.random.randn(20, 8), mindspore.float32)
+ >>> output = Net()(x, weight, C)
+ >>> print(output.shape)
+ (20, 8)
+ """
+ return inplace_matmul_add_op(x, weight, C)
+
+
  def inplace_muls(input, other):
  r"""

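From the shapes in the new matmul_add_ docstring (x is (k, m), weight is (k, n), C is (m, n)) and its "Transpose, Matmul, and InplaceAdd" description, the fused op appears to accumulate x^T @ weight into C. The NumPy sketch below states that assumption explicitly; it is a reference for the shape bookkeeping only, not the Ascend kernel:

import numpy as np

def matmul_add_ref(x, weight, C):
    # Assumed semantics: C += x^T @ weight, accumulated in float32.
    C += x.astype(np.float32).T @ weight.astype(np.float32)
    return C

x = np.random.randn(10, 20).astype(np.float16)       # (k, m)
weight = np.random.randn(10, 8).astype(np.float16)   # (k, n)
C = np.random.randn(20, 8).astype(np.float32)        # (m, n)
print(matmul_add_ref(x, weight, C).shape)             # (20, 8), as in the doctest above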
@@ -5008,6 +5064,52 @@ def inplace_scatter_add(input, dim, index, src):
  return inplace_scatter_add_op(input, dim, index, src)


+ def inplace_silu(input):
+ r"""
+ Computes Sigmoid Linear Unit of input element-wise. The SiLU function is defined as:
+
+ .. math::
+
+ \text{SiLU}(x) = x * \sigma(x),
+
+ where :math:`x` is an element of the input, :math:`\sigma(x)` is Sigmoid function.
+
+ .. math::
+
+ \text{sigma}(x_i) = \frac{1}{1 + \exp(-x_i)},
+
+ SiLU Function Graph:
+
+ .. image:: ../images/SiLU.png
+ :align: center
+
+ Args:
+ input (Tensor): `input` is :math:`x` in the preceding formula. Input with the data type
+ float16 or float32.
+ inplace (bool, optional): If it is ``True``, enable the in place update function.
+ Default value: ``False``.
+
+ Returns:
+ Tensor, with the same type and shape as the `input`.
+
+ Raises:
+ TypeError: If dtype of `input` is neither float16 nor float32.
+
+ Supported Platforms:
+ ``Ascend`` ``GPU`` ``CPU``
+
+ Examples:
+ >>> import mindspore
+ >>> from mindspore import Tensor, mint
+ >>> import numpy as np
+ >>> input = Tensor(np.array([-1, 2, -3, 2, -1]), mindspore.float16)
+ >>> output = mint.nn.functional.silu(input, inplace=True)
+ >>> print(output)
+ [-0.269 1.762 -0.1423 1.762 -0.269]
+ """
+ return inplace_silu_op(input)
+
+
  def inplace_stop_gradient(input):
  r"""

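The SiLU definition in the new docstring, x * sigmoid(x), is easy to check against the printed doctest values with NumPy (a check of the math only, not of the in-place kernel):

import numpy as np

x = np.array([-1, 2, -3, 2, -1], dtype=np.float16)
silu = x / (1 + np.exp(-x))   # x * sigmoid(x)
print(silu)                   # approximately [-0.269  1.762 -0.1423  1.762 -0.269]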
@@ -5049,9 +5151,6 @@ def inplace_threshold(input, threshold, value):
  \text{value}, &\text{ otherwise }
  \end{cases}

- .. warning::
- This is an experimental API that is subject to change or deletion.
-
  Args:
  input (Tensor): The input Tensor.
  threshold (Union[int, float]): The value of the threshold.

@@ -5092,7 +5191,7 @@ def irfft2(input, s=None, dim=(-2, -1), norm=None):

  Note:
  - `irfft2` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `irfft2` is not supported on Windows platform yet.

  Args:

@@ -5150,7 +5249,7 @@ def irfftn(input, s=None, dim=None, norm=None):

  Note:
  - `irfftn` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `irfftn` is not supported on Windows platform yet.

  Args:

@@ -5209,7 +5308,7 @@ def irfft(input, n=None, dim=-1, norm=None):

  Note:
  - `irfft` is currently only used in `mindscience` scientific computing scenarios and
-
+ does not support other usage scenarios.
  - `irfft` is not supported on Windows platform yet.

  Args:
@@ -5376,12 +5475,12 @@ def kthvalue(input, k, dim=-1, keepdim=False):
|
|
|
5376
5475
|
Examples:
|
|
5377
5476
|
>>> import mindspore
|
|
5378
5477
|
>>> import numpy as np
|
|
5379
|
-
>>> from mindspore import Tensor,
|
|
5478
|
+
>>> from mindspore import Tensor, ops
|
|
5380
5479
|
>>> input_x = Tensor(np.array([[1.01, 2.02, 3.03], [1.04, 2.05, 3.06]]), mindspore.float32)
|
|
5381
|
-
>>> out =
|
|
5480
|
+
>>> out = ops.auto_generate.kthvalue(input_x, 2, 1, False)
|
|
5382
5481
|
>>> print(out)
|
|
5383
5482
|
(Tensor(shape=[2], dtype=Float32, value= [ 2.01999998e+00, 2.04999995e+00]), Tensor(shape=[2], dtype=Int64, value= [1, 1]))
|
|
5384
|
-
>>> out1 =
|
|
5483
|
+
>>> out1 = ops.auto_generate.kthvalue(input_x, 2, 1, True)
|
|
5385
5484
|
>>> print(out1)
|
|
5386
5485
|
(Tensor(shape=[2, 1], dtype=Float32, value=
|
|
5387
5486
|
[[ 2.01999998e+00],
|
|
@@ -5669,9 +5768,9 @@ def linalg_qr(A, mode='reduced'):
     Examples:
         >>> import mindspore
         >>> import numpy as np
-        >>> from mindspore import Tensor,
+        >>> from mindspore import Tensor, ops
         >>> x = Tensor(np.array([[1.0, 1.0, 2.0, 4.0], [1.0, 1.0, 2.0, 4.0]]), mindspore.float32)
-        >>> output =
+        >>> output = ops.auto_generate.linalg_qr(x)
         >>> print(output)
         (Tensor(shape=[2, 2], dtype=Float32, value=
         [[-7.07106829e-01, -7.07106769e-01],
@@ -5713,9 +5812,9 @@ def log10_ext(input):
     Examples:
         >>> import mindspore
         >>> import numpy as np
-        >>> from mindspore import Tensor,
+        >>> from mindspore import Tensor, ops
         >>> x = Tensor(np.array([3.0, 5.0, 7.0]), mindspore.float32)
-        >>> output =
+        >>> output = ops.auto_generate.log10_ext(x)
         >>> print(output)
         [0.47712136 0.69897 0.845098 ]
     """
@@ -5775,9 +5874,9 @@ def log2_ext(input):
     Examples:
         >>> import mindspore
         >>> import numpy as np
-        >>> from mindspore import Tensor,
+        >>> from mindspore import Tensor, ops
        >>> x = Tensor(np.array([3.0, 5.0, 7.0]), mindspore.float32)
-        >>> output =
+        >>> output = ops.auto_generate.log2_ext(x)
        >>> print(output)
         [1.5849625 2.321928 2.807355 ]
     """
@@ -5810,10 +5909,10 @@ def logaddexp2(input, other):
 
     Examples:
         >>> import numpy as np
-        >>> from mindspore import Tensor,
+        >>> from mindspore import Tensor, ops
         >>> x1 = Tensor(np.array([1, 2, 3]).astype(np.float16))
         >>> x2 = Tensor(np.array(2).astype(np.float16))
-        >>> output =
+        >>> output = ops.auto_generate.logaddexp2(x1, x2)
         >>> print(output)
         [2.586 3. 3.586]
     """
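The `logaddexp2` values above can be cross-checked with the base-2 identity logaddexp2(a, b) = log2(2**a + 2**b); a minimal NumPy sketch (illustrative only, not the MindSpore kernel; the docstring prints float16 roundings of the same values):

    import numpy as np

    x1 = np.array([1.0, 2.0, 3.0], dtype=np.float32)
    x2 = np.float32(2.0)
    print(np.logaddexp2(x1, x2))   # ~[2.585 3.    3.585]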
@@ -6495,6 +6594,254 @@ def mm_ext(input, mat2):
     return mm_ext_op(input, mat2)
 
 
+def moe_distribute_combine(expand_x, expert_ids, expand_idx, ep_send_counts, expert_scales, ep_world_size, ep_rank_id, moe_expert_num, tp_send_counts=None, x_active_mask=None, activate_scale=None, weight_scale=None, group_list=None, expand_scales=None, group_ep=None, group_tp=None, tp_world_size=0, tp_rank_id=0, expert_shard_type=0, shared_expert_num=0, shared_export_rank_num=0, global_bs=0, out_dtype=0, common_quant_mode=0, group_list_type=0):
+    r"""
+    Parallel communication for Mixture of Experts (MoE). When Tensor Parallelism (TP) communication exists,
+    it first ReduceScatter performs communication followed by Expert Parallelism (EP) AllToAllV communication.
+    Otherwise, only EP AllToAllV communication is performed. Finally multiply the received data by weight and
+    add them up.
+
+    Notes:
+        This function must be used in conjunction with function `moe_distribute_dispatch`.
+        - A: Maximum tokens to dispatch per rank:
+          - For shared experts: A = BS * ep_world_size * shared_expert_num / shared_expert_rank_num
+          - For MoE experts:
+            - When global_bs = 0: A >= BS * ep_world_size * min(local_expert_num, K)
+            - When global_bs != 0: A >= global_bs * min(local_expert_num, K)
+        - H (hidden size): Dimension of each token's hidden state
+          - Ascend 910B: 0 < H <= 7168, must be multiple of 32
+          - Ascend 910_93: H = 7168
+        - BS (batch sequence size): Number of tokens processed per rank
+          - Ascend 910B: 0 < BS <= 256
+          - Ascend 910_93: 0 < BS <= 512
+        - K: Number of experts selected per token (0 < K <= 8 and K <= moe_expert_num)
+        - server_num: Number of server nodes (supports 2, 4, 8)
+        - local_expert_num: Number of experts per rank:
+          - Shared expert ranks: local_expert_num = 1
+          - MoE expert ranks: local_expert_num = moe_expert_num / (ep_world_size - shared_expert_rank_num)
+            (TP communication not supported when localExpertNum > 1)
+
+    Inputs:
+        - **expand_x** (Tensor) - Expanded token features. 2D tensor [A, H] with dtype matching input.
+          Supported dtypes: float16, bfloat16, int8. Format: ND, non-contiguous allowed.
+        - **expert_ids** (Tensor) - Top-K expert indices for each token. 2D int32 tensor with shape [BS, K].
+          Format: ND, non-contiguous allowed.
+        - **expert_idx** (Tensor) - Token counts per expert, it's the output of dispatch operation.
+          1D int32 tensor [BS*K]. Format: ND, non-contiguous allowed.
+        - **ep_send_counts** (Tensor) - Tokens that each EP rank needs to send, it's the output of dispatch operation.
+          - Ascend 910B: 1D int32 tensor [moe_expert_num + 2 * global_bs * K * server_num]
+          - Ascend 910_93: 1D int32 tensor [ep_world_size * max(tp_world_size,1) * local_expert_num]
+          Format: ND, non-contiguous allowed.
+        - **expert_scales** (Tensor) - Top-K expert weights per token.
+        - **ep_world_size** (int) - EP domain size.
+          - Ascend 910B: Supports 16, 32, 64.
+          - Ascend 910_93: Supports 8, 16, 32, 64, 128, 144, 256, 288.
+        - **ep_rank_id** (int) - Local rank ID in EP domain [0, ep_world_size), must be unique per domain.
+        - **moe_expert_num** (int) - Number of MoE experts (0 < moe_expert_num <= 256),
+          must satisfy moe_expert_num % (ep_world_size-shared_expert_rank_num) = 0.
+        - **tp_send_counts** (Tensor) - Tokens that each TP rank needs to send (when TP exists). It's the output of dispatch operation. Default: ``None``.
+          - Ascend 910B: Not supported.
+          - Ascend 910_93: 1D int32 tensor [tp_world_size] when TP exists. Format: ND, non-contiguous allowed.
+        - **x_active_mask** (Tensor) - Reserved parameter. Default: ``None``.
+        - **activate_scale** (Tensor) - Reserved parameter. Default: ``None``.
+        - **weight_scale** (Tensor) - Reserved parameter. Default: ``None``.
+        - **group_list** (Tensor) - Reserved parameter. Default: ``None``.
+        - **expand_scales** (Tensor) - Output of dispatch operation. Default: ``None``.
+          - Ascend 910B: 1D float32 tensor [A]. Format: ND, non-contiguous allowed.
+          - Ascend 910_93: Unsupported.
+        - **group_ep** (str) - EP communication domain name (string length 1-127), must differ from group_tp. Default: ``None``.
+        - **group_tp** (str) - TP communication domain name. Default: ``None``.
+          - Ascend 910B: Unsupported (pass empty string).
+          - Ascend 910_93: When TP communication exists, string length 1-127, must differ from group_ep.
+        - **tp_world_size** (int) - TP domain size. Default: ``0``.
+          - Ascend 910B: Unsupported (pass 0).
+          - Ascend 910_93: 0/1 means no TP communication; only 2 supported when TP exists.
+        - **tp_rank_id** (int) - Local rank ID in TP domain. Default: ``0``.
+          - Ascend 910B: Unsupported (pass 0).
+          - Ascend 910_93: [0,1], unique per domain; pass 0 when no TP communication.
+        - **expert_shard_type** (int) - Shared expert distribution type. Default: ``0``.
+          - Ascend 910B: Unsupported (pass 0).
+          - Ascend 910_93: Currently only 0 (shared experts precede MoE experts).
+        - **shared_expert_num** (int) - Number of shared experts. Default: ``0``.
+          - Ascend 910B: Unsupported (pass 0).
+          - Ascend 910_93: Currently 0 (none) or 1 (one shared expert).
+        - **shared_expert_rank_num** (int) - Number of ranks hosting shared experts. Default: ``0``.
+          - Ascend 910B: Unsupported (pass 0).
+          - Ascend 910_93: [0, ep_world_size-1), must satisfy ep_world_size % shared_expert_rank_num = 0 when non-zero.
+        - **global_bs** (int) - Global batch size across EP domain. Default: ``0``.
+          - Ascend 910B: 256*ep_world_size when BS varies per rank; 0 or BS*ep_world_size when uniform.
+          - Ascend 910_93: 0 or BS*ep_world_size.
+        - **out_dtype** (int) - Specify the type of output x. Reserved parameter (pass 0 in current version). Default: ``0``.
+        - **common_quant_mode** (int) - Communication quantification type. Reserved parameter (pass 0 in current version). Default: ``0``.
+        - **group_list_type** (int) - The format of group_list. Reserved parameter (pass 0 in current version). Default: ``0``.
+
+    Outputs:
+        - **x** (Tensor) - Processed tokens. 2D tensor [BS, H] with dtype matching input `expand_x`.
+
+    Raises:
+        TypeError: If input dtypes don't match specifications.
+        ValueError: If input values violate constraints (e.g., invalid expert indices).
+        RuntimeError: If communication domain configuration is invalid.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> # EP-only communication example (Ascend 910B)
+        >>> import mindspore as ms
+        >>> from mindspore import Tensor
+        >>> from mindspore import ops
+        >>> from mindspore.communication import init, get_rank, GlobalComm
+        >>> from mindspore.ops.auto_generate import moe_distribute_dispatch, moe_distribute_combine
+        >>> import numpy as np
+        >>> bs = 8
+        >>> h = 7168
+        >>> k = 8
+        >>> ep_world_size = 16
+        >>> moe_expert_num = 16
+        >>> global_bs = bs * ep_world_size
+        >>> x = Tensor(np.random.randn(bs, h), ms.float16)
+        >>> expert_ids = Tensor(np.random.randint(0, moe_expert_num, (bs, k)), ms.int32)
+        >>> expert_scales = Tensor(np.random.randn(bs, k), ms.float32)
+        >>> init()
+        >>> rank_id = get_rank()
+        >>> expand_x, _, expand_idx, _, ep_recv_count, _, expand_scale = moe_distribute_dispatch(
+        ...     x, expert_ids, expert_scales, ep_world_size, rank_id, moe_expert_num,
+        ...     group_ep=GlobalComm.WORLD_COMM_GROUP)
+        >>> out_x = moe_distribute_combine(
+        ...     expand_x, expert_ids, expand_idx, ep_recv_count, expert_scales, ep_world_size, rank_id,
+        ...     moe_expert_num, group_ep=GlobalComm.WORLD_COMM_GROUP)
+        >>> print(out_x.shape)
+        (8, 7168)
+    """
+    return moe_distribute_combine_op(expand_x, expert_ids, expand_idx, ep_send_counts, expert_scales, ep_world_size, ep_rank_id, moe_expert_num, tp_send_counts, x_active_mask, activate_scale, weight_scale, group_list, expand_scales, group_ep, group_tp, tp_world_size, tp_rank_id, expert_shard_type, shared_expert_num, shared_export_rank_num, global_bs, out_dtype, common_quant_mode, group_list_type)
+
+
+def moe_distribute_dispatch(x, expert_ids, ep_world_size, ep_rank_id, moe_expert_num, expert_scales=None, scales=None, x_active_mask=None, group_ep=None, group_tp=None, tp_world_size=0, tp_rank_id=0, expert_shard_type=0, shared_expert_num=0, shared_expert_rank_num=0, quant_mode=0, global_bs=0, expert_token_nums_type=0):
+    r"""
+    Performs token data quantization (optional) and parallel communication for Mixture of Experts (MoE).
+    When Tensor Parallelism (TP) communication exists, it first performs Expert Parallelism (EP) AllToAllV
+    communication followed by TP AllGatherV communication. Otherwise, only EP AllToAllV communication is performed.
+
+    Notes:
+        - A: Maximum tokens to dispatch per rank:
+          - For shared experts: A = BS * ep_world_size * shared_expert_num / shared_expert_rank_num
+          - For MoE experts:
+            - When global_bs = 0: A >= BS * ep_world_size * min(local_expert_num, K)
+            - When global_bs != 0: A >= global_bs * min(local_expert_num, K)
+        - H (hidden size): Dimension of each token's hidden state
+          - Ascend 910B: 0 < H <= 7168, must be multiple of 32
+          - Ascend 910_93: H = 7168
+        - BS (batch sequence size): Number of tokens processed per rank
+          - Ascend 910B: 0 < BS <= 256
+          - Ascend 910_93: 0 < BS <= 512
+        - K: Number of experts selected per token (0 < K <= 8 and K <= moe_expert_num)
+        - server_num: Number of server nodes (supports 2, 4, 8)
+        - local_expert_num: Number of experts per rank:
+          - Shared expert ranks: local_expert_num = 1
+          - MoE expert ranks: local_expert_num = moe_expert_num / (ep_world_size - shared_expert_rank_num)
+            (TP communication not supported when localExpertNum > 1)
+
+    Inputs:
+        - **x** (Tensor) - Input token data to be sent. 2D tensor with shape [BS, H].
+          Supported dtypes: float16, bfloat16. Format: ND, non-contiguous allowed.
+        - **expert_ids** (Tensor) - Top-K expert indices for each token. 2D int32 tensor with shape [BS, K].
+          Format: ND, non-contiguous allowed.
+        - **ep_world_size** (int64) - EP domain size.
+          - Ascend 910B: Supports 16, 32, 64.
+          - Ascend 910_93: Supports 8, 16, 32, 64, 128, 144, 256, 288.
+        - **ep_rank_id** (int64) - Local rank ID in EP domain [0, ep_world_size), must be unique per domain.
+        - **moe_expert_num** (int64) - Number of MoE experts (0 < moe_expert_num <= 256),
+          must satisfy moe_expert_num % (ep_world_size-shared_expert_rank_num) = 0.
+        - **expert_scales** (Tensor) - Top-K expert weights per token.
+          - Ascend 910B: 2D float32 tensor [BS, K], ND format, non-contiguous allowed.
+          - Ascend 910_93: Unsupported (pass nullptr).
+        - **scales** (Tensor) - Expert weights. 2D float32 tensor with shape [shared_expert_num + moe_expert_num, H].
+          Pass nullptr for non-quantized scenarios. Format: ND, non-contiguous allowed.
+          Note: On Ascend 910B, must be nullptr when HCCL_INTRA_PCIE_ENABLE=1 and HCCL_INTRA_ROCE_ENABLE=0.
+        - **x_active_mask** (Tensor) - Reserved parameter (pass nullptr in current version).
+        - **group_ep** (str) - EP communication domain name (string length 1-127), must differ from group_tp.
+        - **group_tp** (str) - TP communication domain name.
+          - Ascend 910B: Unsupported (pass empty string).
+          - Ascend 910_93: When TP communication exists, string length 1-127, must differ from group_ep.
+        - **tp_world_size** (int64) - TP domain size.
+          - Ascend 910B: Unsupported (pass 0).
+          - Ascend 910_93: 0/1 means no TP communication; only 2 supported when TP exists.
+        - **tp_rank_id** (int64) - Local rank ID in TP domain.
+          - Ascend 910B: Unsupported (pass 0).
+          - Ascend 910_93: [0,1], unique per domain; pass 0 when no TP communication.
+        - **expert_shard_type** (int64) - Shared expert distribution type.
+          - Ascend 910B: Unsupported (pass 0).
+          - Ascend 910_93: Currently only 0 (shared experts precede MoE experts).
+        - **shared_expert_num** (int64) - Number of shared experts.
+          - Ascend 910B: Unsupported (pass 0).
+          - Ascend 910_93: Currently 0 (none) or 1 (one shared expert).
+        - **shared_expert_rank_num** (int64) - Number of ranks hosting shared experts.
+          - Ascend 910B: Unsupported (pass 0).
+          - Ascend 910_93: [0, ep_world_size-1), must satisfy ep_world_size % shared_expert_rank_num = 0 when non-zero.
+        - **quant_mode** (int64) - Quantization mode: 0 (none), 2 (dynamic quantization).
+        - **global_bs** (int64) - Global batch size across EP domain.
+          - Ascend 910B: 256*ep_world_size when BS varies per rank; 0 or BS*ep_world_size when uniform.
+          - Ascend 910_93: 0 or BS*ep_world_size.
+        - **expert_token_nums_type** (int64) - Semantic meaning of expert_token_nums output:
+          0 (prefix sums), 1 (raw counts).
+
+    Outputs:
+        - **expand_x** (Tensor) - Expanded token features. 2D tensor [A, H] with dtype matching input.
+          Supported dtypes: float16, bfloat16, int8. Format: ND, non-contiguous allowed.
+        - **dynamic_scales** (Tensor) - Dynamic quantization scales (when quant_mode=2).
+          1D float32 tensor [A]. Format: ND, non-contiguous allowed.
+        - **expand_idx** (Tensor) - Token counts per expert for combine operation.
+          1D int32 tensor [BS*K]. Format: ND, non-contiguous allowed.
+        - **expert_token_nums** (Tensor) - Tokens received per expert.
+          1D int64 tensor [local_expert_num]. Format: ND, non-contiguous allowed.
+        - **ep_recv_counts** (Tensor) - Tokens received from each EP rank.
+          - Ascend 910B: 1D int32 tensor [moe_expert_num + 2 * global_bs * K * server_num]
+          - Ascend 910_93: 1D int32 tensor [ep_world_size * max(tp_world_size,1) * local_expert_num]
+          Format: ND, non-contiguous allowed.
+        - **tp_recv_counts** (Tensor) - Tokens received from each TP rank (when TP exists).
+          - Ascend 910B: Not supported.
+          - Ascend 910_93: 1D int32 tensor [tp_world_size] when TP exists. Format: ND, non-contiguous allowed.
+        - **expand_scales** (Tensor) - Output token weights for combine operation.
+          - Ascend 910B: 1D float32 tensor [A]. Format: ND, non-contiguous allowed.
+          - Ascend 910_93: Unsupported.
+
+    Raises:
+        TypeError: If input dtypes don't match specifications.
+        ValueError: If input values violate constraints (e.g., invalid expert indices).
+        RuntimeError: If communication domain configuration is invalid.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> # EP-only communication example (Ascend 910B)
+        >>> import mindspore as ms
+        >>> from mindspore import Tensor
+        >>> from mindspore import ops
+        >>> from mindspore.communication import init, get_rank, GlobalComm
+        >>> from mindspore.ops.auto_generate import moe_distribute_dispatch
+        >>> import numpy as np
+        >>> bs = 8
+        >>> h = 7168
+        >>> k = 8
+        >>> ep_world_size = 16
+        >>> moe_expert_num = 16
+        >>> global_bs = bs * ep_world_size
+        >>> x = Tensor(np.random.randn(bs, h), ms.float16)
+        >>> expert_ids = Tensor(np.random.randint(0, moe_expert_num, (bs, k)), ms.int32)
+        >>> expert_scales = Tensor(np.random.randn(bs, k), ms.float32)
+        >>> init()
+        >>> rank_id = get_rank()
+        >>> out = moe_distribute_dispatch(
+        ...     x, expert_ids, ep_world_size, rank_id, moe_expert_num, expert_scales=expert_scales,
+        ...     group_ep=GlobalComm.WORLD_COMM_GROUP)
+        >>> print(out[0].shape) # expand_x
+        (128, 7168)
+    """
+    return moe_distribute_dispatch_op(x, expert_ids, ep_world_size, ep_rank_id, moe_expert_num, expert_scales, scales, x_active_mask, group_ep, group_tp, tp_world_size, tp_rank_id, expert_shard_type, shared_expert_num, shared_expert_rank_num, quant_mode, global_bs, expert_token_nums_type)
+
+
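For reference, the example configuration above (BS = 8, ep_world_size = 16, K = 8, moe_expert_num = 16, no shared experts) gives local_expert_num = 16 / 16 = 1, so the bound from the Notes is A >= BS * ep_world_size * min(local_expert_num, K) = 8 * 16 * 1 = 128, which is exactly the first dimension of the (128, 7168) `expand_x` shape printed by the dispatch example.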
 def moe_token_permute_grad(permuted_tokens_grad, sorted_indices, num_topk=1, padded_mode=False):
     r"""
 
@@ -6508,11 +6855,10 @@ def moe_token_permute(tokens, indices, num_out_tokens=None, padded_mode=False):
 
     .. warning::
         - It is only supported on Atlas A2 Training Series Products.
-        - The input `tokens` only supports the bfloat16 data type in the current version.
         - When `indices` is 2-D, the size of the second dim must be less than or equal to 512.
 
     Args:
-        tokens (Tensor): The input token tensor to be permuted. The dtype is bfloat16.
+        tokens (Tensor): The input token tensor to be permuted. The dtype is bfloat16, float16 or float32.
            The shape is :math:`(num\_tokens, hidden\_size)` , where `num_tokens` and `hidden_size` are positive integers.
         indices (Tensor): The tensor specifies indices used to permute the tokens. The dtype is int32 or int64.
            The shape is :math:`(num\_tokens, topk)` or :math:`(num\_tokens,)`, where `num_tokens` and `topk` are positive integers.
@@ -6528,7 +6874,6 @@ def moe_token_permute(tokens, indices, num_out_tokens=None, padded_mode=False):
 
     Raises:
         TypeError: If `tokens` or `indices` is not a Tensor.
-        TypeError: If dtype of `tokens` is not bfloat16.
         TypeError: If dtype of `indices` is not int32 or int64.
         TypeError: If specified `num_out_tokens` is not an integer.
         TypeError: If specified `padded_mode` is not a bool.
@@ -6570,60 +6915,6 @@ def moe_token_unpermute_grad(permuted_tokens, unpermuted_tokens_grad, sorted_ind
     return moe_token_unpermute_grad_op(permuted_tokens, unpermuted_tokens_grad, sorted_indices, probs, padded_mode, restore_shape)
 
 
-def moe_token_unpermute(permuted_tokens, sorted_indices, probs=None, padded_mode=False, restore_shape=None):
-    r"""
-    Unpermute a tensor of permuted tokens based on sorted indices, and optionally merge the tokens with their corresponding probabilities.
-
-    .. warning::
-        - It is only supported on Atlas A2 Training Series Products.
-        - The inputs `permuted_tokens` and `probs` only support the bfloat16 data type in the current version.
-        - `sorted_indices` must not have duplicate values, otherwise the result is undefined.
-
-    Args:
-        permuted_tokens (Tensor): The tensor of permuted tokens to be unpermuted.
-            The shape is :math:`[num\_tokens * topk, hidden\_size]` , where `num_tokens`, `topk` and `hidden_size` are positive integers.
-        sorted_indices (Tensor): The tensor of sorted indices used to unpermute the tokens.
-            The shape is :math:`[num\_tokens * topk,]`, where `num_tokens` and `topk` are positive integers.
-            It only supports the int32 data type.
-        probs (Tensor, optional): The tensor of probabilities corresponding to the permuted tokens.
-            If provided, the unpermuted tokens will be merged with their respective probabilities.
-            The shape is :math:`[num\_tokens, topk]`, where `num_tokens` and `topk` are positive integers. Default: ``None`` .
-        padded_mode (bool, optional): If ``True``, indicating the indices are padded to denote selected tokens per expert. Default: ``False`` .
-        restore_shape (Union[tuple[int], list[int]], optional): The input shape before permutation, only used in padding mode. Default: ``None`` .
-
-    Returns:
-        Tensor, with the same dtype as `permuted_tokens`. If `padded_mode` is ``False``, the shape will be [`num_tokens`, `hidden_size`].
-        If `padded_mode` is ``True``, the shape will be specified by `restore_shape`.
-
-    Raises:
-        TypeError: If `permuted_tokens` is not a Tensor.
-        ValueError: Only supported when `padded_mode` is ``False``.
-
-    Supported Platforms:
-        ``Ascend``
-
-    Examples:
-        >>> import mindspore
-        >>> from mindspore import Tensor, ops
-        >>> permuted_token = Tensor([
-        ...     [1, 1, 1],
-        ...     [0, 0, 0],
-        ...     [0, 0, 0],
-        ...     [3, 3, 3],
-        ...     [2, 2, 2],
-        ...     [1, 1, 1],
-        ...     [2, 2, 2],
-        ...     [3, 3, 3]], dtype=mindspore.bfloat16)
-        >>> sorted_indices = Tensor([0, 6, 7, 5, 3, 1, 2, 4], dtype=mindspore.int32)
-        >>> out = ops.moe_token_unpermute(permuted_token, sorted_indices)
-        >>> out.shape
-        (8, 3)
-
-
-    """
-    return moe_token_unpermute_op(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)
-
-
 def mse_loss_ext(input, target, reduction='mean'):
     r"""
     Calculates the mean squared error between the predicted value and the label value.
@@ -6746,10 +7037,10 @@ def mv(input, vec):
     Examples:
         >>> import mindspore
         >>> import numpy as np
-        >>> from mindspore import Tensor,
+        >>> from mindspore import Tensor, ops
         >>> input = Tensor(np.array([[3., 4.], [1., 6.], [1., 3.]]).astype(np.float32))
         >>> vec = Tensor(np.array([1., 2.]).astype(np.float32))
-        >>> output =
+        >>> output = ops.auto_generate.mv(input, vec)
         >>> print(output)
         [11. 13. 7.]
     """
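As a check on the `mv` example above: [[3, 4], [1, 6], [1, 3]] @ [1, 2] gives 3*1 + 4*2 = 11, 1*1 + 6*2 = 13 and 1*1 + 3*2 = 7, matching the printed [11. 13. 7.].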
@@ -6822,14 +7113,14 @@ def narrow(input, dim, start, length):
 
     Examples:
         >>> import mindspore
-        >>> from mindspore import
+        >>> from mindspore import ops
         >>> from mindspore import Tensor
         >>> x = Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], mindspore.int32)
-        >>> output =
+        >>> output = ops.auto_generate.narrow(x, 0, 0, 2)
         >>> print(output)
         [[ 1 2 3]
         [ 4 5 6]]
-        >>> output =
+        >>> output = ops.auto_generate.narrow(x, 1, 1, 2)
         >>> print(output)
         [[ 2 3]
         [ 5 6]
@@ -6838,6 +7129,47 @@ def narrow(input, dim, start, length):
     return narrow_op(input, dim, start, length)
 
 
+def narrow_view(input, dim, start, length):
+    r"""
+    Obtains a tensor of a specified length at a specified start position along a specified axis.
+
+    Args:
+        input (Tensor): the tensor to narrow.
+        dim (int): the axis along which to narrow.
+        start (Union[int, Tensor[int]]): the starting dimension.
+        length (int): the distance to the ending dimension.
+
+    Returns:
+        output (Tensors) - The narrowed tensor.
+
+    Raises:
+        ValueError: the rank of `input` is 0.
+        ValueError: the value of `dim` is out the range [-input.ndim, input.ndim).
+        ValueError: the value of `start` is out the range [-input.shape[dim], input.shape[dim]].
+        ValueError: the value of `length` is out the range [0, input.shape[dim]-start].
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore
+        >>> from mindspore import ops
+        >>> from mindspore.ops.auto_generate import NarrowView
+        >>> from mindspore import Tensor
+        >>> x = Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], mindspore.int32)
+        >>> output = NarrowView()(x, 0, 0, 2)
+        >>> print(output)
+        [[ 1 2 3]
+        [ 4 5 6]]
+        >>> output = NarrowView()(x, 1, 1, 2)
+        >>> print(output)
+        [[ 2 3]
+        [ 5 6]
+        [ 8 9]]
+    """
+    return narrow_view_op(input, dim, start, length)
+
+
 def neg(input):
     r"""
     Returns a tensor with negative values of the input tensor element-wise.
@@ -7156,65 +7488,6 @@ def prod_ext(input, dim=None, keepdim=False, dtype=None):
     return prod_ext_op(input, dim, keepdim, dtype)
 
 
-def prompt_k_v_cache(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len, align_mode='LEFT'):
-    r"""
-    The PromptKVCache is used for prefill the KVCache of transformer network.
-
-    Args:
-        cache (Tensor): The cahe tensor with data type of int8, uint8, int16, uint16, float16, float32 and int32.
-            When format is BHSD, cache tensor of shape
-            :math:`(cache\_batch\_size, num\_head, max\_seq\_length, size\_pre\_head)`.
-            When format is BSD, cache tensor of shape
-            :math:`(cache\_batch\_size, max\_seq\_length, hidden\_size)`.
-        update (Tensor]): The tensor which is used to update the cache tensor. Same data type as cache tensor.
-            When format is BHSD, cache tensor of shape
-            :math:`(update\_batch\_size, num\_head, max\_seq\_length, size\_pre\_head)`.
-            When format is BSD, cache tensor of shape
-            :math:`(update\_batch\_size, max\_seq\_length, hidden\_size)`.
-        valid_seq_len (Tensor): The valid_seq_len tensor with data type of int64.
-            Valid_seq_len tensor of shape :math:`(update\_batch\_size)`.
-        batch_index (Tensor): The batch_index tensor with data type of int64.
-            Batch_index tensor of shape :math:`(update\_batch\_size)`. Indicate that which batch of cache tensor is going to be update.
-        seq_len_axis (Tensor): The seq_len_axis indicate which axis is seq_eln, set to '1' or '2'. Not able for now.
-        new_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
-            New_max_seq_len tensor of shape :math:`(1)`.
-            Indicate that user want to change the shape of cache tensor from
-            :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)` to
-            :math:`(batch\_size * max\_seq\_length / new\_max\_seq\_length, num_head, new\_max\_seq\_length, hidden\_size)`
-            to update the cache tensor. This will not real change the shape of `cache` tensor. Not able for now.
-        cur_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
-            Cur_max_seq_len tensor of shape :math:`(1)`. Keep the current seq_len of cache tensor. Not abel for now.
-        align_mode (str): indicate which axis is seq_len. Default: left.
-
-
-    Outputs:
-        With same data type and same shape as `cache` tensor.
-
-    Supported Platforms:
-        ``Ascend``
-
-    Examples:
-        >>> from mindspore import Tensor
-        >>> from mindspore.ops.operations import _inner_ops
-        >>> b = 4
-        >>> h = 40
-        >>> max_s = 1024
-        >>> s = 256
-        >>> d = 128
-        >>> cache = Tensor(np.random.randn(b, h, max_s, d).astype(np.float16))
-        >>> update = Tensor(np.random.randn(b, h, s, d).astype(np.float16))
-        >>> valid_seq_len = Tensor(np.random.randint(-1, s, size=ub).astype(np.int64))
-        >>> batch_index = Tensor(np.random.choice(np.arange(-1, b), size=ub, replace=False).astype(np.int64))
-        >>> new_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
-        >>> cur_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
-        >>> prompt_kv_cache = _inner_ops.PromptKVCache(0)
-        >>> output = prompt_kv_cache(cache, update, valid_seq_len, batch_index, Tensor(2), new_max_seq_len, cur_max_seq_len)
-        >>> print(cache)
-    """
-    prompt_k_v_cache_op = _get_cache_prim(PromptKVCache)(align_mode)
-    return prompt_k_v_cache_op(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len)
-
-
 def randperm(n, seed=0, offset=0, dtype=mstype.int64):
     r"""
     Generates random permutation of integers from 0 to n-1.
@@ -7550,7 +7823,7 @@ def rfft2(input, s=None, dim=(-2, -1), norm=None):
 
     Note:
         - `rfft2` is currently only used in `mindscience` scientific computing scenarios and
-
+          does not support other usage scenarios.
         - `rfft2` is not supported on Windows platform yet.
 
     Args:
@@ -7611,7 +7884,7 @@ def rfftfreq(n, d=1.0, dtype=None):
 
     Note:
        - `rfftfreq` is currently only used in `mindscience` scientific computing scenarios and
-
+          does not support other usage scenarios.
        - `rfftfreq` is not supported on Windows platform yet.
 
     Args:
@@ -7644,7 +7917,7 @@ def rfftn(input, s=None, dim=None, norm=None):
 
     Note:
        - `rfftn` is currently only used in `mindscience` scientific computing scenarios and
-
+          does not support other usage scenarios.
       - `rfftn` is not supported on Windows platform yet.
 
     Args:
@@ -7704,7 +7977,7 @@ def rfft(input, n=None, dim=-1, norm=None):
 
     Note:
        - `rfft` is currently only used in `mindscience` scientific computing scenarios and
-
+          does not support other usage scenarios.
       - `rfft` is not supported on Windows platform yet.
 
    Args:
@@ -8077,7 +8350,7 @@ def select_v2(condition, input, other):
     return select_v2_op(condition, input, other)
 
 
-def
+def select_ext_view(input, dim, index):
     r"""
     Slices the input tensor along the selected dimension at the given index.
 
@@ -8099,15 +8372,14 @@ def select_ext(input, dim, index):
     ``Ascend``
 
     Examples:
-        >>> import
-        >>> from mindspore import Tensor, mint
+        >>> from mindspore import Tensor, ops
         >>> input = Tensor([[2, 3, 4, 5],[3, 2, 4, 5]])
-        >>> y =
+        >>> y = ops.auto_generate.select_ext_view(input, 0, 0)
         >>> print(y)
         [2 3 4 5]
 
     """
-    return
+    return select_ext_view_op(input, dim, index)
 
 
 def select(condition, input, other):
@@ -8195,13 +8467,13 @@ def selu_ext(input):
 
     Examples:
         >>> import mindspore
-        >>> from mindspore import Tensor,
+        >>> from mindspore import Tensor, ops
        >>> import numpy as np
         >>> input = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
-        >>> output =
+        >>> output = ops.auto_generate.selu_ext(input)
         >>> print(output)
         [[-1.1113307 4.202804 -1.7575096]
-
+        [ 2.101402 -1.7462534 9.456309 ]]
     """
     return selu_ext_op(input)
 
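The `selu_ext` outputs above follow the standard SELU definition; a minimal NumPy sketch using the usual constants (scale ~1.0507 and alpha ~1.6733, assumed here rather than read from the MindSpore source) reproduces them for cross-checking and is not the MindSpore kernel:

    import numpy as np

    # Standard SELU constants (assumed).
    scale, alpha = 1.0507009873554805, 1.6732632423543772

    def selu_reference(x):
        # SELU(x) = scale * (x if x > 0 else alpha * (exp(x) - 1))
        x = np.asarray(x, dtype=np.float32)
        return scale * np.where(x > 0, x, alpha * (np.exp(x) - 1.0))

    print(selu_reference([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]))
    # ~[[-1.1113  4.2028 -1.7575]
    #   [ 2.1014 -1.7463  9.4563]]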
@@ -8634,7 +8906,7 @@ def solve_triangular(a, b, trans=0, lower=False, unit_diagonal=False):
 
     Note:
        - `solve_triangular` is currently only used in `mindscience` scientific computing scenarios and
-
+          does not support other usage scenarios.
        - `solve_triangular` is not supported on Windows platform yet.
 
     Args:
@@ -9065,9 +9337,6 @@ def swiglu(input, dim=-1):
     Computes SwiGLU (Swish-Gated Linear Unit activation function) of input tensor.
     SwiGLU is a variant of the :class:`mindspore.ops.GLU` activation function, it is defined as:
 
-    .. warning::
-        This is an experimental API that is subject to change or deletion.
-
     .. math::
         {SwiGLU}(a, b)= Swish(a) \otimes b
 
@@ -9075,6 +9344,9 @@ def swiglu(input, dim=-1):
     Swish(a)=a :math:`\sigma` (a), :math:`\sigma` is the :func:`mindspore.ops.sigmoid` activation function
     and :math:`\otimes` is the Hadamard product.
 
+    .. warning::
+        Only support on Atlas A2 training series.
+
     Args:
         input (Tensor): Tensor to be split. It has shape :math:`(\ast_1, N, \ast_2)`
         where `*` means, any number of additional dimensions. :math:`N` must be divisible by 2.
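A minimal NumPy sketch of the SwiGLU formula above, assuming the input is split into halves (a, b) along the chosen axis with a taken from the first half (illustrative only, not the MindSpore kernel):

    import numpy as np

    def swiglu_reference(x, dim=-1):
        # SwiGLU(a, b) = Swish(a) * b, with Swish(a) = a * sigmoid(a)
        a, b = np.split(np.asarray(x, dtype=np.float32), 2, axis=dim)
        return a * (1.0 / (1.0 + np.exp(-a))) * b

    x = np.array([[1.0, 2.0, 3.0, 4.0]])   # N = 4, so a = [1, 2], b = [3, 4]
    print(swiglu_reference(x))             # ~[[2.1932 7.0464]]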
@@ -9307,6 +9579,30 @@ def topk_ext(input, k, dim=-1, largest=True, sorted=True):
     return topk_ext_op(input, k, dim, largest, sorted)
 
 
+def topprouter(input, capacity, expert_num, drop_type=0, threshold=0.0, router_prob=0.0):
+    r"""
+    TopPRouter implementation in MOE.
+
+    Inputs:
+        - **x** (Tensor) - Input Tensor of 3D, supporting types:[int32, int64]
+        - **capacity** (Int64) - The maximum number of tokens each expert can handle.
+        - **expert_num** (Int64) - The number of expert.
+        - **drop_type** (Int64) - S-Drop/K-Drop, 0 means S-Drop, 1 means K-Drop, default 0.
+        - **threshold** (float32) - Expert threshold, default 0.
+        - **router_prob** (Tensor) - Topk prob Tensor of 2D, supporting types:[float32], default 0.
+
+    Outputs:
+        tuple(Tensor), tuple of 2 tensors, `dispatch_index` and `combine_inex`.
+
+        - dispatch_index (Tensor) - Token ID processed by each expert.
+        - combine_index (Tensor) - The combine index of each token.
+
+    Supported Platforms:
+        ``Ascend``
+    """
+    return topprouter_op(input, capacity, expert_num, drop_type, threshold, router_prob)
+
+
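Since the `topprouter` docstring above ships without an Examples section, the following doctest-style sketch shows only the argument plumbing implied by the signature; the tensor shapes and values are assumptions for illustration and are not verified behaviour of the operator:

    >>> import numpy as np
    >>> from mindspore import Tensor, ops
    >>> # Hypothetical 3D int32 routing input and 2D float32 probabilities (shapes assumed).
    >>> x = Tensor(np.zeros((1, 8, 2), dtype=np.int32))
    >>> router_prob = Tensor(np.random.rand(8, 2).astype(np.float32))
    >>> dispatch_index, combine_index = ops.auto_generate.topprouter(
    ...     x, capacity=4, expert_num=4, drop_type=0, threshold=0.0, router_prob=router_prob)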
 def trace_ext(input):
     r"""
     Returns a new tensor that is the sum of the `input` main trace.
@@ -9372,7 +9668,7 @@ def trace(input):
     return trace_op(input)
 
 
-def
+def transpose_ext_view(input, dim0, dim1):
     r"""
     Interchange two axes of a tensor.
 
@@ -9397,14 +9693,13 @@ def transpose_ext(input, dim0, dim1):
 
     Examples:
         >>> import numpy as np
-        >>> from mindspore import
-        >>>
-        >>>
-        >>> output = mint.transpose(input, 0, 2)
+        >>> from mindspore import Tensor, ops
+        >>> input = Tensor(np.ones((2, 3, 4), dtype=np.float32))
+        >>> output = ops.auto_generate.transpose_ext_view(input, 0, 2)
         >>> print(output.shape)
         (4, 3, 2)
     """
-    return
+    return transpose_ext_view_op(input, dim0, dim1)
 
 
 def transpose(input, input_perm):
@@ -9440,6 +9735,57 @@ def transpose(input, input_perm):
     return transpose_op(input, input_perm)
 
 
+def transpose_view(input, input_perm):
+    r"""
+    Permutes the dimensions of the input tensor according to input permutation.
+
+    For a 1-D array this has no effect, as a transposed vector is simply the same vector.
+    To convert a 1-D array into a 2D column vector please refer to :func:`mindspore.ops.expand_dims`.
+    For a 2-D array, this is a standard matrix transpose. For an n-D array, if axes are given,
+    their order indicates how the axes are permuted (see Examples).
+    If axes are not provided and a.shape is :math:`(i[0], i[1], ... i[n-2], i[n-1])`,
+    then a.transpose().shape is :math:`(i[n-1], i[n-2], ... i[1], i[0])`.
+
+    Note:
+        On GPU and CPU, if the value of `input_perm` is negative, its actual value is `input_perm[i] + rank(input)`.
+        Negative value of `input_perm` is not supported on Ascend.
+
+    Args:
+        input (Tensor): The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
+        input_perm (tuple[int]): The permutation to be converted. The elements in `input_perm` are composed of
+            the indexes of each dimension of `input`. The length of `input_perm` and the shape of `input` must be
+            the same. Only constant value is allowed. Must be in the range [-rank(input), rank(input)).
+
+    Returns:
+        Tensor, the type of output tensor is the same as `input` and the shape of output tensor is decided by the
+        shape of `input` and the value of `input_perm`.
+
+    Raises:
+        TypeError: If `input_perm` is not a tuple.
+        ValueError: If length of shape of `input` is not equal to length of shape of `input_perm`.
+        ValueError: If the same element exists in `input_perm`.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore
+        >>> import numpy as np
+        >>> from mindspore import Tensor, ops
+        >>> input = Tensor(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]), mindspore.float32)
+        >>> input_perm = (0, 2, 1)
+        >>> output = ops.TransposeView()(input, input_perm)
+        >>> print(output)
+        [[[ 1. 4.]
+        [ 2. 5.]
+        [ 3. 6.]]
+        [[ 7. 10.]
+        [ 8. 11.]
+        [ 9. 12.]]]
+    """
+    return transpose_view_op(input, input_perm)
+
+
 def triangular_solve(b, A, upper=True, transpose=False, unitriangular=False):
     r"""
     Solves a system of equations with a square upper or lower triangular invertible matrix `A` and multiple right-hand sides `b`.
@@ -10278,8 +10624,68 @@ def quant_batch_matmul(x1, x2, scale, offset=None, bias=None, pertokenScaleOptio
     return quant_batch_matmul_impl(x1, x2, scale, offset, bias, pertokenScaleOptional, transpose_x1, transpose_x2, dtype)
 
 
+def quant_matmul(x1, x2, scale, offset=None, pertoken_scale=None, bias=None, output_dtype=None, x1_dtype=None, x2_dtype=None, pertoken_scale_dtype=None, scale_dtype=None, group_sizes=None):
+    r"""
+
+    """
+    return quant_matmul_op(x1, x2, scale, offset, pertoken_scale, bias, output_dtype, x1_dtype, x2_dtype, pertoken_scale_dtype, scale_dtype, group_sizes)
+
+
 def weight_quant_batch_matmul(x, weight, antiquant_scale, antiquant_offset=None, quant_scale=None, quant_offset=None, bias=None, transpose_x=False, transpose_weight=False, antiquant_group_size=0):
     r"""
 
     """
     return weight_quant_batch_matmul_impl(x, weight, antiquant_scale, antiquant_offset, quant_scale, quant_offset, bias, transpose_x, transpose_weight, antiquant_group_size)
+
+
+def moe_token_unpermute(permuted_tokens, sorted_indices, probs=None, padded_mode=False, restore_shape=None):
+    r"""
+    Unpermute a tensor of permuted tokens based on sorted indices, and optionally merge the tokens with their corresponding probabilities.
+
+    .. warning::
+        - It is only supported on Atlas A2 Training Series Products.
+        - `sorted_indices` must not have duplicate values, otherwise the result is undefined.
+
+    Args:
+        permuted_tokens (Tensor): The tensor of permuted tokens to be unpermuted.
+            The shape is :math:`[num\_tokens * topk, hidden\_size]` , where `num_tokens`, `topk` and `hidden_size` are positive integers.
+        sorted_indices (Tensor): The tensor of sorted indices used to unpermute the tokens.
+            The shape is :math:`[num\_tokens * topk,]`, where `num_tokens` and `topk` are positive integers.
+            It only supports the int32 data type.
+        probs (Tensor, optional): The tensor of probabilities corresponding to the permuted tokens.
+            If provided, the unpermuted tokens will be merged with their respective probabilities.
+            The shape is :math:`[num\_tokens, topk]`, where `num_tokens` and `topk` are positive integers. Default: ``None`` .
+        padded_mode (bool, optional): If ``True``, indicating the indices are padded to denote selected tokens per expert. Default: ``False`` .
+        restore_shape (Union[tuple[int], list[int]], optional): The input shape before permutation, only used in padding mode. Default: ``None`` .
+
+    Returns:
+        Tensor, with the same dtype as `permuted_tokens`. If `padded_mode` is ``False``, the shape will be [`num_tokens`, `hidden_size`].
+        If `padded_mode` is ``True``, the shape will be specified by `restore_shape`.
+
+    Raises:
+        TypeError: If `permuted_tokens` is not a Tensor.
+        ValueError: Only supported when `padded_mode` is ``False``.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore
+        >>> from mindspore import Tensor, ops
+        >>> permuted_token = Tensor([
+        ...     [1, 1, 1],
+        ...     [0, 0, 0],
+        ...     [0, 0, 0],
+        ...     [3, 3, 3],
+        ...     [2, 2, 2],
+        ...     [1, 1, 1],
+        ...     [2, 2, 2],
+        ...     [3, 3, 3]], dtype=mindspore.bfloat16)
+        >>> sorted_indices = Tensor([0, 6, 7, 5, 3, 1, 2, 4], dtype=mindspore.int32)
+        >>> out = ops.moe_token_unpermute(permuted_token, sorted_indices)
+        >>> out.shape
+        (8, 3)
+
+
+    """
+    return moe_token_unpermute_op(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)