mindspore 2.6.0rc1__cp311-none-any.whl → 2.7.0__cp311-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +1290 -0
- mindspore/__init__.py +2 -2
- mindspore/_c_dataengine.cpython-311-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-311-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-311-aarch64-linux-gnu.so +0 -0
- mindspore/_checkparam.py +42 -11
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/_extends/optimize/__init__.py +23 -0
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
- mindspore/_extends/parse/parser.py +65 -84
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +58 -14
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
- mindspore/amp.py +4 -22
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/common/__init__.py +43 -12
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +3209 -3084
- mindspore/common/_utils.py +9 -1
- mindspore/common/api.py +377 -203
- mindspore/common/dtype.py +108 -57
- mindspore/common/dump.py +11 -16
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/dynamic_shape/auto_dynamic_shape.py +498 -0
- mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
- mindspore/common/file_system.py +59 -9
- mindspore/common/generator.py +5 -3
- mindspore/common/hook_handle.py +33 -5
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +27 -29
- mindspore/common/recompute.py +5 -7
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +117 -131
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +67 -55
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +38 -4
- mindspore/dataset/engine/datasets.py +350 -322
- mindspore/dataset/engine/datasets_user_defined.py +70 -24
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +10 -6
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +17 -5
- mindspore/dataset/vision/utils.py +632 -21
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/ascend/op_tuning.py +35 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -4
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +27 -7
- mindspore/include/api/cell.h +65 -5
- mindspore/include/api/cfg.h +24 -7
- mindspore/include/api/context.h +1 -0
- mindspore/include/api/delegate.h +10 -2
- mindspore/include/api/dual_abi_helper.h +100 -19
- mindspore/include/api/graph.h +14 -1
- mindspore/include/api/kernel.h +16 -3
- mindspore/include/api/kernel_api.h +9 -1
- mindspore/include/api/metrics/accuracy.h +9 -0
- mindspore/include/api/model.h +8 -1
- mindspore/include/api/model_group.h +4 -0
- mindspore/include/api/model_parallel_runner.h +2 -0
- mindspore/include/api/status.h +48 -10
- mindspore/include/api/types.h +8 -3
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/constants.h +9 -0
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/include/mindapi/base/type_id.h +3 -0
- mindspore/include/mindapi/base/types.h +7 -0
- mindspore/include/mindspore/ccsrc/availability/silent_check/silent_check.h +3 -4
- mindspore/include/mindspore/ccsrc/backend/backend_manager/backend_jit_config.h +47 -4
- mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/adapter/graph_kernel_cluster_cloud.h +1 -0
- mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/adapter/graph_kernel_comm_info_manager.h +1 -1
- mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/core/eliminate_redundant_output.h +1 -0
- mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/core/graph_kernel_expander.h +1 -1
- mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/core/graph_kernel_utils.h +25 -1
- mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/core/update_state_formatter.h +2 -1
- mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/depend_edge_elimination.h +61 -0
- mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/graph_kernel_flags.h +10 -1
- mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/model/lite_graph.h +1 -1
- mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/model/node.h +5 -6
- mindspore/include/mindspore/ccsrc/backend/common/mem_reuse/mem_reuse.h +1 -2
- mindspore/include/mindspore/ccsrc/backend/common/mem_reuse/mem_reuse_checker.h +0 -1
- mindspore/include/mindspore/ccsrc/backend/common/mem_reuse/mem_swap_manager.h +0 -1
- mindspore/include/mindspore/ccsrc/backend/common/optimizer/cache_manager.h +1 -1
- mindspore/include/mindspore/ccsrc/backend/common/optimizer/dynamic_shape/convert_custom_op.h +2 -2
- mindspore/include/mindspore/ccsrc/backend/common/optimizer/dynamic_shape/link_custom_op.h +1 -1
- mindspore/include/mindspore/ccsrc/backend/common/pass/add_attr_to_node/add_attr_to_node_register.h +1 -3
- mindspore/include/mindspore/ccsrc/backend/common/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.h +1 -1
- mindspore/include/mindspore/ccsrc/backend/common/pass/convert_list_to_tuple.h +2 -1
- mindspore/include/mindspore/ccsrc/backend/common/pass/custom_defined_depend.h +1 -3
- mindspore/include/mindspore/ccsrc/backend/common/pass/gradients_allreduce_depend_last_send.h +1 -2
- mindspore/include/mindspore/ccsrc/backend/common/pass/graph_view_replace_pass.h +1 -1
- mindspore/include/mindspore/ccsrc/backend/common/pass/insert_tensor_move_for_communication.h +2 -1
- mindspore/include/mindspore/ccsrc/backend/common/pass/ir_fusion/flash_attention_fusion.h +72 -0
- mindspore/include/mindspore/ccsrc/backend/common/pass/label_1f1b_overlap_node.h +1 -1
- mindspore/include/mindspore/ccsrc/backend/common/pass/mindir/add_depend_for_adamw.h +1 -3
- mindspore/include/mindspore/ccsrc/backend/common/pass/mindir/all_to_all_unify_mindir.h +8 -0
- mindspore/include/mindspore/ccsrc/backend/common/pass/optimize_gradients_allreduce_overlap.h +1 -1
- mindspore/include/mindspore/ccsrc/backend/common/pass/replace_node_by_proxy.h +1 -0
- mindspore/include/mindspore/ccsrc/backend/common/session/exec_order_builder.h +0 -2
- mindspore/include/mindspore/ccsrc/backend/common/session/executor.h +0 -41
- mindspore/include/mindspore/ccsrc/backend/common/session/kernel_graph_mgr.h +14 -10
- mindspore/include/mindspore/ccsrc/backend/common/session/session_basic.h +15 -31
- mindspore/include/mindspore/ccsrc/backend/common/somas/somas.h +1 -1
- mindspore/include/mindspore/ccsrc/backend/common/somas/somas_node.h +8 -2
- mindspore/include/mindspore/ccsrc/backend/ge_backend/dump/hook_dynamic_loader.h +5 -0
- mindspore/include/mindspore/ccsrc/backend/ge_backend/executor/ge_device_res_manager.h +1 -2
- mindspore/include/mindspore/ccsrc/backend/ge_backend/executor/ge_graph_executor.h +13 -14
- mindspore/include/mindspore/ccsrc/backend/ge_backend/executor/ge_utils.h +0 -2
- mindspore/include/mindspore/ccsrc/backend/ge_backend/ge_backend.h +10 -9
- mindspore/include/mindspore/ccsrc/backend/ge_backend/graph_ir/convert.h +2 -1
- mindspore/include/mindspore/ccsrc/backend/ge_backend/graph_ir/graph_runner.h +2 -1
- mindspore/include/mindspore/ccsrc/backend/ge_backend/graph_ir/utils.h +12 -14
- mindspore/include/mindspore/ccsrc/backend/ge_backend/pass/matmul_allreduce_fusion.h +51 -0
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/abstract_actor.h +26 -19
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/actor_common.h +14 -5
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/actor_dump.h +2 -1
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/actor_set.h +1 -1
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/control_flow/control_actor.h +38 -38
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/control_flow/entrance_actor.h +9 -9
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/control_flow/exit_actor.h +8 -8
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/control_flow/gather_actor.h +6 -6
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/control_flow/stack_actor.h +11 -11
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/control_flow/switch_actor.h +2 -2
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/data_prepare_actor.h +16 -16
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/data_source_actor.h +9 -9
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/debug_actor.h +8 -8
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/debug_aware_actor.h +2 -2
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/loop_count_actor.h +6 -6
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/memory_aware_actor.h +6 -6
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/memory_manager_actor.h +8 -11
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/output_actor.h +8 -5
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/profiler_actor.h +2 -2
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/recorder_actor.h +2 -2
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/super_kernel_actor.h +17 -17
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/device_tensor_store.h +24 -24
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/graph_compiler.h +8 -12
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/graph_partition.h +49 -0
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/scheduler_helper.h +1 -1
- mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/segment_runner.h +50 -0
- mindspore/include/mindspore/ccsrc/backend/ge_backend/utils/device_address_utils.h +4 -7
- mindspore/include/mindspore/ccsrc/backend/graph_compiler/op_backend.h +13 -24
- mindspore/include/mindspore/ccsrc/backend/graph_compiler/transform.h +2 -8
- mindspore/include/mindspore/ccsrc/backend/graph_compiler/vm.h +1 -5
- mindspore/include/mindspore/ccsrc/backend/ms_backend/ms_backend.h +0 -39
- mindspore/include/mindspore/ccsrc/backend/ms_backend/ms_backend_base.h +10 -5
- mindspore/include/mindspore/ccsrc/debug/checksum/checksum.h +35 -0
- mindspore/include/mindspore/ccsrc/debug/checksum/checksum_kernel.h +64 -0
- mindspore/include/mindspore/ccsrc/debug/checksum/checksum_mgr.h +50 -0
- mindspore/include/mindspore/ccsrc/debug/data_dump/device_statistic/check_overflow.h +1 -11
- mindspore/include/mindspore/ccsrc/debug/data_dump/device_statistic/common.h +0 -13
- mindspore/include/mindspore/ccsrc/debug/data_dump/device_statistic/kernel_launcher.h +3 -3
- mindspore/include/mindspore/ccsrc/debug/data_dump/device_statistic/mean.h +1 -1
- mindspore/include/mindspore/ccsrc/debug/data_dump/device_statistic/mem_manager.h +65 -0
- mindspore/include/mindspore/ccsrc/debug/data_dump/device_statistic/statistic_kernel.h +7 -8
- mindspore/include/mindspore/ccsrc/debug/data_dump/overflow_counter.h +6 -1
- mindspore/include/mindspore/ccsrc/debug/data_dump/tensor_info_collect.h +21 -6
- mindspore/include/mindspore/ccsrc/debug/data_dump/tensor_statistic.h +2 -2
- mindspore/include/mindspore/ccsrc/debug/debug_services.h +1 -4
- mindspore/include/mindspore/ccsrc/debug/debugger/debugger_utils.h +7 -8
- mindspore/include/mindspore/ccsrc/debug/debugger/tensor_summary.h +0 -53
- mindspore/include/mindspore/ccsrc/debug/dump/tensordump_control.h +6 -2
- mindspore/include/mindspore/ccsrc/debug/dump/utils.h +30 -0
- mindspore/include/mindspore/ccsrc/debug/profiler/mstx/mstx_impl.h +37 -24
- mindspore/include/mindspore/ccsrc/debug/profiler/mstx/mstx_symbol.h +63 -4
- mindspore/include/mindspore/ccsrc/debug/profiler/profiler.h +37 -15
- mindspore/include/mindspore/ccsrc/debug/profiler/profiling.h +9 -6
- mindspore/include/mindspore/ccsrc/debug/profiler/profiling_framework_data.h +2 -0
- mindspore/include/mindspore/ccsrc/debug/profiler/python_obj_pointer.h +7 -7
- mindspore/include/mindspore/ccsrc/debug/profiler/report_data.h +23 -0
- mindspore/include/mindspore/ccsrc/debug/profiler/thread.h +2 -2
- mindspore/include/mindspore/ccsrc/debug/summary/summary.h +1 -1
- mindspore/include/mindspore/ccsrc/debug/utils.h +0 -5
- mindspore/include/mindspore/ccsrc/distributed/cluster/actor_route_table_proxy.h +1 -1
- mindspore/include/mindspore/ccsrc/distributed/cluster/actor_route_table_service.h +0 -2
- mindspore/include/mindspore/ccsrc/distributed/cluster/topology/meta_server_node.h +5 -5
- mindspore/include/mindspore/ccsrc/distributed/persistent/storage/local_file.h +0 -1
- mindspore/include/mindspore/ccsrc/distributed/rpc/tcp/connection.h +0 -1
- mindspore/include/mindspore/ccsrc/distributed/rpc/tcp/socket_operation.h +0 -1
- mindspore/include/mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.h +0 -1
- mindspore/include/mindspore/ccsrc/distributed/rpc/tcp/tcp_socket_operation.h +0 -1
- mindspore/include/mindspore/ccsrc/frontend/ir/primitive_py.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/ir/py_execute_py.h +11 -28
- mindspore/include/mindspore/ccsrc/frontend/ir/storage.h +44 -0
- mindspore/include/mindspore/ccsrc/frontend/ir/storage_base.h +45 -0
- mindspore/include/mindspore/ccsrc/frontend/ir/tensor_py.h +13 -22
- mindspore/include/mindspore/ccsrc/frontend/np_dtypes/np_dtypes.h +29 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/composite/composite.h +76 -5
- mindspore/include/mindspore/ccsrc/frontend/operator/composite/do_signature.h +2 -1
- mindspore/include/mindspore/ccsrc/frontend/operator/composite/functional_overload.h +46 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/common/meta_impl.h +161 -22
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/common/utils.h +38 -61
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/any.h +27 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/any_ext.h +27 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/conv3d_padding.h +41 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/einsum_ext.h +47 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/func_dropout_ext.h +28 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/func_max_pool2d.h +28 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/gmm.h +28 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/gmm_backward.h +28 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/gmm_backward_fusion.h +28 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/gmm_common_utils.h +30 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/gmm_v2.h +28 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/gmm_v2_backward.h +28 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/gmm_v2_backward_fusion.h +28 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/inplace_exponential.h +31 -0
- mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/moe_token_unpermute.h +28 -0
- mindspore/include/mindspore/ccsrc/frontend/optimizer/ad/adjoint.h +22 -3
- mindspore/include/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.h +21 -10
- mindspore/include/mindspore/ccsrc/frontend/optimizer/ad/grad.h +5 -2
- mindspore/include/mindspore/ccsrc/frontend/optimizer/ad/pynative_jit_grad.h +11 -2
- mindspore/include/mindspore/ccsrc/frontend/optimizer/auto_monad_eliminate.h +3 -13
- mindspore/include/mindspore/ccsrc/frontend/optimizer/cse_pass.h +3 -7
- mindspore/include/mindspore/ccsrc/frontend/optimizer/graph_transform.h +3 -2
- mindspore/include/mindspore/ccsrc/frontend/optimizer/inplace_input_replace.h +30 -0
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/add_forward_monad_depend.h +51 -37
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/branch_culling.h +7 -104
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/call_graph_tuple_transform.h +2 -1
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/check_invalid_view_inplace_dout.h +46 -0
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/const_output_eliminate.h +4 -0
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/expand_dump_flag.h +1 -0
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/get_grad_eliminate.h +5 -1
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/incorporate_call.h +3 -3
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/inline.h +35 -9
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/item_dict_eliminate.h +3 -1
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/item_tuple_or_list_eliminate.h +1 -17
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/j_node_and_user_rematch.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/loop_unroll.h +1 -0
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/parameter_eliminate.h +3 -3
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/partial_eliminate.h +5 -3
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/recompute_prepare.h +1 -0
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/special_op_eliminate.h +9 -3
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/stack_unstack_eliminate.h +13 -6
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/stopgrad_eliminate.h +3 -1
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/switch_or_switch_layer_defer_inline.h +13 -5
- mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass.h +7 -7
- mindspore/include/mindspore/ccsrc/frontend/optimizer/opt.h +10 -5
- mindspore/include/mindspore/ccsrc/frontend/optimizer/optimizer.h +55 -280
- mindspore/include/mindspore/ccsrc/frontend/optimizer/pattern_matcher.h +8 -2
- mindspore/include/mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.h +5 -0
- mindspore/include/mindspore/ccsrc/frontend/parallel/auto_parallel/stage_compute.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/came_parallel_handler.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/costmodel_context.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/dynamic_creator.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/dynamic_shape/dynamic_shape.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/graph_util/flops_collection.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/graph_util/fold_pipeline_split_utils.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/graph_util/grad_accumulation_utils.h +2 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.h +2 -0
- mindspore/include/mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.h +3 -2
- mindspore/include/mindspore/ccsrc/frontend/parallel/graph_util/node_info.h +2 -0
- mindspore/include/mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.h +50 -14
- mindspore/include/mindspore/ccsrc/frontend/parallel/interleaved_parallel/interleaved_parallel.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h +0 -11
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/addn_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/apply_rotary_pos_emb_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/arithmetic_info.h +44 -0
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/avgpool_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/cdist_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/conv3d_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/f_f_n_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/fft_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/fillv2_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/flash_attention_score_info.h +20 -20
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/fused_infer_attention_score_info.h +15 -15
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/gamma_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/gather_info.h +16 -0
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/grid_sampler2d.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/group_norm_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/incre_flash_attention_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/index_add_info.h +55 -0
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/inplace_op_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/iou_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/kldiv_loss_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/kv_cache_mgr_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/kv_cache_scatter_update_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/lin_space_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/loss_info.h +34 -0
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/matmul_info.h +14 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/max_avg_pool_3d_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/moe_compute_expert_tokens_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/moe_finalize_routing_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/moe_gating_top_k_softmax_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/moe_init_routing_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.h +7 -2
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h +18 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/pad_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/paged_attention_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/paged_attention_mask_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/prompt_flash_attention_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/quant_batch_matmul_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/quant_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/quant_linear_sparse_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/reduce_base_method_info.h +3 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/reshape_and_cache_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.h +9 -4
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/scatter_math_ops_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/scatter_nd_ops_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/scatter_ops_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/self_define_shard_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/stand_alone_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/topkrouter_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/topprouter_info.h +55 -0
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/tracev2_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/tril_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/weight_quant_batch_matmul_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/wkv_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/parallel_postprocessor.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/parallel_preprocessor.h +2 -2
- mindspore/include/mindspore/ccsrc/frontend/parallel/parallel_processor.h +5 -4
- mindspore/include/mindspore/ccsrc/frontend/parallel/parallel_processor_context.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/parallel_whole_graph_processor.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/parameter_manager.h +0 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/allreduce_slice_to_reducescatter.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/assign_add_opt.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/begin_end_overlap_inline.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/bias_add_comm_swap.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/fias_sp.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/flash_sp.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/float32_redistribution.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/full_micro_interleaved_order_control.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/handle_group_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/interleave_branches_utils.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/interleave_parallel_branches.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/interleave_split_concat_branches.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/label_fine_grained_interleaved_index.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/label_micro_interleaved_index.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/matmul_add_comm_reduction.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/merge_cast_opt.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/merge_comm.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/merge_recompute_call_nodes.h +28 -0
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/micro_interleaved_order_control.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/optimize_parallel_allgather_comm.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_grad_comm.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_grad_flash_sp.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_grad_ring_attention.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_gradmatmul_and_gradallreduce.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_opt_shard_in_pipeline.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_param_gather.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_and_grad_model_parallel.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_comm.h +28 -0
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/pass_utils.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/remove_cast_before_assign_add.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/reorder_send_recv_between_fp_bp.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/slice_activation_in_cell_share_recompute.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/split_layernorm_comm_fp.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/split_matmul_comm_elementwise_fp.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pass/swap_dp_allreduce_reducescatter.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pipeline_transformer/detach_backward.h +70 -0
- mindspore/include/mindspore/ccsrc/frontend/parallel/pipeline_transformer/fold_pipeline_transformer.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pipeline_transformer/gpipe_interleave_scheduler.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pipeline_transformer/pipeline_interleave.h +14 -3
- mindspore/include/mindspore/ccsrc/frontend/parallel/pipeline_transformer/pipeline_scheduler.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pipeline_transformer/seqpipe_scheduler.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/pipeline_transformer/zero_bubble_v.h +124 -0
- mindspore/include/mindspore/ccsrc/frontend/parallel/shard/shard.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/step_assigned_parallel.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/step_parallel_utils.h +10 -3
- mindspore/include/mindspore/ccsrc/frontend/parallel/strategy_checkpoint/strategy_checkpoint_info.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/strategy_loader.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/tensor_layout/layout_utils.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/tensor_layout/shared_parameter.h +1 -1
- mindspore/include/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_layout.h +3 -0
- mindspore/include/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_transform.h +2 -2
- mindspore/include/mindspore/ccsrc/include/backend/anf_runtime_algorithm.h +29 -13
- mindspore/include/mindspore/ccsrc/include/backend/data_queue/data_queue_mgr.h +1 -5
- mindspore/include/mindspore/ccsrc/include/backend/debug/data_dump/dump_control.h +1 -0
- mindspore/include/mindspore/ccsrc/include/backend/debug/data_dump/dump_json_parser.h +4 -5
- mindspore/include/mindspore/ccsrc/include/backend/debug/data_dump/dump_utils.h +1 -0
- mindspore/include/mindspore/ccsrc/include/backend/debug/data_dump/tensor_stat_dump.h +0 -3
- mindspore/include/mindspore/ccsrc/include/backend/debug/debugger/debugger.h +0 -2
- mindspore/include/mindspore/ccsrc/include/backend/debug/execute_order_tracker/execute_order_tracker.h +14 -4
- mindspore/include/mindspore/ccsrc/include/backend/debug/tensor_data.h +0 -19
- mindspore/include/mindspore/ccsrc/include/backend/distributed/cluster/tcp_store.h +53 -0
- mindspore/include/mindspore/ccsrc/include/backend/distributed/collective/collective_manager.h +17 -10
- mindspore/include/mindspore/ccsrc/include/backend/distributed/constants.h +1 -10
- mindspore/include/mindspore/ccsrc/include/backend/distributed/embedding_cache/embedding_hash_map.h +0 -2
- mindspore/include/mindspore/ccsrc/include/backend/distributed/ps/ps_context.h +0 -6
- mindspore/include/mindspore/ccsrc/include/backend/distributed/rpc/tcp/constants.h +2 -1
- mindspore/include/mindspore/ccsrc/include/backend/kernel_graph.h +0 -1
- mindspore/include/mindspore/ccsrc/include/backend/kernel_info.h +8 -10
- mindspore/include/mindspore/ccsrc/include/backend/mbuf_device_address.h +5 -5
- mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/abstract_dynamic_mem_pool.h +10 -5
- mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/address_discretizer.h +63 -0
- mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/dynamic_mem_pool.h +12 -0
- mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/max_segment_tree.h +181 -0
- mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/mem_dynamic_allocator.h +0 -1
- mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/mem_pool_util.h +98 -0
- mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/mem_tracker.h +11 -107
- mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/race_checker.h +64 -0
- mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/tracker_graph.h +91 -0
- mindspore/include/mindspore/ccsrc/include/backend/optimizer/helper.h +0 -2
- mindspore/include/mindspore/ccsrc/include/backend/optimizer/inplace_node_pass.h +1 -9
- mindspore/include/mindspore/ccsrc/include/backend/optimizer/optimizer.h +3 -6
- mindspore/include/mindspore/ccsrc/include/backend/optimizer/pattern_engine.h +0 -2
- mindspore/include/mindspore/ccsrc/include/backend/optimizer/visitor.h +2 -0
- mindspore/include/mindspore/ccsrc/include/backend/py_execute_utils.h +2 -1
- mindspore/include/mindspore/ccsrc/include/common/debug/common.h +2 -1
- mindspore/include/mindspore/ccsrc/include/common/debug/draw.h +3 -1
- mindspore/include/mindspore/ccsrc/include/common/debug/dump_proto.h +2 -10
- mindspore/include/mindspore/ccsrc/include/common/expander/core/emitter.h +3 -2
- mindspore/include/mindspore/ccsrc/include/common/expander/core/node.h +1 -1
- mindspore/include/mindspore/ccsrc/include/common/fallback.h +9 -0
- mindspore/include/mindspore/ccsrc/include/common/pybind_api/api_register.h +7 -1
- mindspore/include/mindspore/ccsrc/include/common/pynative/abstract_converter.h +0 -4
- mindspore/include/mindspore/ccsrc/include/common/pynative/adapter.h +2 -5
- mindspore/include/mindspore/ccsrc/include/common/pynative/common_utils.h +5 -1
- mindspore/include/mindspore/ccsrc/include/common/pynative/grad_state.h +12 -0
- mindspore/include/mindspore/ccsrc/include/common/pynative/variable.h +326 -0
- mindspore/include/mindspore/ccsrc/include/common/random.h +1 -2
- mindspore/include/mindspore/ccsrc/include/common/runtime_conf/runtime_conf.h +11 -7
- mindspore/include/mindspore/ccsrc/include/common/runtime_conf/thread_bind_core.h +12 -10
- mindspore/include/mindspore/ccsrc/include/common/symbol_engine/symbol_engine_impl.h +16 -1
- mindspore/include/mindspore/ccsrc/include/common/utils/anfalgo.h +13 -2
- mindspore/include/mindspore/ccsrc/include/common/utils/comm_manager.h +0 -1
- mindspore/include/mindspore/ccsrc/include/common/utils/compile_cache_context.h +4 -2
- mindspore/include/mindspore/ccsrc/include/common/utils/convert_utils.h +18 -4
- mindspore/include/mindspore/ccsrc/include/common/utils/convert_utils_py.h +2 -10
- mindspore/include/mindspore/ccsrc/include/common/utils/cse.h +0 -1
- mindspore/include/mindspore/ccsrc/include/common/utils/json_operation_utils.h +1 -1
- mindspore/include/mindspore/ccsrc/include/common/utils/ms_device_shape_transfer.h +0 -1
- mindspore/include/mindspore/ccsrc/include/common/utils/parallel_context.h +10 -2
- mindspore/include/mindspore/ccsrc/include/common/utils/python_adapter.h +3 -3
- mindspore/include/mindspore/ccsrc/include/common/utils/stub_tensor.h +1 -3
- mindspore/include/mindspore/ccsrc/include/common/utils/summary/event_writer.h +1 -1
- mindspore/include/mindspore/ccsrc/include/common/utils/tensor_py.h +24 -117
- mindspore/include/mindspore/ccsrc/include/common/utils/tensor_py_wrapper.h +1 -7
- mindspore/include/mindspore/ccsrc/include/common/utils/tensor_utils.h +3 -3
- mindspore/include/mindspore/ccsrc/include/common/utils/utils.h +8 -8
- mindspore/include/mindspore/ccsrc/include/common/visible.h +0 -10
- mindspore/include/mindspore/ccsrc/kernel/environ_manager.h +2 -0
- mindspore/include/mindspore/ccsrc/kernel/framework_utils.h +3 -0
- mindspore/include/mindspore/ccsrc/kernel/graph_kernel_info.h +1 -1
- mindspore/include/mindspore/ccsrc/kernel/philox_random.h +47 -87
- mindspore/include/mindspore/ccsrc/minddata/dataset/api/python/python_mp.h +26 -6
- mindspore/include/mindspore/ccsrc/minddata/dataset/core/config_manager.h +9 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/minddata/dataset/core/data_type.h +1 -56
- mindspore/include/mindspore/ccsrc/minddata/dataset/core/device_buffer.h +74 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/core/message_queue.h +29 -20
- mindspore/include/mindspore/ccsrc/minddata/dataset/core/shared_memory_queue.h +47 -2
- mindspore/include/mindspore/ccsrc/minddata/dataset/core/tensor.h +26 -2
- mindspore/include/mindspore/ccsrc/minddata/dataset/engine/connector.h +7 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_info.h +61 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.h +28 -39
- mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/cpu_map_job.h +3 -3
- mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/gpu_map_job.h +3 -3
- mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/map_job.h +2 -2
- mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/map_op.h +12 -2
- mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/npu_map_job.h +3 -3
- mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/receive_bridge_op.h +4 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/send_bridge_op.h +1 -1
- mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.h +0 -1
- mindspore/include/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h +9 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/include/dataset/vision_ascend.h +1 -1
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/data/parse_example_op.h +19 -15
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/acl_adapter.h +28 -1
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/AclLiteType.h +6 -1
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/AclLiteUtils.h +6 -1
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/DvppCommon.h +1 -1
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/ErrorCode.h +2 -2
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/ThreadSafeQueue.h +3 -1
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/VdecHelper.h +9 -12
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/acl_plugin.h +19 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/dvpp_video.h +29 -10
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/dvpp_video_utils.h +80 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.h +1 -1
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/pyav/container.h +100 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/pyav/context.h +102 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/pyav/format.h +45 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/pyav/frame.h +74 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/pyav/packet.h +59 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/pyav/stream.h +93 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/py_func_op.h +37 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/util/btree.h +1 -1
- mindspore/include/mindspore/ccsrc/minddata/dataset/util/command.h +29 -0
- mindspore/include/mindspore/ccsrc/minddata/dataset/util/cond_var.h +0 -1
- mindspore/include/mindspore/ccsrc/minddata/dataset/util/ftok_key.h +2 -2
- mindspore/include/mindspore/ccsrc/minddata/dataset/util/json_helper.h +1 -1
- mindspore/include/mindspore/ccsrc/minddata/dataset/util/log_adapter.h +0 -5
- mindspore/include/mindspore/ccsrc/minddata/dataset/util/queue.h +1 -1
- mindspore/include/mindspore/ccsrc/minddata/dataset/util/sig_handler.h +11 -3
- mindspore/include/mindspore/ccsrc/minddata/dataset/util/system_pool.h +1 -1
- mindspore/include/mindspore/ccsrc/minddata/dataset/util/task_manager.h +0 -1
- mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/common/log_adapter.h +0 -5
- mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_distributed_sample.h +0 -1
- mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_header.h +0 -1
- mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_index.h +0 -1
- mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_pk_sample.h +0 -1
- mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_reader.h +0 -1
- mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_schema.h +0 -1
- mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_statistics.h +0 -1
- mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h +0 -1
- mindspore/include/mindspore/ccsrc/minddata/utils.h +30 -2
- mindspore/include/mindspore/ccsrc/ms_extension/all.h +46 -0
- mindspore/include/mindspore/ccsrc/ms_extension/api.h +33 -0
- mindspore/include/mindspore/ccsrc/ms_extension/ascend/atb/atb_common.h +98 -0
- mindspore/include/mindspore/ccsrc/ms_extension/ascend/atb/operation_cache.h +229 -0
- mindspore/include/mindspore/ccsrc/ms_extension/common/tensor.h +319 -0
- mindspore/include/mindspore/ccsrc/ms_extension/common/tensor_utils.h +83 -0
- mindspore/include/mindspore/ccsrc/ms_extension/common/visible.h +28 -0
- mindspore/include/mindspore/ccsrc/ms_extension/pynative/pyboost_extension.h +312 -0
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/capture_context.h +1 -1
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/eval_frame_hook.h +2 -2
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_build/build_graph_utils.h +1 -7
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_build/func_graph_builder.h +25 -6
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/abstract_object.h +92 -19
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/cfg.h +19 -5
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/code_generator.h +53 -49
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph.h +40 -10
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_analyzer.h +3 -19
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_arguments_optimizer.h +145 -0
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_build.h +13 -26
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/node.h +33 -3
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/side_effect.h +116 -27
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_compiler/abstract_type.h +2 -2
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_compiler/compiler.h +4 -0
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_compiler/utils.h +2 -0
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_guard/cache.h +32 -8
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_guard/guard.h +11 -15
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_guard/guard_utils.h +17 -8
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_guard/infer.h +0 -2
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_guard/shape_ctx.h +3 -3
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_guard/trace.h +50 -93
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/jit_compile_results.h +1 -16
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/pi_jit_config.h +11 -22
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/python_adapter/py_code.h +56 -5
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/python_adapter/py_frame.h +6 -7
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/python_adapter/pydef.h +0 -6
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/runtime.h +1 -1
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/utils/opcode_util.h +5 -0
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/utils/stop_trace_reason.h +26 -17
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/utils/utils.h +15 -11
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/action.h +3 -4
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/executor/executor_py.h +116 -0
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/executor/graph_executor_py.h +118 -0
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/executor/jit_executor_py.h +68 -0
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/graph_circle_handler.h +35 -0
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/parse/data_converter.h +6 -0
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/parse/function_block.h +1 -0
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/parse/parse.h +10 -0
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/parse/parse_base.h +17 -3
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/parse/resolve.h +15 -1
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/pass.h +4 -1
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/pass_config.h +4 -0
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/pipeline.h +17 -171
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/resource.h +8 -16
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/async_eval_result.h +2 -2
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/auto_monad.h +1 -1
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/builtin_prim.h +3 -3
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.h +14 -12
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/order_enforce.h +1 -1
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/prim.h +13 -76
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/prim_utils.h +78 -0
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.h +1 -1
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/static_analysis.h +4 -5
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/validator.h +3 -0
- mindspore/include/mindspore/ccsrc/pipeline/jit/trace/trace_recorder.h +19 -4
- mindspore/include/mindspore/ccsrc/pipeline/llm_boost/llm_boost_binder.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_dma_handle.h +0 -2
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.h +4 -5
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.h +2 -2
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_context.h +69 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_res_manager.h +16 -13
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.h +0 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.h +12 -8
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/stress_detect.h +40 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.h +32 -7
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/profiler/feature_mgr.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_dispatcher.h +10 -8
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/atb/add_atb_kernel.h +2 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/atb/atb_adapter.h +24 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/atb/atb_kernel_mod.h +3 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/atb/inplace_grouped_matmul_add_atb_kernel.h +2 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/atb/inplace_matmul_add_atb_kernel.h +39 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/dvm/dvm_comm_info.h +1 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/dvm/dvm_kernel_mod.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/dvm/lazy_fusion_kernel.h +15 -16
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/dvm/lazy_fusion_op.h +119 -93
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_all_gather_matmul.h +63 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_all_gather_v.h +51 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_matmul_all_reduce.h +2 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_matmul_reduce_scatter.h +61 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_receive.h +3 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_reduce_scatter_v.h +51 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_send.h +1 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.h +26 -11
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_all_gather.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_all_gather_into_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_all_gather_into_tensor_uneven.h +40 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_all_reduce.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_all_to_all_v.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_all_to_all_v_single.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_barrier.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_batch_isend_irecv.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_broadcast.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_gather.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_gather_into_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_irecv.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_isend.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_reduce.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_reduce_scatter.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_reduce_scatter_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_reduce_scatter_tensor_uneven.h +40 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_scatter.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_scatter_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/inner_comm_all_gather.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/inner_comm_all_reduce.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/inner_comm_all_to_all_v.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/inner_comm_irecv.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/inner_comm_isend.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/inner_comm_reduce_scatter.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/comm_common.h +4 -4
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_all_gather.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_all_gather_into_tensor.h +2 -2
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_all_gather_into_tensor_uneven.h +38 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_all_reduce.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_all_to_all_v.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_all_to_all_v_single.h +2 -2
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_broadcast.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_gather.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_gather_into_tensor.h +2 -2
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_irecv.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_isend.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_reduce.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_reduce_scatter.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_reduce_scatter_tensor.h +2 -2
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_reduce_scatter_tensor_uneven.h +39 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_scatter.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_scatter_tensor.h +2 -2
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/inner_comm_all_gather.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/inner_comm_all_reduce.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/inner_comm_all_to_all_v.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/inner_comm_isend.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/inner_comm_reduce_scatter.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/add_rms_norm_quant.h +15 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/dynamic_ntk.h +31 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/fused_add_topk_div.h +31 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/internal_kernel_in_out_map.h +16 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/internal_tiling_cache.h +3 -3
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/kv_scale_cache.h +30 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/mla.h +48 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/mla_preprocess.h +32 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/multi_weight_matmul.h +2 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/apply_rotary_pos_emb.h +46 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/auto_gen/internal_kernel_info_adapter.h +95 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/auto_gen/kernel_info_adapter.h +78 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/flash_attention_score.h +54 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/internal_kernel_info.h +162 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/internal_pyboost_utils.h +121 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/mla.h +53 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/paged_attention.h +91 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/reshape_and_cache.h +43 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/swiglu_dynamic_quant.h +32 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/rts/reshape_ext.h +5 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/boost_model_atb.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/atb_infer.h +7 -9
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/comm.h +82 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/common_op_params.h +77 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/context.h +50 -10
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/graph_op_builder.h +24 -18
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/infer_op_params.h +2331 -671
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/operation.h +29 -11
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/operation_infra.h +78 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/svector.h +19 -22
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/train_op_params.h +215 -24
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/types.h +39 -24
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/utils.h +7 -9
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/base/context_factory.h +1 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/base/event_manager.h +156 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/base/external_comm_manager.h +68 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/base/hosttensor_binder.h +0 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/base/model.h +33 -29
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/error.h +49 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/file_utils.h +86 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_config.h +84 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_error.h +20 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_utils.h +86 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log.h +128 -52
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/ModelTaskExecutor.h +64 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/TaskQueue.h +40 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/check_util.h +80 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/config.h +2 -12
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/file_system.h +35 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/hccl_runner.h +48 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/model_factory.h +1 -3
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/operation_factory.h +3 -3
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/operation_util.h +6 -5
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/share_memory.h +46 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/singleton.h +7 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/statistic.h +1 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/str_split.h +0 -2
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/expander_fallback.h +33 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.h +1 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/matmul_assignadd_fusion.h +61 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/inference_matmul_split_fusion.h +31 -8
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/inference_qbmm_elemwise_fusion.h +46 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/inference_swiglu_fusion_v2.h +52 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/inference_weight_preprocess_utils.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/moe_init_routing_dyn_quantv2_fusion.h +66 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/rms_norm_quant_fusion.h +24 -1
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/swiglu_dynamic_quant_fusion.h +47 -0
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/swiglu_reshape_dynamic_quant_fusion.h +47 -0
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_common.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_device_context.h +20 -11
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_somas.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/hardware/mpi_collective_comm_lib.h +3 -1
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.h +9 -2
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/contiguous_cpu_kernel.h +8 -6
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel.h +2 -2
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/custom/custom_kernel_input_info.h +99 -0
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/custom/custom_op_plugin_kernel.h +62 -0
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/pyexecute/joinedstr_cpu_kernel.h +46 -0
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/pyexecute/py_execute_cpu_kernel.h +1 -6
- mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_kernel_task.h +1 -0
- mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_device_context.h +18 -18
- mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_somas.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/hardware/nvidia_collective_comm_lib.h +3 -1
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/ascend_device_address/ascend_device_address.h +27 -16
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/ascend_res_manager.h +27 -11
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/capture_graph/ascend_capture_graph.h +45 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/ascend_collective_comm_lib.h +6 -5
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/ascend_communication_group.h +27 -12
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/ccool_collective_comm_lib.h +4 -5
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/ccool_communication_group.h +4 -4
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/dummy_ascend_collective_comm_lib.h +3 -5
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/dvm_collective_comm_lib.h +5 -5
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/dvm_communication_group.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/hccl_watch_dog_thread.h +5 -3
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/leaper_trans.h +2 -5
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/lowlatency_collective_comm_lib.h +20 -5
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/multi_ascend_collective_comm_lib.h +6 -6
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/multi_ascend_communication_group.h +4 -2
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/utils.h +83 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/device_context_conf/op_tuning_conf.h +14 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/dvm/dvm.h +246 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/hal_manager/ascend_hal_manager.h +1 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/hccl_adapter/hccl_adapter.h +31 -3
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/hccl_adapter/plugin/hccl_plugin.h +8 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mbuf_manager/mbuf_receive_manager.h +6 -1
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mbuf_manager/tdt_manager.h +44 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/abstract_ascend_memory_pool_support.h +2 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_dynamic_mem_adapter.h +3 -2
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_memory_adapter.h +1 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_memory_manager.h +1 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_memory_pool.h +14 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_two_pointer_mem_adapter.h +1 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_vmm_adapter.h +11 -9
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/custom_op_proto/cust_array_ops.h +11 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/custom_op_proto/cust_other_ops.h +0 -22
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/op_adapter_base.h +38 -33
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/op_adapter_map.h +5 -2
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/op_adapter_util.h +3 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/op_declare/array_ops_declare.h +3 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/op_declare/hcom_ops_declare.h +3 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/op_declare/transform_fusion_ops_declare.h +0 -6
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/transform_util.h +1 -1
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/stream_manager/ascend_stream_manager.h +6 -4
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/symbol_interface/acl_mdl_symbol.h +14 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/symbol_interface/acl_rt_symbol.h +6 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/symbol_interface/symbol_utils.h +11 -8
- mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/cpu_device_address/cpu_device_address.h +17 -12
- mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/cpu_mem_manager/cpu_hash_table.h +128 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/cpu_mem_manager/cpu_hash_table_util.h +114 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/cpu_mem_manager/cpu_memory_manager.h +4 -10
- mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/cpu_mem_manager/cpu_memory_pool.h +3 -1
- mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/cpu_res_manager.h +8 -15
- mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/visible.h +32 -0
- mindspore/include/mindspore/ccsrc/plugin/res_manager/gpu/device/gpu_device_address.h +19 -11
- mindspore/include/mindspore/ccsrc/plugin/res_manager/gpu/gpu_res_manager.h +9 -9
- mindspore/include/mindspore/ccsrc/ps/core/collective_ops_impl.h +31 -6
- mindspore/include/mindspore/ccsrc/ps/core/communicator/http_request_handler.h +0 -1
- mindspore/include/mindspore/ccsrc/ps/core/file_configuration.h +2 -2
- mindspore/include/mindspore/ccsrc/ps/core/node.h +1 -1
- mindspore/include/mindspore/ccsrc/pybind_api/hal/event_py.h +2 -2
- mindspore/include/mindspore/ccsrc/pybind_api/hal/memory_py.h +2 -0
- mindspore/include/mindspore/ccsrc/pybind_api/hal/stream_py.h +3 -4
- mindspore/include/mindspore/ccsrc/pybind_api/ir/tensor_api/auto_generate/tensor_api.h +138 -133
- mindspore/include/mindspore/ccsrc/pybind_api/ir/tensor_index_py.h +10 -107
- mindspore/include/mindspore/ccsrc/pybind_api/ir/tensor_register/auto_generate/tensor_py_gen.h +138 -134
- mindspore/include/mindspore/ccsrc/pybind_api/ir/tensor_register/tensor_func_reg.h +0 -1
- mindspore/include/mindspore/ccsrc/pybind_api/resource/manager.h +2 -2
- mindspore/include/mindspore/ccsrc/pybind_api/storage_py.h +36 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/abs.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/acos_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/acosh_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adamw.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adaptive_avg_pool1d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adaptive_avg_pool2d_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adaptive_avg_pool2d_grad_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adaptive_avg_pool3d_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adaptive_avg_pool3d_grad_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adaptive_max_pool1d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adaptive_max_pool2d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/add.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/add_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/add_layer_norm_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/add_layernorm_v2.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/add_rms_norm.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/add_rmsnorm_quant_v2.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/add_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/addbmm.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/addcdiv_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/addcmul_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/addmm.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/addmv.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/all_finite.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/all_gather_matmul.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/any.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/any_ext.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/apply_rotary_pos_emb.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/arange.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/argmax_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/argmax_with_value.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/argmin_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/argmin_with_value.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/argsort.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/as_strided.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/asin_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/asinh_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/atan2_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/atan_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/atanh.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/avg_pool1d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/avg_pool2d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/avg_pool2d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/avg_pool3d_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/avg_pool3d_grad_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/baddbmm.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_mat_mul.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_norm_elemt.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_norm_elemt_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_norm_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_norm_gather_stats_with_counts.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_norm_grad_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_norm_reduce_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_norm_stats.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bernoulli_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/binary_cross_entropy.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/binary_cross_entropy_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/binary_cross_entropy_with_logits.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/binary_cross_entropy_with_logits_backward.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bincount_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bitwise_and_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bitwise_and_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bitwise_not.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bitwise_or_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bitwise_or_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bitwise_xor_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bitwise_xor_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bmm_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/broadcast_to.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/broadcast_to_view.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cast.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/ceil.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cell_backward_hook.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/chunk.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/chunk_view.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/clamp_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/clamp_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/clone.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/col2im_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/col2im_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/concat.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/constant_pad_nd.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/contiguous.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/conv1d_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/conv1d_padding.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/conv2d_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/conv2d_padding.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/conv3d_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/conv3d_padding.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/conv_transpose2d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/convolution.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/convolution_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/convolution_str.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/convolution_str_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/copy.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cos.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cosh.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/count_nonzero.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cross.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cross_entropy_loss.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cross_entropy_loss_grad.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cummax.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cummin_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cumsum_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/custom_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dense.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/diag_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/diagonal_view.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_all_gather.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_all_gather_into_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_all_gather_into_tensor_uneven.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_all_reduce.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_all_to_all_v.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_all_to_all_v_single.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_barrier.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_batch_isend_irecv.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_broadcast.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_gather.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_gather_into_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_irecv.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_isend.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_reduce.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_reduce_scatter.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_reduce_scatter_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_reduce_scatter_tensor_uneven.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_scatter.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_scatter_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/div.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/divmod.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/divmods.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/divs.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dot.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dropout_do_mask_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dropout_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dropout_gen_mask_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dropout_grad_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dynamic_quant_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/einsum_ext.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/elu.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/elu_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/elu_grad_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/embedding.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/embedding_dense_backward.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/empty.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/empty_like.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/equal.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/equal_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/erf.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/erfc.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/erfinv.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/exp.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/exp2.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/expand_as.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/expand_dims.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/expand_dims_view.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/expm1.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/eye.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/ffn_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/fill_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/fill_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/flash_attention_score.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/flash_attention_score_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/flatten_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/floor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/floor_div.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/floor_div_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/fmod_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/fmod_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/frac.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/full_like.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/func_dropout_ext.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/func_max_pool2d.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/fused_infer_attention_score.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gather_d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gather_d_grad_v2.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gcd.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gelu.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gelu_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gelu_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gelu_grad_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/generator.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/glu.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/glu_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gmm.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gmm_backward.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gmm_backward_fusion.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gmm_v2.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gmm_v2_backward.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gmm_v2_backward_fusion.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/greater.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/greater_equal.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/greater_equal_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/grid_sampler_2d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/grid_sampler_2d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/grid_sampler_3d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/grid_sampler_3d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/group_norm.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/group_norm_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/grouped_matmul.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/grouped_matmul_v2.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/grouped_matmul_v4.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hardtanh.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hardtanh_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/histc_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hshrink.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hshrink_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hsigmoid.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hsigmoid_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hswish.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hswish_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/identity.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/im2col_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/incre_flash_attention.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/index.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/index_add_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/index_fill_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/index_fill_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/index_select.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_comm_all_gather.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_comm_all_reduce.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_comm_all_to_all_v.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_comm_irecv.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_comm_isend.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_comm_reduce_scatter.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_index.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_inplace_index_put.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_moe_token_unpermute.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_non_zero.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_add_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_addmm.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_adds_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_bernoulli_scalar.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_bernoulli_tensor.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_clamp_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_clamp_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_copy.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_div.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_divmod.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_divmods.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_divs.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_elu.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_erfinv.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_exp.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_exponential.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_fill_diagonal.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_fill_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_fill_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_floor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_floor_divide.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_floor_divides.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_grouped_matmul_add.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_hardtanh.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_index_add.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_index_put.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_log.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_masked_fill_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_masked_fill_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_matmul_add.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_mul.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_muls.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_normal.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_put.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_random.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_relu.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_remainder_tensor_scalar.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_remainder_tensor_tensor.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_scatter_add.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_scatter_src.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_scatter_src_reduce.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_scatter_value.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_scatter_value_reduce.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_silu.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_stop_gradient.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_sub_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_sub_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_tanh.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_threshold.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_uniform.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_zero.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/isclose.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/isfinite.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/isinf.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/isneginf.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/kl_div.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/kl_div_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/kthvalue.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/kv_cache_scatter_update.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/l1_loss_backward_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/l1_loss_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/layer_norm_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/layer_norm_grad_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/leaky_relu_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/leaky_relu_grad_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/lerp.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/lerp_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/less.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/less_equal.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/lin_space_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/linalg_qr.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/linalg_vector_norm.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/log.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/log10.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/log1p.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/log2.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/log_softmax.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/log_softmax_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/log_softmax_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logaddexp.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logaddexp2.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logical_and.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logical_not.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logical_or.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logical_xor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logsigmoid.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logsigmoid_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logsumexp.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/masked_fill.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/masked_scatter.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/masked_select.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/masked_select_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/matmul.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/matmul_allreduce_add_rmsnorm.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/matmul_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/matmul_reduce_scatter.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/matrix_inverse_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/max.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/max_dim.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/max_pool_grad_with_indices.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/max_pool_grad_with_mask.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/max_pool_with_indices.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/max_pool_with_mask.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/max_unpool2d_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/maximum.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mean_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/median_dim.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/median_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/meshgrid.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/min.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/min_dim.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/minimum.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mish_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mish_grad_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mla.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mm_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_compute_expert_tokens.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_distribute_combine.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_distribute_dispatch.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_finalize_routing.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_gating_top_k_softmax.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_init_routing.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_init_routing_quant_v2.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_init_routing_v2.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_token_permute.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_token_permute_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_token_unpermute.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_token_unpermute_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mse_loss_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mse_loss_grad_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mul.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/muls.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/multi_scale_deformable_attn.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/multi_scale_deformable_attn_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/multinomial_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mv.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/nan_to_num.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/nansum.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/narrow.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/narrow_view.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/ne_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/neg.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/new_empty.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/new_full.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/new_ones.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/new_zeros.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/nllloss.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/nllloss_2d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/nllloss_2d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/nllloss_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/non_zero.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/non_zero_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/norm.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/normal_float_float.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/normal_float_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/normal_tensor_float.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/normal_tensor_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/not_equal.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/one_hot_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/ones.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/ones_like_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/outer.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/paged_attention.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/pixel_shuffle.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/polar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/pow.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/pow_scalar_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/pow_tensor_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/prelu.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/prelu_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/prod_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/prompt_flash_attention.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/quant_batch_matmul.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/quant_matmul.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/quant_v2.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/rand_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/rand_like_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/randint.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/randint_like.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/randn.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/randn_like.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/randperm_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reciprocal.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reduce_all.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reduce_any.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reduce_max.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reduce_min.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reflection_pad_1d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reflection_pad_1d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reflection_pad_2d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reflection_pad_2d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reflection_pad_3d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reflection_pad_3d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/relu.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/relu_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/remainder_scalar_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/remainder_tensor_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/remainder_tensor_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/repeat.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/repeat_interleave_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/repeat_interleave_int.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/repeat_interleave_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/replication_pad_1d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/replication_pad_1d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/replication_pad_2d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/replication_pad_2d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/replication_pad_3d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/replication_pad_3d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reshape.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reshape_and_cache.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reverse_v2.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/ring_attention_update.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/rms_norm.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/rms_norm_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/roll.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/rotary_position_embedding.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/rotary_position_embedding_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/round.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/rsqrt.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/scatter.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/scatter_add_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/scatter_value.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/searchsorted.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/select.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/select_ext_view.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/select_v2.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/selu_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/selu_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sigmoid.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sigmoid_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sign.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/silent_check_v2.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/silent_check_v3.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/silu.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/silu_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sin.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sinc.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sinh.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/slice.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/slice_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/slice_ext_view.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/smooth_l1_loss.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/smooth_l1_loss_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/soft_margin_loss.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/soft_margin_loss_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/softmax.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/softmax_backward.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/softplus_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/softplus_grad_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/softshrink.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/softshrink_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sort_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/speed_fusion_attention.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/speed_fusion_attention_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/split.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/split_tensor.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/split_tensor_view.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/split_with_size.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/split_with_size_view.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sqrt.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/square.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/squeeze.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/stack_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/std.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/std_mean.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sub.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sub_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sub_scalar.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sum_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/swiglu.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/swiglu_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/t_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/take.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/tan.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/tanh.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/tanh_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/tensor_scatter_elements.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/threshold.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/threshold_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/tile.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/topk_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/trace_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/transpose.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/transpose_ext_view.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/transpose_view.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/triangular_solve.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/tril_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/triu.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/trunc.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/type_as.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/uniform_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/unique2.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/unique_consecutive.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/unique_dim.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/unstack_ext_view.h +44 -0
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_bicubic2d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_bicubic2d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_bilinear2d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_bilinear2d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_linear1d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_linear1d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_nearest1d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_nearest1d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_nearest2d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_nearest2d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_nearest3d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_nearest3d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_trilinear3d.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_trilinear3d_grad.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/var.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/var_mean.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/view.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/view_as.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/weight_quant_batch_matmul.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/xlogy.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/xlogy_scalar_other.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/xlogy_scalar_self.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/zeros.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/zeros_like_ext.h +1 -1
- mindspore/include/mindspore/ccsrc/pyboost/comm_handle.h +6 -2
- mindspore/include/mindspore/ccsrc/pyboost/customize/any.h +39 -0
- mindspore/include/mindspore/ccsrc/pyboost/customize/cell_backward_hook.h +27 -0
- mindspore/include/mindspore/ccsrc/pyboost/customize/divmod.h +3 -3
- mindspore/include/mindspore/ccsrc/pyboost/customize/einsum_ext.h +38 -0
- mindspore/include/mindspore/ccsrc/pyboost/customize/identity.h +2 -2
- mindspore/include/mindspore/ccsrc/pyboost/customize/meshgrid.h +7 -4
- mindspore/include/mindspore/ccsrc/pyboost/customize/op_common.h +12 -15
- mindspore/include/mindspore/ccsrc/pyboost/customize/pixel_shuffle.h +2 -3
- mindspore/include/mindspore/ccsrc/pyboost/customize/reshape.h +4 -3
- mindspore/include/mindspore/ccsrc/pyboost/customize/searchsorted.h +4 -5
- mindspore/include/mindspore/ccsrc/pyboost/functions/auto_generate/auto_grad_op_reg.h +1591 -1387
- mindspore/include/mindspore/ccsrc/pyboost/functions/auto_generate/functions.h +578 -481
- mindspore/include/mindspore/ccsrc/pyboost/grad_functions/pyboost_grad_functions.h +3 -0
- mindspore/include/mindspore/ccsrc/pyboost/grad_functions/value_converter.h +30 -4
- mindspore/include/mindspore/ccsrc/pyboost/op_register.h +52 -0
- mindspore/include/mindspore/ccsrc/pyboost/op_runner.h +45 -19
- mindspore/include/mindspore/ccsrc/pyboost/pyboost_utils.h +106 -62
- mindspore/include/mindspore/ccsrc/pynative/base.h +22 -24
- mindspore/include/mindspore/ccsrc/pynative/forward/do_pyboost_cast.h +71 -67
- mindspore/include/mindspore/ccsrc/pynative/forward/forward.h +14 -12
- mindspore/include/mindspore/ccsrc/pynative/forward/forward_task.h +34 -2
- mindspore/include/mindspore/ccsrc/pynative/grad/custom_function.h +14 -7
- mindspore/include/mindspore/ccsrc/pynative/grad/function/auto_generate/pyboost_native_grad_functions.h +501 -457
- mindspore/include/mindspore/ccsrc/pynative/grad/function/func_builder.h +3 -3
- mindspore/include/mindspore/ccsrc/pynative/grad/function/func_grad.h +280 -96
- mindspore/include/mindspore/ccsrc/pynative/grad/function/func_pass.h +0 -1
- mindspore/include/mindspore/ccsrc/pynative/grad/function.h +28 -23
- mindspore/include/mindspore/ccsrc/pynative/grad/function_py.h +19 -11
- mindspore/include/mindspore/ccsrc/pynative/grad/grad.h +30 -97
- mindspore/include/mindspore/ccsrc/pynative/grad/grad_utils.h +39 -23
- mindspore/include/mindspore/ccsrc/pynative/grad/hook_py.h +21 -22
- mindspore/include/mindspore/ccsrc/pynative/grad/jit/jit_grad.h +2 -26
- mindspore/include/mindspore/ccsrc/pynative/grad/top_cell.h +8 -150
- mindspore/include/mindspore/ccsrc/pynative/op_function/auto_generate/pyboost_api.h +564 -0
- mindspore/include/mindspore/ccsrc/pynative/op_function/auto_generate/pyboost_core.h +564 -0
- mindspore/include/mindspore/ccsrc/pynative/op_function/auto_generate/tensor_func_utils.h +499 -484
- mindspore/include/mindspore/ccsrc/pynative/op_function/comm_handle_py.h +2 -0
- mindspore/include/mindspore/ccsrc/pynative/op_function/converter.h +11 -0
- mindspore/include/mindspore/ccsrc/pynative/op_function/customize/direct_ops.h +2 -12
- mindspore/include/mindspore/ccsrc/pynative/predict_out_type_map.h +3 -0
- mindspore/include/mindspore/ccsrc/pynative/pynative_execute.h +6 -2
- mindspore/include/mindspore/ccsrc/pynative/pynative_utils.h +39 -43
- mindspore/include/mindspore/ccsrc/runtime/collective/collective_communication_lib.h +17 -1
- mindspore/include/mindspore/ccsrc/runtime/collective/communication_group.h +5 -0
- mindspore/include/mindspore/ccsrc/runtime/collective/dummy_collective_communication_lib.h +2 -1
- mindspore/include/mindspore/ccsrc/runtime/device/device_address_utils.h +55 -50
- mindspore/include/mindspore/ccsrc/runtime/device/memory_scheduler.h +2 -1
- mindspore/include/mindspore/ccsrc/runtime/device/move_to.h +3 -0
- mindspore/include/mindspore/ccsrc/runtime/device/res_manager/auto_mem_offload.h +0 -1
- mindspore/include/mindspore/ccsrc/runtime/device/res_manager/capture_graph.h +35 -0
- mindspore/include/mindspore/ccsrc/runtime/device/res_manager/hal_res_base.h +20 -17
- mindspore/include/mindspore/ccsrc/runtime/device/res_manager/hal_res_manager.h +7 -3
- mindspore/include/mindspore/ccsrc/runtime/device/res_manager/loadable_device_address.h +1 -1
- mindspore/include/mindspore/ccsrc/runtime/device/res_manager/memory_manager.h +1 -1
- mindspore/include/mindspore/ccsrc/runtime/device/res_manager/swap_manager.h +2 -2
- mindspore/include/mindspore/ccsrc/runtime/device/res_manager/tensor_array.h +1 -1
- mindspore/include/mindspore/ccsrc/runtime/device/res_manager/utils/utils.h +0 -1
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.h +46 -33
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.h +30 -19
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/actor_dump.h +4 -3
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/actor_set.h +0 -4
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/any_type_kernel_actor.h +7 -61
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_gather_runner.h +74 -0
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_switch_runner.h +89 -0
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/control_actor.h +37 -41
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/entrance_actor.h +11 -11
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.h +8 -9
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/gather_actor.h +6 -7
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/stack_actor.h +11 -11
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/switch_actor.h +2 -2
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/copy_actor.h +12 -14
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.h +28 -20
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/data_source_actor.h +10 -58
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/debug_actor.h +11 -10
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/debug_aware_actor.h +2 -2
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/fusion/fusion_actor.h +2 -2
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.h +90 -83
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_infer_actor.h +3 -1
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.h +11 -1
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_resize_actor.h +3 -1
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_infer_actor.h +4 -4
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_resize_actor.h +4 -4
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_runner.h +405 -0
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.h +11 -11
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/memory/memory_alloc_actor.h +3 -3
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/memory/memory_free_actor.h +2 -3
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/memory/memory_swap_actor.h +4 -4
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/memory_aware_actor.h +7 -7
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/memory_manager_actor.h +18 -17
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.h +13 -7
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/profiler_actor.h +2 -2
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/recorder_actor.h +2 -2
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/rpc/mux_send_actor.h +1 -1
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/rpc/recv_actor.h +6 -10
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/rpc/rpc_actor.h +2 -14
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/rpc/send_actor.h +4 -4
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.h +75 -57
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/any_type_graph_scheduler.h +0 -33
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.h +13 -1
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/control_node_scheduler.h +1 -2
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/device_tensor_copy_store.h +14 -14
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/device_tensor_store.h +28 -27
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/execution_order_check/comm_execution_order_check.h +17 -7
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/execution_order_check/kernel_cache.h +24 -4
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/graph_capture/graph_capture_manager.h +117 -0
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.h +4 -71
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/graph_parameter_store.h +88 -142
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.h +9 -22
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/parameter_store.h +4 -0
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/pipeline/async_lf_queue.h +97 -0
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/pipeline/lf_ring_queue.h +205 -0
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/pipeline/runtime_pipeline.h +71 -0
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/pre_launch_comm.h +10 -2
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.h +4 -13
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.h +12 -1
- mindspore/include/mindspore/ccsrc/runtime/hardware/device_context.h +44 -173
- mindspore/include/mindspore/ccsrc/runtime/hardware/device_context_manager.h +1 -1
- mindspore/include/mindspore/ccsrc/runtime/pipeline/async_rqueue.h +2 -2
- mindspore/include/mindspore/ccsrc/runtime/pipeline/ring_queue.h +1 -1
- mindspore/include/mindspore/ccsrc/runtime/pipeline/task/task.h +1 -1
- mindspore/include/mindspore/ccsrc/runtime/pynative/graph_adapter.h +0 -1
- mindspore/include/mindspore/ccsrc/runtime/pynative/ir_converter.h +8 -7
- mindspore/include/mindspore/ccsrc/runtime/pynative/op_runner.h +6 -6
- mindspore/include/mindspore/ccsrc/runtime/pynative/op_runtime_info.h +4 -4
- mindspore/include/mindspore/ccsrc/utils/dlopen_macro.h +2 -2
- mindspore/include/mindspore/core/include/abstract/abstract_function.h +54 -13
- mindspore/include/mindspore/core/include/abstract/abstract_value.h +66 -3
- mindspore/include/mindspore/core/include/abstract/ops/primitive_infer_map.h +1 -1
- mindspore/include/mindspore/core/include/abstract/param_validator.h +3 -2
- mindspore/include/mindspore/core/include/base/bfloat16.h +1 -1
- mindspore/include/mindspore/core/include/base/float16.h +4 -3
- mindspore/include/mindspore/core/include/base/float8_e4m3fn.h +264 -0
- mindspore/include/mindspore/core/include/base/float8_e5m2.h +260 -0
- mindspore/include/mindspore/core/include/base/hifloat8.h +54 -58
- mindspore/include/mindspore/core/include/ir/anf.h +37 -8
- mindspore/include/mindspore/core/include/ir/device_sync.h +17 -1
- mindspore/include/mindspore/core/include/ir/dtype/number.h +123 -9
- mindspore/include/mindspore/core/include/ir/dtype/op_dtype.h +48 -0
- mindspore/include/mindspore/core/include/ir/dtype.h +4 -0
- mindspore/include/mindspore/core/include/ir/func_graph.h +2 -0
- mindspore/include/mindspore/core/include/ir/func_graph_cloner.h +2 -0
- mindspore/include/mindspore/core/include/ir/meta_grad_data.h +4 -13
- mindspore/include/mindspore/core/include/ir/primitive.h +34 -2
- mindspore/include/mindspore/core/include/ir/scalar.h +2 -2
- mindspore/include/mindspore/core/include/ir/scope.h +16 -3
- mindspore/include/mindspore/core/include/ir/tensor.h +922 -41
- mindspore/include/mindspore/core/include/ir/tensor_py_wrapperbase.h +11 -11
- mindspore/include/mindspore/core/include/ir/tensor_storage_info.h +1 -0
- mindspore/include/mindspore/core/include/load_mindir/infer_mindir.h +3 -2
- mindspore/include/mindspore/core/include/mindapi/base/macros.h +3 -3
- mindspore/include/mindspore/core/include/mindapi/base/type_id.h +3 -0
- mindspore/include/mindspore/core/include/mindapi/base/types.h +7 -0
- mindspore/include/mindspore/core/include/ops/op_def.h +2 -31
- mindspore/include/mindspore/core/include/symbolic_shape/operation_builder.h +1 -1
- mindspore/include/mindspore/core/include/utils/anf_utils.h +2 -0
- mindspore/include/mindspore/core/include/utils/callback_handler.h +1 -1
- mindspore/include/mindspore/core/include/utils/compact_set.h +4 -0
- mindspore/include/mindspore/core/include/utils/core_op_utils.h +1 -1
- mindspore/include/mindspore/core/include/utils/device_manager_conf.h +4 -0
- mindspore/include/mindspore/core/include/utils/flags.h +0 -2
- mindspore/include/mindspore/core/include/utils/info.h +7 -0
- mindspore/include/mindspore/core/include/utils/llm_manager.h +2 -0
- mindspore/include/mindspore/core/include/utils/log_adapter.h +11 -2
- mindspore/include/mindspore/core/include/utils/ms_context.h +13 -11
- mindspore/include/mindspore/core/include/utils/ms_exception.h +42 -5
- mindspore/include/mindspore/core/include/utils/ms_utils.h +4 -8
- mindspore/include/mindspore/core/include/utils/ms_utils_secure.h +1 -1
- mindspore/include/mindspore/core/include/utils/phase.h +17 -2
- mindspore/include/mindspore/core/include/utils/system/base.h +1 -1
- mindspore/include/mindspore/core/include/utils/tensor_hook_map.h +30 -0
- mindspore/include/mindspore/core/mindrt/include/actor/op_actor.h +68 -0
- mindspore/include/mindspore/core/mindrt/include/async/async.h +2 -2
- mindspore/include/mindspore/core/mindrt/include/thread/actor_threadpool.h +4 -0
- mindspore/include/mindspore/core/mindrt/include/thread/core_affinity.h +1 -1
- mindspore/include/mindspore/core/mindrt/include/thread/hqueue.h +6 -6
- mindspore/include/mindspore/core/mindrt/include/thread/threadpool.h +6 -2
- mindspore/include/mindspore/ops/grad/grad_utils.h +25 -3
- mindspore/include/mindspore/ops/infer/all_gather_v.h +39 -0
- mindspore/include/mindspore/ops/infer/all_to_all.h +38 -0
- mindspore/include/mindspore/ops/infer/dtype.h +12 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/acosh.h +2 -9
- mindspore/include/mindspore/ops/infer/ops_func_impl/asinh.h +2 -9
- mindspore/include/mindspore/ops/infer/ops_func_impl/atanh.h +4 -9
- mindspore/include/mindspore/ops/infer/ops_func_impl/batch_norm_ext.h +6 -11
- mindspore/include/mindspore/ops/infer/ops_func_impl/batch_norm_grad_ext.h +5 -4
- mindspore/include/mindspore/ops/infer/ops_func_impl/bitwise_and_scalar.h +4 -5
- mindspore/include/mindspore/ops/infer/ops_func_impl/bitwise_and_tensor.h +3 -5
- mindspore/include/mindspore/ops/infer/ops_func_impl/bitwise_or_scalar.h +3 -10
- mindspore/include/mindspore/ops/infer/ops_func_impl/bitwise_or_tensor.h +2 -10
- mindspore/include/mindspore/ops/infer/ops_func_impl/bitwise_xor_scalar.h +3 -10
- mindspore/include/mindspore/ops/infer/ops_func_impl/bitwise_xor_tensor.h +2 -10
- mindspore/include/mindspore/ops/infer/ops_func_impl/broadcast_to.h +0 -1
- mindspore/include/mindspore/ops/infer/ops_func_impl/broadcast_to_view.h +32 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/cell_backward_hook.h +32 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/chunk.h +0 -2
- mindspore/include/mindspore/ops/infer/ops_func_impl/chunk_view.h +32 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/cross_entropy_loss.h +36 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/cross_entropy_loss_grad.h +36 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/diagonal_view.h +32 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/dist_comm_all_gather_into_tensor_uneven.h +33 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/dist_comm_reduce_scatter_tensor_uneven.h +33 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/dump_gradient.h +33 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/dynamic_ntk.h +32 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/eltwise_op.h +4 -10
- mindspore/include/mindspore/ops/infer/ops_func_impl/empty.h +7 -3
- mindspore/include/mindspore/ops/infer/ops_func_impl/empty_like.h +7 -3
- mindspore/include/mindspore/ops/infer/ops_func_impl/exp.h +3 -4
- mindspore/include/mindspore/ops/infer/ops_func_impl/expand_dims.h +1 -2
- mindspore/include/mindspore/ops/infer/ops_func_impl/expand_dims_view.h +31 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/fused_add_topk_div.h +56 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/grouped_matmul.h +13 -4
- mindspore/include/mindspore/ops/infer/ops_func_impl/grouped_matmul_base.h +8 -9
- mindspore/include/mindspore/ops/infer/ops_func_impl/grouped_matmul_v2.h +7 -1
- mindspore/include/mindspore/ops/infer/ops_func_impl/grouped_matmul_v4.h +3 -1
- mindspore/include/mindspore/ops/infer/ops_func_impl/inner_moe_token_unpermute.h +36 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/inplace_bernoulli_scalar.h +25 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/inplace_bernoulli_tensor.h +40 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/inplace_matmul_add.h +34 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/inplace_remainder_tensor_scalar.h +35 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/inplace_remainder_tensor_tensor.h +35 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/inplace_silu.h +35 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/kv_scale_cache.h +48 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/masked_fill.h +4 -3
- mindspore/include/mindspore/ops/infer/ops_func_impl/masked_scatter.h +37 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/matmul_fusion_utils.h +6 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/matmul_split_silu_fastgelu_add_mul_out1.h +34 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/matmul_split_silu_mul_out1.h +34 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/matmul_split_silu_out2.h +1 -1
- mindspore/include/mindspore/ops/infer/ops_func_impl/max_pool_grad_with_indices.h +2 -8
- mindspore/include/mindspore/ops/infer/ops_func_impl/max_pool_grad_with_mask.h +4 -2
- mindspore/include/mindspore/ops/infer/ops_func_impl/max_pool_with_indices.h +6 -4
- mindspore/include/mindspore/ops/infer/ops_func_impl/max_pool_with_mask.h +6 -4
- mindspore/include/mindspore/ops/infer/ops_func_impl/mla.h +54 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/mla_preprocess.h +75 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/moe_distribute_combine.h +34 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/moe_distribute_dispatch.h +37 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/moe_init_routing_quant_v2.h +39 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/narrow.h +0 -1
- mindspore/include/mindspore/ops/infer/ops_func_impl/narrow_view.h +29 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/neg.h +1 -6
- mindspore/include/mindspore/ops/infer/ops_func_impl/new_empty.h +7 -3
- mindspore/include/mindspore/ops/infer/ops_func_impl/new_full.h +37 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/normal_float_float.h +1 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/ones_like.h +2 -6
- mindspore/include/mindspore/ops/infer/ops_func_impl/ones_like_ext.h +1 -2
- mindspore/include/mindspore/ops/infer/ops_func_impl/q_matmul_split_silu_fastgelu_add_mul_out1.h +34 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/q_matmul_split_silu_mul_out1.h +34 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/quant_matmul.h +32 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/reciprocal.h +4 -9
- mindspore/include/mindspore/ops/infer/ops_func_impl/reduce_any.h +4 -5
- mindspore/include/mindspore/ops/infer/ops_func_impl/reduce_arithmetic.h +2 -1
- mindspore/include/mindspore/ops/infer/ops_func_impl/remainder_tensor_scalar.h +4 -4
- mindspore/include/mindspore/ops/infer/ops_func_impl/ring_attention_update.h +36 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/select_ext_view.h +39 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/sigmoid.h +4 -5
- mindspore/include/mindspore/ops/infer/ops_func_impl/slice_ext.h +0 -1
- mindspore/include/mindspore/ops/infer/ops_func_impl/slice_ext_view.h +29 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/split_tensor.h +0 -1
- mindspore/include/mindspore/ops/infer/ops_func_impl/split_tensor_view.h +32 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/split_with_size.h +0 -2
- mindspore/include/mindspore/ops/infer/ops_func_impl/split_with_size_view.h +32 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/square.h +2 -10
- mindspore/include/mindspore/ops/infer/ops_func_impl/swiglu_dynamic_quant.h +32 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/topprouter.h +36 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/transpose.h +0 -2
- mindspore/include/mindspore/ops/infer/ops_func_impl/transpose_ext_view.h +34 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/transpose_view.h +29 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/unstack_ext_view.h +37 -0
- mindspore/include/mindspore/ops/infer/ops_func_impl/zeros_like_ext.h +1 -2
- mindspore/include/mindspore/ops/infer/reduce_scatter.h +3 -1
- mindspore/include/mindspore/ops/infer/reduce_scatter_v.h +38 -0
- mindspore/include/mindspore/ops/kernel/ascend/acl/acl_kernel_mod.h +3 -0
- mindspore/include/mindspore/ops/kernel/ascend/acl_ir/acl_adapter_info.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/acl_ir/custom/custom_aclnn_utils.h +95 -0
- mindspore/include/mindspore/ops/kernel/ascend/acl_ir/custom/custom_op_api_cache.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/acl_ir/custom/custom_op_api_exec.h +84 -0
- mindspore/include/mindspore/ops/kernel/ascend/acl_ir/op_api_cache.h +18 -8
- mindspore/include/mindspore/ops/kernel/ascend/acl_ir/op_api_convert.h +40 -114
- mindspore/include/mindspore/ops/kernel/ascend/acl_ir/op_api_exec.h +41 -32
- mindspore/include/mindspore/ops/kernel/ascend/acl_ir/op_api_util.h +6 -0
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/common/kernel_base.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/common/kernel_log.h +11 -11
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/format_transfer/formats_definitions.h +5 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/format_transfer/register_format_transfer.h +5 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/inc/ms_cpu_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/concat.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/dct.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/dctn.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/fft_ortho.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/fft_shapecopy.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/fftbase.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/fftfreq.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/fftnbase.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/irfft_double.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/nms_with_mask.h +0 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/random/philox_random_dist.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/random/random_distributions.h +27 -25
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/topprouter.h +64 -0
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/eigen_tensor.h +18 -15
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/fused_sparse_utils.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/kernel_util.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/philox_random.h +75 -138
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/range_sampler.h +7 -3
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/sampling_kernels.h +18 -15
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/sparse_group.h +18 -15
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/sparse_tensor.h +18 -15
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/adaptive_avg_pool_3d_grad_op.h +0 -11
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/adaptive_avg_pool_3d_op.h +0 -11
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/adaptive_max_pool3_d_grad_op.h +0 -14
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/adaptive_max_pool3d_op.h +0 -18
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/adaptive_max_pool_2d_grad_op.h +0 -14
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/adjust_contrastv2_op.h +0 -21
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/arg_max_op.h +0 -22
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/bartlett_window_op.h +0 -19
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/cauchy_op.h +0 -11
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/cholesky_solve_op.h +0 -23
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/coalesce_op.h +0 -24
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/csr_sparse_matrix_to_dense_op.h +0 -15
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/dense_to_csr_sparse_matrix_op.h +0 -16
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/eig_op.h +0 -17
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/exp.h +0 -18
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/fractional_max_pool_grad_with_fixed_ksize_op.h +0 -22
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/fractional_max_pool_with_fixed_ksize_op.h +0 -19
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/geqrf_op.h +0 -14
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/glu_grad_op.h +0 -17
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/glu_op.h +0 -20
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/hamming_window_op.h +0 -20
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/index_fill.h +0 -18
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/instance_norm_v2_grad.h +0 -28
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/layer_norm_grad_grad_op.h +0 -17
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/log_normal_reverse.h +0 -15
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/logspace.h +0 -23
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/lstsq_op.h +0 -15
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/matrix_logarithm.h +0 -13
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/matrix_power_op.h +0 -16
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/max_pool_3d_grad_with_argmax_op.h +0 -26
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/maximum_grad_grad.h +0 -19
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/median_grad_op.h +0 -19
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/median_op.h +0 -17
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/minimum_grad_grad.h +0 -19
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/multi_margin_loss_grad_op.h +0 -24
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/multi_margin_loss_op.h +0 -19
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/mvlgamma_grad_op.h +0 -17
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/mvlgamma_op.h +0 -15
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/pdist_grad_op.h +0 -21
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/segment_mean_op.h +0 -18
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/segment_min_op.h +0 -19
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_addmm.h +0 -16
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_apply_adagrad_da.h +0 -38
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_apply_centered_rms_prop.h +0 -47
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_apply_momentum.h +0 -36
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_apply_proximal_gradient_descent.h +0 -29
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_matrix_transpose_op.h +0 -29
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_segment_mean_with_num_segments_op.h +0 -19
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_segment_sqrt_n_grad_op.h +0 -21
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_segment_sqrt_n_op.h +0 -18
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_segment_sqrt_n_with_num_segments_op.h +0 -20
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_tensor_to_csr_sparse_matrix_op.h +0 -18
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sspaddmm_op.h +0 -22
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/triplet_margin_loss_op.h +0 -22
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/utils/axis_util.h +5 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/utils/reduce_infer_util.h +1 -2
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/utils/transfer_shape_according_to_format.h +5 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/drop_out_gen_mask_kernels.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/gather_grad_kernels.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/replay_buffer/replay_buffer_factory.h +2 -1
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_util.h +2 -0
- mindspore/include/mindspore/ops/kernel/ascend/availability/silent_check/ascend_silent_check.h +13 -14
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/addbmm_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/addmm_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/addmv_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/baddbmm_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/batch_norm_ext_aclnn_kernel.h +1 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/batch_norm_grad_ext_aclnn_kernel.h +2 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/bincount_ext_aclnn_kernel.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/chunk_aclnn_kernel.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/contiguous_aclnn_kernel.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/conv1d_ext_aclnn_kernel.h +2 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/conv1d_padding_aclnn_kernel.h +2 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/conv2d_ext_aclnn_kernel.h +1 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/conv2d_padding_aclnn_kernel.h +3 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/conv3d_ext_aclnn_kernel.h +4 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/convolution_str_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/cross_entropy_loss_aclnn_kernel.h +48 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/cross_entropy_loss_grad_aclnn_kernel.h +47 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/custom_aclnn_kernel.h +5 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/custom_aclnn_utils.h +2 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/custom_v2_aclnn_kernel.h +83 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/dense_aclnn_kernel.h +13 -6
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/dropout_ext_aclnn_kernel.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/empty_aclnn_kernel.h +39 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/empty_like_aclnn_kernel.h +39 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/flash_attention_score_aclnn_kernel.h +1 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/flash_attention_score_grad_aclnn_kernel.h +1 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/gather_d_grad_v2_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/grid_sampler_2d_grad_aclnn_kernel.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/grid_sampler_3d_grad_aclnn_kernel.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/grouped_matmul_v2_aclnn_kernel.h +49 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/grouped_matmul_v4_aclnn_kernel.h +6 -3
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/index_add_ext_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/index_fill_scalar_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/index_fill_tensor_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inner_inplace_index_put_aclnn_kernel.h +1 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inner_moe_token_unpermute_aclnn_kernel.h +45 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_bernoulli_scalar_aclnn_kernel.h +47 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_bernoulli_tensor_aclnn_kernel.h +46 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_clamp_scalar_aclnn_kernel.h +2 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_divs_aclnn_kernel.h +41 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_index_add_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_normal_aclnn_kernel.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_scatter_add_aclnn_kernel.h +45 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_silu_aclnn_kernel.h +42 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_sub_scalar_aclnn_kernel.h +41 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_uniform_aclnn_kernel.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/isinf_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/linalg_vector_norm_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/masked_scatter_aclnn_kernel.h +45 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/matmul_all_reduce_aclnn_kernel.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/matmul_reduce_scatter_aclnn_kernel.h +4 -4
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/moe_distribute_combine_aclnn_kernel.h +56 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/moe_distribute_dispatch_aclnn_kernel.h +55 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/moe_init_routing_quant_v2_aclnn_kernel.h +50 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/mse_loss_ext_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/mse_loss_grad_ext_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/multinomial_ext_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/narrow_aclnn_kernel.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/new_empty_aclnn_kernel.h +39 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/new_full_aclnn_kernel.h +41 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/norm_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/prod_ext_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/quant_batch_matmul_all_reduce_aclnn_kernel.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/rand_ext_aclnn_kernel.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/randint_aclnn_kernel.h +4 -4
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/randn_aclnn_kernel.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/randperm_ext_aclnn_kernel.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/repeat_interleave_grad_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/ring_attention_update_aclnn_kernel.h +41 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/split_with_size_aclnn_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/unique2_aclnn_kernel.h +3 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/unique_consecutive_aclnn_kernel.h +2 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/unique_dim_aclnn_kernel.h +3 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/broadcast_to_view.h +42 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/chunk_view.h +42 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/concat_view.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/diagonal_view.h +42 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/expand_dims_view.h +42 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/flatten_view.h +42 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/narrow_view.h +43 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/reshape_view.h +2 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/select_ext_view.h +42 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/slice_ext_view.h +42 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/split_tensor_view.h +42 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/split_view.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/split_with_size_view.h +42 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/squeeze_view.h +42 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/strided_slice_view.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/transpose_view.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/unstack_ext_view.h +42 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/view.h +42 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/view_utils.h +0 -1
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn_auto_gen/apply_rotary_pos_emb_aclnn_kernel.h +41 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn_auto_gen/mla_aclnn_kernel.h +41 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn_auto_gen/paged_attention_aclnn_kernel.h +41 -0
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn_kernel_mod.h +139 -23
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn_kernel_utils.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/aclnn_utils.h +42 -17
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/atb_runner.h +124 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/atb_runner_base.h +48 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/atb_utils.h +63 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/abs.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/acos_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/acosh_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adamw.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adaptive_avg_pool1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adaptive_avg_pool2d_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adaptive_avg_pool2d_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adaptive_avg_pool3d_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adaptive_avg_pool3d_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adaptive_max_pool1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adaptive_max_pool2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/add.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/add_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/add_layer_norm_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/add_layernorm_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/add_rms_norm.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/add_rmsnorm_quant_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/add_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/addbmm.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/addcdiv_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/addcmul_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/addmm.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/addmv.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/all_finite.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/all_gather_matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/any.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/any_ext.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/apply_rotary_pos_emb.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/arange.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/argmax_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/argmax_with_value.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/argmin_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/argmin_with_value.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/argsort.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/as_strided.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/asin_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/asinh_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/atan2_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/atan_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/atanh.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/avg_pool1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/avg_pool2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/avg_pool2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/avg_pool3d_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/avg_pool3d_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/baddbmm.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_mat_mul.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_norm_elemt.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_norm_elemt_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_norm_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_norm_gather_stats_with_counts.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_norm_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_norm_reduce_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_norm_stats.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bernoulli_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/binary_cross_entropy.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/binary_cross_entropy_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/binary_cross_entropy_with_logits.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/binary_cross_entropy_with_logits_backward.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bincount_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bitwise_and_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bitwise_and_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bitwise_not.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bitwise_or_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bitwise_or_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bitwise_xor_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bitwise_xor_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bmm_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/broadcast_to.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/broadcast_to_view.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cast.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/ceil.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cell_backward_hook.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/chunk.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/chunk_view.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/clamp_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/clamp_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/clone.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/col2im_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/col2im_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/concat.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/constant_pad_nd.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/contiguous.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/conv1d_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/conv1d_padding.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/conv2d_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/conv2d_padding.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/conv3d_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/conv3d_padding.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/conv_transpose2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/convolution.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/convolution_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/convolution_str.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/convolution_str_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/copy.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cos.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cosh.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/count_nonzero.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cross.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cross_entropy_loss.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cross_entropy_loss_grad.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cummax.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cummin_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cumsum_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/custom_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/dense.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/diag_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/diagonal_view.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/div.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/divmod.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/divmods.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/divs.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/dot.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/dropout_do_mask_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/dropout_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/dropout_gen_mask_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/dropout_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/dynamic_quant_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/einsum_ext.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/elu.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/elu_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/elu_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/embedding.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/embedding_dense_backward.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/empty.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/empty_like.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/equal.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/equal_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/erf.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/erfc.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/erfinv.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/exp.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/exp2.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/expand_as.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/expand_dims.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/expand_dims_view.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/expm1.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/eye.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/ffn_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/fill_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/fill_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/flash_attention_score.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/flash_attention_score_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/flatten_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/floor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/floor_div.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/floor_div_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/fmod_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/fmod_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/frac.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/full_like.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/func_dropout_ext.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/func_max_pool2d.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/fused_infer_attention_score.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gather_d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gather_d_grad_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gcd.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gelu.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gelu_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gelu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gelu_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/generator.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/glu.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/glu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gmm.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gmm_backward.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gmm_backward_fusion.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gmm_v2.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gmm_v2_backward.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gmm_v2_backward_fusion.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/greater.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/greater_equal.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/greater_equal_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/grid_sampler_2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/grid_sampler_2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/grid_sampler_3d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/grid_sampler_3d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/group_norm.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/group_norm_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/grouped_matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/grouped_matmul_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/grouped_matmul_v4.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hardtanh.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hardtanh_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/histc_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hshrink.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hshrink_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hsigmoid.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hsigmoid_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hswish.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hswish_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/identity.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/im2col_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/incre_flash_attention.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/index.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/index_add_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/index_fill_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/index_fill_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/index_select.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inner_index.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inner_inplace_index_put.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inner_moe_token_unpermute.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inner_non_zero.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_add_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_addmm.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_adds_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_bernoulli_scalar.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_bernoulli_tensor.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_clamp_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_clamp_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_copy.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_div.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_divmod.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_divmods.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_divs.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_elu.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_erfinv.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_exp.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_exponential.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_fill_diagonal.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_fill_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_fill_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_floor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_floor_divide.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_floor_divides.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_grouped_matmul_add.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_hardtanh.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_index_add.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_index_put.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_log.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_masked_fill_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_masked_fill_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_matmul_add.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_mul.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_muls.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_normal.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_put.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_random.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_relu.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_remainder_tensor_scalar.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_remainder_tensor_tensor.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_scatter_add.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_scatter_src.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_scatter_src_reduce.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_scatter_value.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_scatter_value_reduce.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_silu.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_stop_gradient.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_sub_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_sub_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_tanh.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_threshold.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_uniform.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_zero.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/isclose.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/isfinite.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/isinf.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/isneginf.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/kl_div.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/kl_div_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/kthvalue.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/kv_cache_scatter_update.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/l1_loss_backward_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/l1_loss_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/layer_norm_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/layer_norm_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/leaky_relu_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/leaky_relu_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/lerp.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/lerp_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/less.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/less_equal.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/lin_space_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/linalg_qr.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/linalg_vector_norm.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/log.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/log10.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/log1p.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/log2.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/log_softmax.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/log_softmax_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/log_softmax_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logaddexp.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logaddexp2.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logical_and.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logical_not.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logical_or.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logical_xor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logsigmoid.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logsigmoid_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logsumexp.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/masked_fill.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/masked_scatter.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/masked_select.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/masked_select_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/matmul_allreduce_add_rmsnorm.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/matmul_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/matmul_reduce_scatter.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/matrix_inverse_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/max.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/max_dim.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/max_pool_grad_with_indices.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/max_pool_grad_with_mask.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/max_pool_with_indices.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/max_pool_with_mask.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/max_unpool2d_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/maximum.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mean_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/median_dim.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/median_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/meshgrid.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/min.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/min_dim.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/minimum.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mish_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mish_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mla.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mm_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_compute_expert_tokens.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_distribute_combine.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_distribute_dispatch.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_finalize_routing.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_gating_top_k_softmax.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_init_routing.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_init_routing_quant_v2.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_init_routing_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_token_permute.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_token_permute_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_token_unpermute.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_token_unpermute_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mse_loss_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mse_loss_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mul.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/muls.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/multi_scale_deformable_attn.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/multi_scale_deformable_attn_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/multinomial_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mv.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/nan_to_num.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/nansum.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/narrow.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/narrow_view.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/ne_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/neg.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/new_empty.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/new_full.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/new_ones.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/new_zeros.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/nllloss.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/nllloss_2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/nllloss_2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/nllloss_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/non_zero.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/non_zero_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/norm.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/normal_float_float.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/normal_float_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/normal_tensor_float.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/normal_tensor_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/not_equal.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/one_hot_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/ones.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/ones_like_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/outer.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/paged_attention.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/pixel_shuffle.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/polar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/pow.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/pow_scalar_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/pow_tensor_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/prelu.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/prelu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/prod_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/prompt_flash_attention.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/quant_batch_matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/quant_matmul.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/quant_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/rand_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/rand_like_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/randint.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/randint_like.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/randn.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/randn_like.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/randperm_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reciprocal.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reduce_all.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reduce_any.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reduce_max.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reduce_min.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reflection_pad_1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reflection_pad_1d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reflection_pad_2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reflection_pad_2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reflection_pad_3d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reflection_pad_3d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/relu.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/relu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/remainder_scalar_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/remainder_tensor_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/remainder_tensor_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/repeat.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/repeat_interleave_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/repeat_interleave_int.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/repeat_interleave_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/replication_pad_1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/replication_pad_1d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/replication_pad_2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/replication_pad_2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/replication_pad_3d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/replication_pad_3d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reshape.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reshape_and_cache.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reverse_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/ring_attention_update.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/rms_norm.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/rms_norm_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/roll.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/rotary_position_embedding.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/rotary_position_embedding_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/round.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/rsqrt.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/scatter.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/scatter_add_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/scatter_value.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/searchsorted.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/select.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/select_ext_view.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/select_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/selu_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/selu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sigmoid.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sigmoid_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sign.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/silent_check_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/silent_check_v3.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/silu.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/silu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sin.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sinc.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sinh.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/slice.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/slice_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/slice_ext_view.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/smooth_l1_loss.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/smooth_l1_loss_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/soft_margin_loss.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/soft_margin_loss_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/softmax.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/softmax_backward.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/softplus_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/softplus_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/softshrink.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/softshrink_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sort_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/speed_fusion_attention.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/speed_fusion_attention_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/split.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/split_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/split_tensor_view.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/split_with_size.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/split_with_size_view.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sqrt.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/square.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/squeeze.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/stack_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/std.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/std_mean.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sub.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sub_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sub_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sum_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/swiglu.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/swiglu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/t_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/take.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/tan.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/tanh.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/tanh_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/tensor_scatter_elements.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/threshold.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/threshold_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/tile.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/topk_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/trace_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/transpose.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/transpose_ext_view.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/transpose_view.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/triangular_solve.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/tril_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/triu.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/trunc.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/type_as.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/uniform_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/unique2.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/unique_consecutive.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/unique_dim.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/unstack_ext_view.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_bicubic2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_bicubic2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_bilinear2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_bilinear2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_linear1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_linear1d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_nearest1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_nearest1d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_nearest2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_nearest2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_nearest3d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_nearest3d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_trilinear3d.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_trilinear3d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/var.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/var_mean.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/view.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/view_as.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/weight_quant_batch_matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/xlogy.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/xlogy_scalar_other.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/xlogy_scalar_self.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/zeros.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/zeros_like_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/adamw.h +5 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/adaptive_avg_pool1d.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/adaptive_avg_pool3d_ext.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/adaptive_max_pool1d.h +3 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/adaptive_max_pool2d.h +3 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/add.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/add_layernorm_v2.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/add_rms_norm.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/add_rmsnorm_quant_v2.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/add_scalar.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/addbmm.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/addmm.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/addmv.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/all_finite.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/all_gather_matmul.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/any.h +34 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/any_ext.h +35 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/arange.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/argmax_ext.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/argmax_with_value.h +4 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/argmin_ext.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/argmin_with_value.h +4 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/argsort.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/avg_pool1d.h +4 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/avg_pool2d.h +5 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/avg_pool2d_grad.h +5 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/avg_pool3d_ext.h +6 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/avg_pool3d_grad_ext.h +6 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/baddbmm.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/batch_mat_mul.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/batch_norm_elemt.h +5 -7
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/batch_norm_ext.h +6 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/batch_norm_gather_stats_with_counts.h +5 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/batch_norm_grad_ext.h +6 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/batch_norm_stats.h +3 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/bernoulli_ext.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/binary_cross_entropy.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/binary_cross_entropy_grad.h +5 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/binary_cross_entropy_with_logits.h +6 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/binary_cross_entropy_with_logits_backward.h +4 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/bincount_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/bmm_ext.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/cell_backward_hook.h +27 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/clone.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/contiguous.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/conv1d_ext.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/conv1d_padding.h +5 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/conv2d_ext.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/conv2d_padding.h +5 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/conv3d_ext.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/conv3d_padding.h +5 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/conv_transpose2d.h +6 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/convolution.h +6 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/convolution_grad.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/convolution_str.h +6 -7
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/copy.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/count_nonzero.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/cross.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/cross_entropy_loss.h +37 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/cross_entropy_loss_grad.h +38 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/cummax.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/cummin_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/cumsum_ext.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/custom_ext.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/custom_kernel.h +79 -14
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/custom_launch_aclnn.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/dense.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/diag_ext.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/divmod.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/divmods.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/divs.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/dropout_do_mask_ext.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/dropout_ext.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/dropout_gen_mask_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/dropout_grad_ext.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/einsum_ext.h +36 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/elu_ext.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/elu_grad_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/embedding.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/embedding_dense_backward.h +5 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/empty.h +35 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/empty_like.h +36 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/eye.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/ffn_ext.h +8 -8
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/fill_scalar.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/fill_tensor.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/flash_attention_score.h +4 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/flash_attention_score_grad.h +6 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/flatten_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/floor_div.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/floor_div_scalar.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/full_like.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/func_dropout_ext.h +38 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/func_max_pool2d.h +38 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/fused_infer_attention_score.h +15 -18
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gather_d_grad_v2.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gelu_grad.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gelu_grad_ext.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gmm.h +38 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gmm_backward.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gmm_backward_fusion.h +37 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gmm_v2.h +38 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gmm_v2_backward.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gmm_v2_backward_fusion.h +36 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/grid_sampler_2d_grad.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/grid_sampler_3d_grad.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/group_norm.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/group_norm_grad.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/grouped_matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/grouped_matmul_v4.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/hshrink.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/hshrink_grad.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/identity.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/incre_flash_attention.h +11 -11
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/index.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/index_add_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/index_fill_scalar.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/index_fill_tensor.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inner_index.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inner_inplace_index_put.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inner_moe_token_unpermute.h +39 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inner_non_zero.h +1 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_add_ext.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_addmm.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_adds_ext.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_bernoulli_scalar.h +35 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_bernoulli_tensor.h +35 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_clamp_scalar.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_clamp_tensor.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_copy.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_div.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_divmod.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_divmods.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_divs.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_elu.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_erfinv.h +1 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_exp.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_exponential.h +36 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_fill_diagonal.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_fill_scalar.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_fill_tensor.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_floor.h +1 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_floor_divide.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_floor_divides.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_grouped_matmul_add.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_hardtanh.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_index_add.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_index_put.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_log.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_masked_fill_scalar.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_masked_fill_tensor.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_matmul_add.h +34 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_mul.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_muls.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_normal.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_put.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_random.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_relu.h +1 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_remainder_tensor_scalar.h +34 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_remainder_tensor_tensor.h +35 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_scatter_add.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_scatter_src.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_scatter_src_reduce.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_scatter_value.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_scatter_value_reduce.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_silu.h +35 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_stop_gradient.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_sub_ext.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_sub_scalar.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_tanh.h +1 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_threshold.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_uniform.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_zero.h +1 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/isclose.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/isinf.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/kl_div.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/kl_div_grad.h +3 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/kv_cache_scatter_update.h +3 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/l1_loss_backward_ext.h +3 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/l1_loss_ext.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/layer_norm_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/layer_norm_grad_ext.h +4 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/lerp.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/lerp_scalar.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/lin_space_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/linalg_qr.h +1 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/linalg_vector_norm.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/log_softmax_ext.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/log_softmax_grad.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/masked_fill.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/masked_scatter.h +35 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/masked_select.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/masked_select_grad.h +2 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/matmul.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/matmul_allreduce_add_rmsnorm.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/matmul_ext.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/matmul_reduce_scatter.h +5 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/max_pool_grad_with_indices.h +6 -7
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/max_pool_grad_with_mask.h +6 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/max_pool_with_indices.h +5 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/max_pool_with_mask.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/max_unpool2d_ext.h +6 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/mean_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/meshgrid.h +4 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/mm_ext.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/moe_distribute_combine.h +45 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/moe_distribute_dispatch.h +41 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/moe_token_permute.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/moe_token_unpermute.h +4 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/moe_token_unpermute_grad.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/move_to.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/mse_loss_ext.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/mse_loss_grad_ext.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/muls.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/multi_scale_deformable_attn.h +5 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/multi_scale_deformable_attn_grad.h +4 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/multinomial_ext.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/mv.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/nan_to_num.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/nansum.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/ne_scalar.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/new_empty.h +36 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/new_full.h +36 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/new_ones.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/new_zeros.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/nllloss.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/nllloss_2d.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/nllloss_2d_grad.h +4 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/nllloss_grad.h +4 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/non_zero.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/non_zero_ext.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/norm.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/normal_float_float.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/normal_float_tensor.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/normal_tensor_float.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/normal_tensor_tensor.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/one_hot_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/ones.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/ones_like_ext.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/outer.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/pixel_shuffle.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/pow_scalar_tensor.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/pow_tensor_scalar.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/prod_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/prompt_flash_attention.h +7 -7
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/quant_batch_matmul.h +7 -7
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/quant_matmul.h +41 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/quant_v2.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/rand_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/rand_like_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/randint.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/randint_like.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/randn.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/randn_like.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/randperm_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/reduce_all.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/relu_grad.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/repeat_interleave_grad.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/repeat_interleave_int.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/repeat_interleave_tensor.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/reshape.h +4 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/ring_attention_update.h +38 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/rms_norm.h +4 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/rotary_position_embedding_grad.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/round.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/scatter_add_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/searchsorted.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/sigmoid_grad.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/silent_check_v2.h +5 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/silent_check_v3.h +6 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/smooth_l1_loss.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/smooth_l1_loss_grad.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/soft_margin_loss.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/soft_margin_loss_grad.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/softmax.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/softshrink.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/softshrink_grad.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/sort_ext.h +5 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/speed_fusion_attention.h +4 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/speed_fusion_attention_grad.h +11 -12
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/square.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/std.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/std_mean.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/sub.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/sub_scalar.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/sum_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/t_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/take.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/tanh_grad.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/threshold.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/threshold_grad.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/tile.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/type_as.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/uniform_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/unique2.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/unique_consecutive.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/unique_dim.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/unstack_ext_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_bicubic2d.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_bicubic2d_grad.h +5 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_bilinear2d.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_bilinear2d_grad.h +6 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_linear1d.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_linear1d_grad.h +5 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_nearest1d.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_nearest1d_grad.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_nearest2d.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_nearest2d_grad.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_nearest3d.h +3 -4
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_nearest3d_grad.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_trilinear3d.h +4 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_trilinear3d_grad.h +6 -6
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/var.h +3 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/var_mean.h +1 -1
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/view_as.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/weight_quant_batch_matmul.h +5 -5
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/zeros.h +2 -2
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/zeros_like_ext.h +2 -3
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/internal/auto_generate/apply_rotary_pos_emb.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/internal/auto_generate/flash_attention_score.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/internal/auto_generate/mla.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/internal/auto_generate/paged_attention.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/internal/auto_generate/reshape_and_cache.h +40 -0
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/internal/functions/functions.h +35 -0
- mindspore/include/mindspore/ops/kernel/cpu/empty_cpu_kernel.h +55 -0
- mindspore/include/mindspore/ops/kernel/cpu/empty_like_cpu_kernel.h +55 -0
- mindspore/include/mindspore/ops/kernel/cpu/grid_sampler_2d_grad_cpu_kernel.h +3 -15
- mindspore/include/mindspore/ops/kernel/cpu/map_tensor/map_tensor_get_data_cpu_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/map_tensor/map_tensor_get_grad_cpu_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/masked_fill_cpu_kernel.h +1 -0
- mindspore/include/mindspore/ops/kernel/cpu/multi_margin_loss_cpu_kernel.h +6 -6
- mindspore/include/mindspore/ops/kernel/cpu/new_empty_cpu_kernel.h +55 -0
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/intrinsics/ms_simd_avx512_instructions.h +0 -3
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/intrinsics/ms_simd_avx_instructions.h +0 -3
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/intrinsics/ms_simd_sse_instructions.h +0 -3
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/abs.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/acos_ext.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/adaptive_avg_pool1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/adaptive_avg_pool2d_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/adaptive_avg_pool3d_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/adaptive_max_pool1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/adaptive_max_pool2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/add.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/add_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/add_layer_norm_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/add_rmsnorm_quant_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/add_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/addcdiv_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/addcmul_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/any.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/any_ext.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/apply_rotary_pos_emb.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/argmax_with_value.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/argmin_with_value.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/argsort.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/as_strided.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/atan_ext.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/atanh.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/avg_pool1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/avg_pool2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/avg_pool2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/avg_pool3d_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/avg_pool3d_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/baddbmm.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/batch_mat_mul.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/batch_norm_elemt.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/batch_norm_elemt_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/batch_norm_gather_stats_with_counts.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/batch_norm_reduce_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/batch_norm_stats.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bernoulli_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/binary_cross_entropy.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/binary_cross_entropy_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/binary_cross_entropy_with_logits.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/binary_cross_entropy_with_logits_backward.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bitwise_and_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bitwise_and_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bitwise_not.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bitwise_or_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bitwise_or_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bitwise_xor_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bitwise_xor_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bmm_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/broadcast_to.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/broadcast_to_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cast.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/ceil.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cell_backward_hook.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/chunk.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/chunk_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/clamp_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/clamp_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/clone.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/col2im_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/col2im_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/concat.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/constant_pad_nd.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/contiguous.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/conv1d_padding.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/conv2d_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/conv2d_padding.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/conv3d_padding.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/conv_transpose2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/convolution.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/convolution_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/convolution_str.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/convolution_str_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/copy.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cos.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cosh.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/count_nonzero.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cross.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cross_entropy_loss.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cross_entropy_loss_grad.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cummax.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dense.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/diag_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/diagonal_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_all_gather.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_all_gather_into_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_all_gather_into_tensor_uneven.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_all_reduce.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_all_to_all_v.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_all_to_all_v_single.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_barrier.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_batch_isend_irecv.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_broadcast.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_gather.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_gather_into_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_irecv.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_isend.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_reduce.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_reduce_scatter.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_reduce_scatter_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_reduce_scatter_tensor_uneven.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_scatter.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_scatter_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/div.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/divmod.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/divmods.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/divs.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dot.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dynamic_quant_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/einsum_ext.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/elu.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/embedding.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/embedding_dense_backward.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/empty.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/empty_like.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/equal.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/equal_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/erf.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/erfc.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/erfinv.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/exp.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/exp2.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/expand_dims.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/expand_dims_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/expm1.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/eye.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/ffn_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/floor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/floor_div.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/floor_div_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/fmod_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/frac.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/full_like.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/func_dropout_ext.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/fused_infer_attention_score.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gather_d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gather_d_grad_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gcd.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gelu.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gelu_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gelu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gelu_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/generator.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/glu.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/glu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/greater.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/greater_equal.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/greater_equal_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/grid_sampler_2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/grid_sampler_2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/grid_sampler_3d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/grid_sampler_3d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/group_norm.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/group_norm_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/grouped_matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/grouped_matmul_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/grouped_matmul_v4.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hardtanh.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hardtanh_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hshrink.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hshrink_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hsigmoid.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hsigmoid_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hswish.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hswish_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/identity.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/im2col_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/incre_flash_attention.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/index.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/index_add_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/index_fill_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/index_fill_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_comm_all_gather.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_comm_all_reduce.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_comm_all_to_all_v.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_comm_irecv.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_comm_isend.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_comm_reduce_scatter.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_index.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_inplace_index_put.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_moe_token_unpermute.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_non_zero.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_addmm.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_bernoulli_scalar.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_bernoulli_tensor.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_clamp_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_clamp_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_copy.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_div.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_divmod.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_divmods.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_divs.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_elu.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_erfinv.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_exponential.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_fill_diagonal.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_fill_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_fill_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_floor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_floor_divide.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_floor_divides.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_grouped_matmul_add.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_hardtanh.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_index_add.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_index_put.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_log.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_masked_fill_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_masked_fill_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_matmul_add.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_mul.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_muls.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_normal.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_put.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_random.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_relu.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_remainder_tensor_scalar.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_remainder_tensor_tensor.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_scatter_add.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_scatter_src.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_scatter_src_reduce.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_scatter_value.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_scatter_value_reduce.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_silu.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_sub_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_tanh.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_threshold.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_uniform.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_zero.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/isclose.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/isfinite.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/isinf.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/isneginf.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/kl_div.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/kl_div_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/kthvalue.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/kv_cache_scatter_update.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/layer_norm_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/leaky_relu_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/leaky_relu_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/lerp.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/lerp_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/less.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/less_equal.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/linalg_qr.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/linalg_vector_norm.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/log.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/log1p.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/log_softmax.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/log_softmax_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logaddexp.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logaddexp2.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logical_and.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logical_not.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logical_or.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logical_xor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logsigmoid.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logsigmoid_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logsumexp.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/masked_fill.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/masked_scatter.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/masked_select.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/masked_select_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/matmul_allreduce_add_rmsnorm.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/matmul_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/max.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/max_dim.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/maximum.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/mean_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/median_dim.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/median_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/meshgrid.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/min.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/min_dim.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/minimum.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/mish_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/mish_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/mla.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_compute_expert_tokens.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_distribute_combine.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_distribute_dispatch.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_finalize_routing.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_gating_top_k_softmax.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_init_routing.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_init_routing_quant_v2.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_init_routing_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_token_permute.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_token_permute_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_token_unpermute.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_token_unpermute_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/mul.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/muls.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/multi_scale_deformable_attn.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/multi_scale_deformable_attn_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/mv.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/nan_to_num.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/ne_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/neg.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/new_empty.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/new_full.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/new_ones.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/new_zeros.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/nllloss.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/nllloss_2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/nllloss_2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/nllloss_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/non_zero.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/norm.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/normal_float_float.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/normal_float_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/normal_tensor_float.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/normal_tensor_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/not_equal.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/ones.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/outer.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/paged_attention.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/pixel_shuffle.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/polar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/pow.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/pow_scalar_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/pow_tensor_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/prelu.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/prelu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/prod_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/quant_batch_matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/quant_matmul.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/quant_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/randint.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/randint_like.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/randn.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/randn_like.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reciprocal.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reduce_all.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reduce_any.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reduce_max.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reduce_min.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reflection_pad_1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reflection_pad_1d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reflection_pad_2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reflection_pad_2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reflection_pad_3d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reflection_pad_3d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/relu.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/relu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/repeat.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/replication_pad_1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/replication_pad_1d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/replication_pad_2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/replication_pad_2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/replication_pad_3d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/replication_pad_3d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reshape.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reshape_and_cache.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reverse_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/rms_norm_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/roll.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/rotary_position_embedding.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/rotary_position_embedding_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/round.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/rsqrt.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/scatter.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/scatter_add_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/scatter_value.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/searchsorted.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/select.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/select_ext_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/select_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/selu_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/selu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sigmoid.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sigmoid_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sign.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/silu.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/silu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sin.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sinc.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sinh.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/slice.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/slice_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/slice_ext_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/smooth_l1_loss.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/smooth_l1_loss_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/soft_margin_loss.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/soft_margin_loss_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/softmax.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/softmax_backward.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/softplus_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/softplus_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/softshrink.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/softshrink_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/speed_fusion_attention.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/speed_fusion_attention_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/split.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sqrt.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/square.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/squeeze.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/stack_ext.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sub.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sub_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sub_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sum_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/take.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/tan.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/tanh.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/tanh_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/tensor_scatter_elements.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/threshold.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/threshold_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/tile.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/transpose.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/transpose_ext_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/transpose_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/triangular_solve.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/triu.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/trunc.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/unique_consecutive.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_bicubic2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_bicubic2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_bilinear2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_bilinear2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_linear1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_linear1d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_nearest1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_nearest1d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_nearest2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_nearest2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_nearest3d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_nearest3d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_trilinear3d.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_trilinear3d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/view.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/weight_quant_batch_matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/xlogy.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/xlogy_scalar_other.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/xlogy_scalar_self.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/zeros.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/zeros_like_ext.h +38 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/any.h +34 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/any_ext.h +35 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/binary_cross_entropy_with_logits.h +4 -4
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/cell_backward_hook.h +27 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/clamp_scalar.h +2 -2
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/clamp_tensor.h +2 -3
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/contiguous.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/copy.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dense.h +2 -2
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_all_gather.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_all_reduce.h +36 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_barrier.h +36 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_broadcast.h +2 -2
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_gather.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_irecv.h +36 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_isend.h +36 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_scatter.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/divmod.h +2 -3
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/empty.h +35 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/empty_like.h +36 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/group_norm.h +3 -3
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/grouped_matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/identity.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/inner_comm_all_reduce.h +2 -3
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/inplace_copy.h +2 -2
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/layer_norm_ext.h +3 -3
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/masked_select.h +2 -2
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/matmul_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/max.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/mean_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/meshgrid.h +4 -2
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/min.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/new_empty.h +36 -0
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/new_ones.h +2 -2
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/new_zeros.h +2 -2
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/non_zero.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/pixel_shuffle.h +2 -2
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/pow_scalar_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/pow_tensor_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/prod_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/reshape.h +4 -2
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/round.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/searchsorted.h +3 -4
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/silu.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/silu_grad.h +1 -2
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/sum_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/unique_consecutive.h +2 -2
- mindspore/include/mindspore/ops/kernel/cpu/sample_distorted_bounding_box_v2_cpu_kernel.h +4 -0
- mindspore/include/mindspore/ops/kernel/cpu/sequence/bool_binary_arithmetic_cpu_kernel.h +42 -0
- mindspore/include/mindspore/ops/kernel/cpu/sequence/sequence_len_cpu_kernel.h +0 -1
- mindspore/include/mindspore/ops/kernel/cpu/sparse_apply_adagrad_cpu_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/cpu/sparse_apply_adagrad_v2_cpu_kernel.h +2 -2
- mindspore/include/mindspore/ops/kernel/cpu/stft_cpu_kernel.h +16 -16
- mindspore/include/mindspore/ops/kernel/cpu/utils/sampling_kernels.h +18 -15
- mindspore/include/mindspore/ops/kernel/gpu/arrays/broadcast_to_gpu_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/arrays/contiguous_gpu_kernel.h +9 -6
- mindspore/include/mindspore/ops/kernel/gpu/arrays/select_gpu_kernel.h +2 -2
- mindspore/include/mindspore/ops/kernel/gpu/arrays/unique_consecutive_gpu_kernel.h +2 -2
- mindspore/include/mindspore/ops/kernel/gpu/cuda_impl/cuda_class/unique_consecutive_helper.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/dynamic_akg/dynamic_utils.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/gpu_kernel.h +3 -4
- mindspore/include/mindspore/ops/kernel/gpu/math/binary_ext_ops_gpu_kernel.h +2 -2
- mindspore/include/mindspore/ops/kernel/gpu/math/correlate_gpu_kernel.h +15 -11
- mindspore/include/mindspore/ops/kernel/gpu/math/eps_gpu_kernel.h +4 -4
- mindspore/include/mindspore/ops/kernel/gpu/math/tracev2_grad_gpu_kernel.h +0 -1
- mindspore/include/mindspore/ops/kernel/gpu/nccl/nccl_recv_gpu_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/nccl/nccl_send_gpu_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/nn/activation_grad_kernel.h +3 -3
- mindspore/include/mindspore/ops/kernel/gpu/nn/adagrad_gpu_kernel.h +4 -2
- mindspore/include/mindspore/ops/kernel/gpu/nn/adam_gpu_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/nn/batch_norm_grad_gpu_kernel.h +1 -0
- mindspore/include/mindspore/ops/kernel/gpu/nn/kl_div_loss_grad_kernel.h +3 -2
- mindspore/include/mindspore/ops/kernel/gpu/other/dynamic_stitch_gpu_kernel.h +6 -6
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/abs.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/adaptive_avg_pool1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/adaptive_avg_pool2d_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/adaptive_avg_pool3d_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/adaptive_max_pool1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/adaptive_max_pool2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/add.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/add_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/add_layer_norm_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/add_rmsnorm_quant_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/add_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/addcdiv_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/addcmul_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/any.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/any_ext.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/apply_rotary_pos_emb.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/argmax_with_value.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/argmin_with_value.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/argsort.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/as_strided.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/atanh.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/avg_pool1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/avg_pool2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/avg_pool2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/avg_pool3d_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/avg_pool3d_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/baddbmm.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/batch_mat_mul.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/batch_norm_elemt.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/batch_norm_elemt_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/batch_norm_gather_stats_with_counts.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/batch_norm_reduce_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/batch_norm_stats.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bernoulli_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/binary_cross_entropy.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/binary_cross_entropy_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/binary_cross_entropy_with_logits.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/binary_cross_entropy_with_logits_backward.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bitwise_and_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bitwise_and_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bitwise_not.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bitwise_or_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bitwise_or_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bitwise_xor_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bitwise_xor_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bmm_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/broadcast_to.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/broadcast_to_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cast.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/ceil.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cell_backward_hook.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/chunk.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/chunk_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/clamp_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/clamp_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/clone.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/col2im_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/col2im_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/concat.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/constant_pad_nd.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/contiguous.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/conv1d_padding.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/conv2d_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/conv2d_padding.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/conv3d_padding.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/conv_transpose2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/convolution.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/convolution_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/convolution_str.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/convolution_str_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/copy.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cos.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cosh.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/count_nonzero.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cross.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cross_entropy_loss.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cross_entropy_loss_grad.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cummax.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dense.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/diag_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/diagonal_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_all_gather.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_all_gather_into_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_all_gather_into_tensor_uneven.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_all_reduce.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_all_to_all_v.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_all_to_all_v_single.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_barrier.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_batch_isend_irecv.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_broadcast.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_gather.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_gather_into_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_irecv.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_isend.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_reduce.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_reduce_scatter.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_reduce_scatter_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_reduce_scatter_tensor_uneven.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_scatter.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_scatter_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/div.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/divmod.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/divmods.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/divs.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dot.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dynamic_quant_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/einsum_ext.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/elu.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/embedding.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/embedding_dense_backward.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/equal.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/equal_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/erf.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/erfc.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/erfinv.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/exp.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/exp2.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/expand_dims.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/expand_dims_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/expm1.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/eye.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/ffn_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/floor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/floor_div.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/floor_div_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/fmod_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/frac.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/full_like.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/func_dropout_ext.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/fused_infer_attention_score.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gather_d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gather_d_grad_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gcd.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gelu.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gelu_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gelu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gelu_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/generator.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/glu.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/glu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/greater.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/greater_equal.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/greater_equal_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/grid_sampler_2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/grid_sampler_2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/grid_sampler_3d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/grid_sampler_3d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/group_norm.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/group_norm_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/grouped_matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/grouped_matmul_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/grouped_matmul_v4.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hardtanh.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hardtanh_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hshrink.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hshrink_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hsigmoid.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hsigmoid_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hswish.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hswish_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/identity.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/im2col_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/incre_flash_attention.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/index.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/index_add_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/index_fill_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/index_fill_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_comm_all_gather.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_comm_all_reduce.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_comm_all_to_all_v.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_comm_irecv.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_comm_isend.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_comm_reduce_scatter.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_index.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_inplace_index_put.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_moe_token_unpermute.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_non_zero.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_addmm.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_bernoulli_scalar.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_bernoulli_tensor.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_clamp_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_clamp_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_copy.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_div.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_divmod.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_divmods.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_divs.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_elu.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_erfinv.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_exponential.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_fill_diagonal.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_fill_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_fill_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_floor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_floor_divide.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_floor_divides.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_grouped_matmul_add.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_hardtanh.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_index_add.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_index_put.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_log.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_masked_fill_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_masked_fill_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_matmul_add.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_mul.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_muls.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_normal.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_put.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_random.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_relu.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_scatter_add.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_scatter_src.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_scatter_src_reduce.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_scatter_value.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_scatter_value_reduce.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_silu.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_sub_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_tanh.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_threshold.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_uniform.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_zero.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/isclose.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/isfinite.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/isinf.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/isneginf.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/kl_div.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/kl_div_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/kthvalue.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/kv_cache_scatter_update.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/leaky_relu_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/leaky_relu_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/lerp.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/lerp_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/less.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/less_equal.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/linalg_qr.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/linalg_vector_norm.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/log.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/log1p.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/log_softmax.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/log_softmax_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logaddexp.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logaddexp2.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logical_and.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logical_not.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logical_or.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logical_xor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logsigmoid.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logsigmoid_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logsumexp.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/masked_fill.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/masked_scatter.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/masked_select.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/masked_select_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/matmul_allreduce_add_rmsnorm.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/matmul_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/max.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/max_dim.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/maximum.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/mean_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/median_dim.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/median_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/meshgrid.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/min.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/min_dim.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/minimum.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/mish_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/mish_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/mla.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_compute_expert_tokens.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_distribute_combine.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_distribute_dispatch.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_finalize_routing.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_gating_top_k_softmax.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_init_routing.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_init_routing_quant_v2.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_init_routing_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_token_permute.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_token_permute_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_token_unpermute.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_token_unpermute_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/mul.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/muls.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/multi_scale_deformable_attn.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/multi_scale_deformable_attn_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/mv.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/nan_to_num.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/ne_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/neg.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/new_full.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/new_ones.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/new_zeros.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/nllloss.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/nllloss_2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/nllloss_2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/nllloss_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/non_zero.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/norm.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/normal_float_float.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/normal_float_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/normal_tensor_float.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/normal_tensor_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/not_equal.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/ones.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/outer.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/paged_attention.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/pixel_shuffle.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/polar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/pow.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/pow_scalar_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/pow_tensor_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/prelu.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/prelu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/prod_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/quant_batch_matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/quant_matmul.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/quant_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/randint.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/randint_like.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/randn.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/randn_like.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reciprocal.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reduce_all.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reduce_any.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reduce_max.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reduce_min.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reflection_pad_1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reflection_pad_1d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reflection_pad_2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reflection_pad_2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reflection_pad_3d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reflection_pad_3d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/relu.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/relu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/repeat.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/replication_pad_1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/replication_pad_1d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/replication_pad_2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/replication_pad_2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/replication_pad_3d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/replication_pad_3d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reshape.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reshape_and_cache.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reverse_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/rms_norm_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/roll.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/rotary_position_embedding.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/rotary_position_embedding_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/round.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/rsqrt.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/scatter.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/scatter_add_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/scatter_value.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/searchsorted.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/select.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/select_ext_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/select_v2.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/selu_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/selu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sigmoid.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sigmoid_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sign.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/silu.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/silu_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sin.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sinc.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sinh.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/slice.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/slice_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/slice_ext_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/smooth_l1_loss.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/smooth_l1_loss_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/soft_margin_loss.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/soft_margin_loss_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/softmax.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/softmax_backward.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/softplus_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/softplus_grad_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/softshrink.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/softshrink_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/speed_fusion_attention.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/speed_fusion_attention_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/split.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sqrt.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/square.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/squeeze.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sub.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sub_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sub_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sum_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/take.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/tan.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/tanh.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/tanh_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/tensor_scatter_elements.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/threshold.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/threshold_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/tile.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/transpose.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/transpose_ext_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/transpose_view.h +38 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/triangular_solve.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/triu.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/trunc.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_bicubic2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_bicubic2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_bilinear2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_bilinear2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_linear1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_linear1d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_nearest1d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_nearest1d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_nearest2d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_nearest2d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_nearest3d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_nearest3d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_trilinear3d.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_trilinear3d_grad.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/view.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/weight_quant_batch_matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/xlogy.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/xlogy_scalar_other.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/xlogy_scalar_self.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/zeros.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/any.h +34 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/any_ext.h +35 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/cell_backward_hook.h +27 -0
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/clamp_scalar.h +2 -2
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/clamp_tensor.h +2 -3
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/contiguous.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/copy.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/dense.h +2 -2
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/divmod.h +2 -3
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/grouped_matmul.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/identity.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/inner_comm_all_gather.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/inner_comm_all_reduce.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/inner_comm_isend.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/inner_comm_reduce_scatter.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/masked_select.h +2 -2
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/matmul_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/max.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/mean_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/meshgrid.h +4 -2
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/min.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/new_ones.h +2 -2
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/new_zeros.h +2 -2
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/non_zero.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/pixel_shuffle.h +2 -2
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/pow_scalar_tensor.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/pow_tensor_scalar.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/prod_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/reshape.h +4 -2
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/searchsorted.h +3 -4
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/sum_ext.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/random/random_categorical_gpu_kernel.h +4 -8
- mindspore/include/mindspore/ops/kernel/gpu/rl/buffer_sample_gpu_kernel.h +8 -8
- mindspore/include/mindspore/ops/kernel/gpu/rl/gru_gpu_kernel.h +1 -0
- mindspore/include/mindspore/ops/kernel/gpu/sparse/csr_sparse_matrix_to_sparse_tensor_gpu_kernel.h +1 -1
- mindspore/include/mindspore/ops/kernel/gpu/sparse/dense_to_csr_sparse_matrix_gpu_kernel.h +3 -3
- mindspore/include/mindspore/ops/kernel/gpu/sparse/sparse_matrix_sparse_matmul_gpu_kernel.h +2 -2
- mindspore/include/mindspore/ops/kernel/include/common/common_utils.h +9 -0
- mindspore/include/mindspore/ops/kernel/include/common/device_address.h +290 -33
- mindspore/include/mindspore/ops/kernel/include/common/device_type.h +3 -5
- mindspore/include/mindspore/ops/kernel/include/common/kernel.h +26 -1
- mindspore/include/mindspore/ops/kernel/include/common/kernel_tensor.h +135 -306
- mindspore/include/mindspore/ops/op_def/array_op_name.h +0 -1
- mindspore/include/mindspore/ops/op_def/array_ops.h +0 -2
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_lite_ops.h +3224 -2884
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_def.h +814 -759
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_a.h +63 -61
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_b.h +28 -27
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_c.h +38 -34
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_d.h +29 -25
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_e.h +28 -26
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_f.h +22 -19
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_g.h +25 -21
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_h.h +6 -6
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_i.h +74 -66
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_k.h +3 -2
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_l.h +32 -32
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_m.h +46 -38
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_n.h +19 -17
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_o.h +2 -2
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_p.h +6 -7
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_q.h +6 -3
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_r.h +61 -60
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_s.h +64 -60
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_t.h +19 -17
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_u.h +14 -14
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_v.h +2 -2
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_x.h +1 -1
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_z.h +1 -1
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_a.h +63 -61
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_b.h +28 -27
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_c.h +38 -34
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_d.h +29 -25
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_e.h +28 -26
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_f.h +22 -19
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_g.h +25 -21
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_h.h +6 -6
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_i.h +74 -66
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_k.h +3 -2
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_l.h +32 -32
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_m.h +46 -38
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_n.h +19 -17
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_o.h +2 -2
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_p.h +6 -7
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_q.h +6 -3
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_r.h +61 -60
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_s.h +64 -60
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_t.h +19 -17
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_u.h +14 -14
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_v.h +2 -2
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_x.h +1 -1
- mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_z.h +1 -1
- mindspore/include/mindspore/ops/op_def/framework_op_name.h +0 -1
- mindspore/include/mindspore/ops/op_def/framework_ops.h +3 -2
- mindspore/include/mindspore/ops/op_def/nn_op_name.h +4 -0
- mindspore/include/mindspore/ops/op_def/op_enum.h +4 -0
- mindspore/include/mindspore/ops/op_def/other_op_name.h +6 -0
- mindspore/include/mindspore/ops/op_def/other_ops.h +2 -0
- mindspore/include/mindspore/ops/op_def/structure_ops.h +12 -3
- mindspore/include/mindspore/ops/ops_utils/memory_overlap.h +4 -5
- mindspore/include/mindspore/ops/ops_utils/op_constants.h +13 -0
- mindspore/include/mindspore/ops/ops_utils/op_utils.h +14 -16
- mindspore/include/mindspore/ops/ops_utils/type_dispatch.h +51 -42
- mindspore/include/mindspore/ops/view/as_strided_strides_calc.h +5 -1
- mindspore/include/mindspore/ops/view/broadcast_to_strides_calc.h +7 -2
- mindspore/include/mindspore/ops/view/broadcast_to_view_strides_calc.h +34 -0
- mindspore/include/mindspore/ops/view/chunk_strides_calc.h +5 -1
- mindspore/include/mindspore/ops/view/chunk_view_strides_calc.h +35 -0
- mindspore/include/mindspore/ops/view/diagonal_strides_calc.h +6 -3
- mindspore/include/mindspore/ops/view/diagonal_view_strides_calc.h +32 -0
- mindspore/include/mindspore/ops/view/expand_dims_strides_calc.h +5 -1
- mindspore/include/mindspore/ops/view/expand_dims_view_strides_calc.h +34 -0
- mindspore/include/mindspore/ops/view/narrow_strides_calc.h +4 -2
- mindspore/include/mindspore/ops/view/narrow_view_strides_calc.h +33 -0
- mindspore/include/mindspore/ops/view/reshape_strides_calc.h +2 -1
- mindspore/include/mindspore/ops/view/select_ext_view_strides_calc.h +33 -0
- mindspore/include/mindspore/ops/view/slice_ext_strides_calc.h +7 -1
- mindspore/include/mindspore/ops/view/slice_ext_view_strides_calc.h +35 -0
- mindspore/include/mindspore/ops/view/slice_strides_calc.h +4 -1
- mindspore/include/mindspore/ops/view/split_strides_calc.h +3 -1
- mindspore/include/mindspore/ops/view/split_tensor_strides_calc.h +5 -2
- mindspore/include/mindspore/ops/view/split_tensor_view_strides_calc.h +33 -0
- mindspore/include/mindspore/ops/view/split_with_size_strides_calc.h +5 -1
- mindspore/include/mindspore/ops/view/split_with_size_view_strides_calc.h +36 -0
- mindspore/include/mindspore/ops/view/squeeze_strides_calc.h +3 -0
- mindspore/include/mindspore/ops/view/transpose_ext_view_strides_calc.h +33 -0
- mindspore/include/mindspore/ops/view/transpose_strides_calc.h +3 -1
- mindspore/include/mindspore/ops/view/transpose_view_strides_calc.h +32 -0
- mindspore/include/mindspore/ops/view/unstack_ext_view_strides_calc.h +28 -0
- mindspore/include/mindspore/ops/view/view_strides_calc.h +5 -2
- mindspore/include/mindspore/ops/view/view_strides_calculator.h +1 -2
- mindspore/include/ms_extension.h +12 -5
- mindspore/lib/libavcodec.so.59 +0 -0
- mindspore/lib/libavdevice.so.59 +0 -0
- mindspore/lib/libavfilter.so.8 +0 -0
- mindspore/lib/libavformat.so.59 +0 -0
- mindspore/lib/libavutil.so.57 +0 -0
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libicuuc.so.74 +0 -0
- mindspore/lib/libmindspore_backend_common.so +0 -0
- mindspore/lib/libmindspore_backend_manager.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_dump.so +0 -0
- mindspore/lib/libmindspore_extension.so +0 -0
- mindspore/lib/libmindspore_frontend.so +0 -0
- mindspore/lib/libmindspore_ge_backend.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_memory_pool.so +0 -0
- mindspore/lib/libmindspore_ms_backend.so +0 -0
- mindspore/lib/libmindspore_ops.so +0 -0
- mindspore/lib/libmindspore_ops_kernel_common.so +0 -0
- mindspore/lib/libmindspore_profiler.so +0 -0
- mindspore/lib/libmindspore_pyboost.so +0 -0
- mindspore/lib/libmindspore_pynative.so +0 -0
- mindspore/lib/libmindspore_res_manager.so +0 -0
- mindspore/lib/libmindspore_runtime_pipeline.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libmpi_collective.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/libswresample.so.4 +0 -0
- mindspore/lib/libswscale.so.6 +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +96 -152
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/framework/plugin/npu_supported_ops.json +6 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/lib/libcust_opapi.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -180
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/config/ascend910_93/aic-ascend910_93-ops-info.json +0 -180
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +0 -180
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/all_finite.py +28 -12
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +2 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +2 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +2 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +2 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +2 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +2 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +2 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +2 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +2 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/binary_info_config.json +0 -300
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910_93/binary_info_config.json +0 -300
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +0 -300
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_proto/inc/op_proto.h +0 -22
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/version.info +1 -1
- mindspore/lib/plugin/ascend/custom_compiler/OWNERS +1 -3
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libd_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/liblowlatency_collective.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_ascend_res_manager.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_atb_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_extension_ascend_atb.a +0 -0
- mindspore/lib/plugin/ascend/libmindspore_graph_ir.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_internal_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_pyboost_atb_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libms_ascend_native_boost.so +0 -0
- mindspore/lib/plugin/ascend/libms_atb_boost.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/faUpdate.h +35 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/fill.h +4 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/logprobs.h +28 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/matmul.h +7 -6
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/norm.h +8 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/params.h +3 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/scatter_elements_v2.h +39 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/fused_add_topk_div.h +42 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/kvcache.h +7 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/mla.h +55 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/mla_preprocess.h +39 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/pagedattention.h +1 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/params.h +7 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/reshape_and_cache.h +2 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/rms_norm_and_rope_and_reshape_and_cache.h +31 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/rope_q_concat.h +26 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/swiglu_quant.h +26 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/toppsample_rand.h +31 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/unpad_flash_attention.h +12 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcal_api.h +1 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcal_comm.h +4 -3
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcal_types.h +2 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lccl.h +2 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/lcoc.h +53 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/lcoc_args.h +116 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/lcoc_base.h +57 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/lcoc_func.h +33 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/tiling/tiling.h +86 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/tiling/tiling_91093.h +31 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/tiling/tiling_910B.h +31 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/tiling/tiling_args.h +154 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/tiling/tiling_func.h +50 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc.h +5 -35
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc_args.h +97 -47
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc_func.h +33 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling/tiling.h +86 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling/tiling_91093.h +31 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling/tiling_910B.h +31 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling/tiling_args.h +154 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling/tiling_func.h +50 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling.h +86 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling_91093.h +11 -9
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling_910B.h +12 -10
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling_args.h +38 -69
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling_func.h +14 -8
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/base/aicpu_kernel_base.h +1 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/bin_handle.h +6 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/tensor.h +5 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/types.h +4 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/bf16/bf16_t.h +20 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/cfg/cfg_core.h +39 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/cfg/cfg_item.h +25 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/file_system/file_system.h +2 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/inifile/ini_file.h +2 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/log/log.h +7 -7
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/log/log_core.h +1 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/log/log_sink_file.h +1 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/rt/base/types.h +2 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libatb_mixops.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libatb_mixops_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libexp_mixops_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libexp_ops_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libmki.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libtbe_adapter.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/base_type.h +9 -4
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_op.h +8 -10
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_creator.h +40 -7
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_param.h +85 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tiling_utils.h +3 -138
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layer_norm_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_quant_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcompare_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libfused_add_topk_div_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgroup_topk_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libkv_scale_cache_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libllama_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmoe_gating_group_topk_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmoe_init_routing_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmoe_token_unpermute_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmulti_weight_matmul_kernel_gelu_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmulti_weight_matmul_kernel_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libswft_dynamic_quant_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libswft_matmul_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libswft_moe_init_routing_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libswft_paged_attention_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libswft_reshape_and_cache_nz_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libswft_transpose_batch_matmul_transpose_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libswiglu_dynamic_quant_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libtranspose_batch_matmul_transpose_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_grouped_matmul_f16_310p/internal_grouped_matmul_f16_310p.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_grouped_matmul_f16_310p/internal_grouped_matmul_f16_310p_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_grouped_matmul_i8_310p/internal_grouped_matmul_i8_310p.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_grouped_matmul_i8_310p/internal_grouped_matmul_i8_310p_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_f16_nz/internal_pp_matmul_f16_nz.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_f16_nz/internal_pp_matmul_f16_nz_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_i8_nz_compress/internal_pp_matmul_i8_nz_compress.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_i8_nz_compress/internal_pp_matmul_i8_nz_compress_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_int8_nz/internal_pp_matmul_int8_nz.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_int8_nz/internal_pp_matmul_int8_nz_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libadd_rms_norm_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libadd_rms_norm_quant_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libapply_rotary_pos_emb_310p_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libcast_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libcompare_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libfused_add_topk_div_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libgelu_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libmatmul_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libmoe_gating_group_topk_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libmoe_init_routing_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libmoe_token_unpermute_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libmulti_weight_matmul_kernel_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libmulti_weight_matmul_kernel_gelu_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libreshape_and_cache_nz_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libswft_dynamic_quant_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libswft_matmul_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libswft_moe_init_routing_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libswft_paged_attention_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libswft_reshape_and_cache_nz_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libswft_transpose_batch_matmul_transpose_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libswiglu_dynamic_quant_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_matmul_postfusion_mix/internal_matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_matmul_postfusion_mix/internal_matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_matmul_postfusion_mix/internal_matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_multi_weight_matmul_postfusion_mix/internal_multi_weight_matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_multi_weight_matmul_postfusion_mix/internal_multi_weight_matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_multi_weight_matmul_postfusion_mix/internal_multi_weight_matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_bf16.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_fp16.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_fp32.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_bf16.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_fp16.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_fp32.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/paged_attention_v2/paged_attention_v2.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/paged_attention_v2/paged_attention_v2_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/paged_attention_v2/paged_attention_v2_mix_aiv_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libadd_layer_norm_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libadd_rms_norm_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libadd_rms_norm_quant_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libapply_rotary_pos_emb_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libcast_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libcompare_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libgelu_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libgroup_topk_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libkv_scale_cache_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libllama_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libmatmul_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libmoe_gating_group_topk_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libmulti_weight_matmul_kernel_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libreshape_and_cache_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/librms_norm_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libswiglu_dynamic_quant_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libtranspose_batch_matmul_transpose_ascend910b.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/cpu/libmindspore_cpu_res_manager.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_ops_ascend.so +0 -0
- mindspore/lib/plugin/libmindspore_ops_host.so +0 -0
- mindspore/mindrecord/tools/cifar10.py +61 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mint/__init__.py +6 -46
- mindspore/mint/distributed/__init__.py +5 -0
- mindspore/mint/distributed/distributed.py +429 -23
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +163 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +140 -104
- mindspore/mint/nn/layer/normalization.py +11 -25
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/nn/cell.py +491 -623
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +36 -36
- mindspore/nn/layer/basic.py +74 -77
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +117 -110
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +38 -40
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +4 -6
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -0
- mindspore/nn/optim/lamb.py +1 -3
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +2 -3
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +73 -42
- mindspore/nn/wrap/grad_reducer.py +37 -52
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +7 -7
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +5 -5
- mindspore/numpy/utils_const.py +1 -1
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/ops/_op_impl/cpu/joinedstr_op.py +28 -0
- mindspore/ops/_vmap/vmap_array_ops.py +31 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +460 -419
- mindspore/ops/auto_generate/gen_extend_func.py +1231 -1349
- mindspore/ops/auto_generate/gen_ops_def.py +6895 -6215
- mindspore/ops/auto_generate/gen_ops_prim.py +16686 -15462
- mindspore/ops/auto_generate/pyboost_inner_prim.py +342 -312
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +9 -5
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +19 -102
- mindspore/ops/function/debug_func.py +8 -5
- mindspore/ops/function/grad/grad_func.py +5 -13
- mindspore/ops/function/math_func.py +77 -572
- mindspore/ops/function/nn_func.py +46 -94
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +44 -5
- mindspore/ops/function/vmap_func.py +2 -1
- mindspore/ops/functional.py +4 -4
- mindspore/ops/functional_overload.py +1206 -630
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +16 -11
- mindspore/ops/operations/_custom_ops_utils.py +689 -34
- mindspore/ops/operations/_inner_ops.py +14 -18
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +5 -51
- mindspore/ops/operations/comm_ops.py +186 -41
- mindspore/ops/operations/custom_ops.py +303 -177
- mindspore/ops/operations/debug_ops.py +59 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +27 -28
- mindspore/ops/operations/math_ops.py +8 -9
- mindspore/ops/operations/nn_ops.py +8 -40
- mindspore/ops/primitive.py +9 -20
- mindspore/ops/tensor_method.py +63 -15
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +8 -3
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +16 -23
- mindspore/parallel/_cell_wrapper.py +113 -45
- mindspore/parallel/_parallel_serialization.py +4 -3
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +17 -12
- mindspore/parallel/_utils.py +5 -11
- mindspore/parallel/auto_parallel.py +35 -14
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +13 -7
- mindspore/parallel/cluster/process_entity/_api.py +88 -49
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +48 -7
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +12 -12
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
- mindspore/parallel/shard.py +10 -25
- mindspore/parallel/transform_safetensors.py +469 -174
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/msprof_cmd_tool.py +2 -2
- mindspore/profiler/common/path_manager.py +9 -0
- mindspore/profiler/common/profiler_context.py +50 -29
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_meta_data.py +1 -0
- mindspore/profiler/common/profiler_op_analyse.py +239 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +374 -338
- mindspore/profiler/envprofiler.py +42 -12
- mindspore/profiler/experimental_config.py +112 -7
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/cpu_profiler.py +10 -4
- mindspore/profiler/platform/npu_profiler.py +30 -20
- mindspore/profiler/profiler.py +218 -154
- mindspore/profiler/profiler_action_controller.py +65 -77
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +8 -6
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +87 -45
- mindspore/runtime/memory.py +31 -32
- mindspore/runtime/thread_bind_core.py +299 -165
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/scipy/linalg.py +2 -2
- mindspore/scipy/utils_const.py +0 -17
- mindspore/train/_utils.py +17 -7
- mindspore/train/amp.py +43 -23
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_callback.py +2 -1
- mindspore/train/callback/_checkpoint.py +4 -14
- mindspore/train/callback/_flops_collector.py +11 -7
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +98 -21
- mindspore/train/data_sink.py +15 -6
- mindspore/train/dataset_helper.py +14 -5
- mindspore/train/model.py +133 -69
- mindspore/train/serialization.py +168 -126
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/bin/dataset-cache +0 -0
- mindspore/utils/bin/dataset-cache-server +0 -0
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +163 -77
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +14 -17
- mindspore/version.py +1 -1
- mindspore-2.7.0.dist-info/METADATA +368 -0
- mindspore-2.7.0.dist-info/RECORD +12024 -0
- mindspore-2.7.0.dist-info/WHEEL +5 -0
- mindspore/_deprecated/__init__.py +0 -17
- mindspore/_deprecated/jit.py +0 -198
- mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
- mindspore/common/auto_dynamic_shape.py +0 -504
- mindspore/communication/_hccl_management.py +0 -297
- mindspore/experimental/es/__init__.py +0 -22
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/proactive_fallback_expander.h +0 -39
- mindspore/include/mindspore/ccsrc/backend/common/session/session_context.h +0 -47
- mindspore/include/mindspore/ccsrc/backend/ge_backend/pass/matmul_allreduce_add_rmsnorm_fusion.h +0 -67
- mindspore/include/mindspore/ccsrc/backend/graph_compiler/backend.h +0 -124
- mindspore/include/mindspore/ccsrc/backend/graph_compiler/backend_base.h +0 -205
- mindspore/include/mindspore/ccsrc/backend/graph_compiler/ge_backend/ge_backend.h +0 -86
- mindspore/include/mindspore/ccsrc/debug/data_dump/data_dumper.h +0 -56
- mindspore/include/mindspore/ccsrc/debug/hooker/acl_data_adapter.h +0 -51
- mindspore/include/mindspore/ccsrc/debug/hooker/adapter.h +0 -75
- mindspore/include/mindspore/ccsrc/debug/hooker/deprecated_env.h +0 -27
- mindspore/include/mindspore/ccsrc/debug/hooker/hook_debugger.h +0 -55
- mindspore/include/mindspore/ccsrc/debug/hooker/hook_dynamic_loader.h +0 -52
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/decoder_k_v_cache_info.h +0 -58
- mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/prompt_k_v_cache_info.h +0 -59
- mindspore/include/mindspore/ccsrc/include/backend/debug/data_dump/overflow_dumper.h +0 -50
- mindspore/include/mindspore/ccsrc/include/backend/device_synchronizer.h +0 -49
- mindspore/include/mindspore/ccsrc/include/backend/distributed/rpc/rdma/constants.h +0 -174
- mindspore/include/mindspore/ccsrc/include/backend/distributed/rpc/rdma/rdma_client.h +0 -83
- mindspore/include/mindspore/ccsrc/include/backend/distributed/rpc/rdma/rdma_server.h +0 -71
- mindspore/include/mindspore/ccsrc/include/common/np_dtype/np_dtypes.h +0 -42
- mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.h +0 -239
- mindspore/include/mindspore/ccsrc/pipeline/jit/pi/utils/ptr_list_ref.h +0 -423
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/pipeline_jit.h +0 -68
- mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/inplace_validation.h +0 -32
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.h +0 -43
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.h +0 -88
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_assign.h +0 -255
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.h +0 -46
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.h +0 -81
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/dvm/dvm.h +0 -232
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/ge/ge_kernel_build.h +0 -28
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/ge/ge_kernel_mod.h +0 -78
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/base/model_creator.h +0 -27
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_core.h +0 -43
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_entity.h +0 -44
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_sink.h +0 -32
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_sink_file.h +0 -39
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_sink_stdout.h +0 -30
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_stream.h +0 -51
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/filesystem.h +0 -45
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/format_type/rectify_do_mask_kernel_info.h +0 -44
- mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.h +0 -73
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_hash_table.h +0 -127
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_hash_table_util.h +0 -114
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_kernel_runtime.h +0 -82
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_session.h +0 -61
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/custom/custom_julia_cpu_kernel.h +0 -50
- mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/custom/julia_api.h +0 -443
- mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_kernel_runtime.h +0 -145
- mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_deprecated_interface.h +0 -43
- mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_inference_session.h +0 -50
- mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_session.h +0 -93
- mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/ascend_device_address/ascend_device_synchronizer.h +0 -45
- mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/cpu_device_address/cpu_device_synchronizer.h +0 -45
- mindspore/include/mindspore/ccsrc/plugin/res_manager/gpu/device/gpu_device_synchronizer.h +0 -44
- mindspore/include/mindspore/ccsrc/ps/core/communicator/ssl_http.h +0 -60
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/select_ext.h +0 -44
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/transpose_ext.h +0 -44
- mindspore/include/mindspore/ccsrc/pyboost/auto_generate/unstack_ext.h +0 -44
- mindspore/include/mindspore/ccsrc/pynative/grad/auto_grad.h +0 -77
- mindspore/include/mindspore/ccsrc/pynative/grad/ir/bprop_tensor_replace.h +0 -58
- mindspore/include/mindspore/ccsrc/pynative/grad/ir/dynamic_shape.h +0 -204
- mindspore/include/mindspore/ccsrc/pynative/grad/ir/ir_bprop.h +0 -163
- mindspore/include/mindspore/ccsrc/pynative/grad/ir/ir_grad.h +0 -114
- mindspore/include/mindspore/ccsrc/pynative/grad/ir/ir_pass.h +0 -71
- mindspore/include/mindspore/ccsrc/pynative/grad/jit/jit_dfunctor.h +0 -28
- mindspore/include/mindspore/ccsrc/pynative/grad/variable.h +0 -466
- mindspore/include/mindspore/ccsrc/pynative/op_function/auto_generate/pyboost_functions.h +0 -1019
- mindspore/include/mindspore/ccsrc/runtime/device/kernel_runtime.h +0 -223
- mindspore/include/mindspore/ccsrc/runtime/device/kernel_runtime_manager.h +0 -71
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_gather_actor.h +0 -78
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_switch_actor.h +0 -91
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/custom_actor.h +0 -66
- mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/inline_control_flow_scheduler.h +0 -81
- mindspore/include/mindspore/ccsrc/runtime/hardware/deprecated_interface.h +0 -47
- mindspore/include/mindspore/core/include/base/fp8_e4m3.h +0 -263
- mindspore/include/mindspore/core/include/base/fp8_e5m2.h +0 -258
- mindspore/include/mindspore/core/include/ir/base_tensor.h +0 -1073
- mindspore/include/mindspore/ops/infer/masked_scatter.h +0 -44
- mindspore/include/mindspore/ops/infer/ops_func_impl/decoder_k_v_cache.h +0 -40
- mindspore/include/mindspore/ops/infer/ops_func_impl/gmm_backward.h +0 -28
- mindspore/include/mindspore/ops/infer/ops_func_impl/gmm_v2_backward.h +0 -28
- mindspore/include/mindspore/ops/infer/ops_func_impl/moe_token_unpermute.h +0 -36
- mindspore/include/mindspore/ops/infer/ops_func_impl/prompt_k_v_cache.h +0 -40
- mindspore/include/mindspore/ops/infer/ops_func_impl/select_ext.h +0 -40
- mindspore/include/mindspore/ops/infer/ops_func_impl/transpose_ext.h +0 -35
- mindspore/include/mindspore/ops/infer/ops_func_impl/unstack_ext.h +0 -38
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/densetodense_set_operation.h +0 -47
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/densetosparsesetoperation.h +0 -74
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/lu.h +0 -35
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/ragged_tensor_to_sparse.h +0 -92
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/ragged_tensor_to_tensor.h +0 -120
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/sample_distorted_bounding_box_ext2.h +0 -103
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/scale_and_translate.h +0 -77
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/sparse_cross.h +0 -111
- mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/sparse_sparse_maximum.h +0 -61
- mindspore/include/mindspore/ops/kernel/ascend/ascendc/op_host/decoder_kv_cache_tiling.h +0 -40
- mindspore/include/mindspore/ops/kernel/ascend/ascendc/op_host/prompt_kv_cache_tiling.h +0 -39
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/conv3d_padding_aclnn_kernel.h +0 -77
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/moe_token_unpermute_aclnn_kernel.h +0 -45
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/contiguous.h +0 -41
- mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn_auto_gen/select_ext_aclnn_kernel.h +0 -41
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/select_ext.h +0 -40
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/transpose_ext.h +0 -40
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/unstack_ext.h +0 -40
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/customize_copy.h +0 -37
- mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/unstack_ext.h +0 -36
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/arithmetic_self_parameter.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/batch_to_space_base.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/cast_base.h +0 -74
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/concat_base.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/conv1x1_base.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/conv_common_base.h +0 -41
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/crop_base.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/depth_to_space_base.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/fill_base.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/format_transpose.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/gather_d_base.h +0 -55
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/minimal_filtering_generator.h +0 -58
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/scatter_nd_binary.h +0 -37
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/space_to_depth_base.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/split_with_over_lap_base.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/stack_base.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/transpose_base.h +0 -69
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/batchnorm_parameter.h +0 -29
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/call_parameter.h +0 -28
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/clip_parameter.h +0 -29
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/conv3d_parameter.h +0 -26
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/cumsum_parameter.h +0 -29
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/custom_gru_parameter.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/custom_is_inf_parameter.h +0 -26
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/custom_masked_fill_parameter.h +0 -26
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/custom_parameter.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fill_parameter.h +0 -25
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/format_transpose_parameter.h +0 -29
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/activation_fp16.h +0 -43
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/arg_min_max_fp16.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/arithmetic_self_fp16.h +0 -57
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/batchnorm_fp16.h +0 -36
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/cast_fp16.h +0 -94
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/common_func_fp16.h +0 -40
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/constant_of_shape_fp16.h +0 -38
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/conv_depthwise_fp16.h +0 -65
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/conv_fp16.h +0 -60
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/crop_fp16.h +0 -26
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/custom_gru_fp16.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/deconv_fp16.h +0 -36
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/deconv_winograd_fp16.h +0 -48
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/dynamic_quant_fp16.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/fill_fp16.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/gru_fp16.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/instance_norm_fp16.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/layer_norm_fp16.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/log_softmax_fp16.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/lstm_fp16.h +0 -54
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/matmul_fp16.h +0 -128
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/matrix_fp16.h +0 -36
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/pack_fp16.h +0 -93
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/pad_fp16.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/power_fp16.h +0 -64
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/prelu_fp16.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/quant_dtype_cast_fp16.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/range_fp16.h +0 -27
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/reduce_fp16.h +0 -41
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/resize_fp16.h +0 -56
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/scale_fp16.h +0 -38
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/softmax_fp16.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/sparse_to_dense_fp16.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/splice_fp16.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/topk_fp16.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/transpose_fp16.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/unique_fp16.h +0 -29
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/utils_fp16.h +0 -25
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/where_fp16.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/winograd_transform_fp16.h +0 -57
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/winograd_utils_fp16.h +0 -571
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/activation_grad_fp16.h +0 -44
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/arithmetic_grad.h +0 -41
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/arithmetic_self_grad.h +0 -39
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/batch_norm.h +0 -40
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/convolution_grad_filter.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/convolution_grad_input.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/dropout_grad.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/gemm_fp16.h +0 -46
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/layernorm_grad.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/pack_fp16_ext.h +0 -37
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/pooling_grad.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/resize_grad.h +0 -45
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/strided_slice_grad.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/unsorted_segment_sum.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/adder_fp32.h +0 -47
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/arg_min_max_fp32.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/arithmetic_compare_fp32.h +0 -77
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/attention_fp32.h +0 -72
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/batchnorm_fp32.h +0 -40
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/bias_add.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/common_func_fp32.h +0 -106
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/constant_of_shape_fp32.h +0 -52
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_1x1_avx_fp32.h +0 -40
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_1x1_x86_fp32.h +0 -21
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_common_fp32.h +0 -60
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_depthwise_avx_fp32.h +0 -37
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_depthwise_fp32.h +0 -148
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_im2col_avx512_fp32.h +0 -38
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_im2col_fp32.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_sw.h +0 -132
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_sw_arm64_fp32.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_sw_avx_fp32.h +0 -42
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_winograd_fp32.h +0 -48
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/crop_fp32.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/cumsum_fp32.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/custom_gru_fp32.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/deconv_fp32.h +0 -37
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/deconv_winograd_fp32.h +0 -46
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/detection_post_process_fp32.h +0 -60
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/embedding_lookup_fp32.h +0 -43
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/group_norm_fp32.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/gru_fp32.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/instance_norm_fp32.h +0 -50
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/invert_permutation_fp32.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/l2_norm_fp32.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/layer_norm_fp32.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/local_response_norm_fp32.h +0 -26
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/log_softmax_fp32.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/matmul_avx512_mask_fp32.h +0 -209
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/non_max_suppression_fp32.h +0 -25
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/online_fusion/cast_gather_reduce_fp32.h +0 -37
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/online_fusion/reduce_concat_fp32.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/online_fusion/split_reduce_concat_fp32.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/pad_fp32.h +0 -40
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/prelu_fp32.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/prior_box_fp32.h +0 -41
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/range_fp32.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/rank_fp32.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/resize_fp32.h +0 -74
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/reverse_fp32.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/reverse_sequence_fp32.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/scale_fp32.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/sparse_to_dense_fp32.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/splice_fp32.h +0 -26
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/transpose_fp32.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/transpose_server_fp32.h +0 -40
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/triu_tril_fp32.h +0 -42
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/unique_fp32.h +0 -36
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/where_fp32.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/winograd_avx.h +0 -299
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/winograd_transform.h +0 -51
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/winograd_utils.h +0 -373
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/batch_norm_grad.h +0 -37
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/batch_norm_parameter.h +0 -28
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/binary_cross_entropy_grad.h +0 -36
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/convolution_grad_filter.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/convolution_grad_input.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/dropout_parameter.h +0 -27
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/layernorm_grad.h +0 -29
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/nllloss_grad_fp32.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/optimizer.h +0 -40
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/pack_ext.h +0 -39
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/pooling_grad.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/reduce_grad.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/smooth_l1_loss.h +0 -27
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/softmax_cross_entropy_with_logits.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/softmax_grad_utils.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_sparse/matmul_sparse_x1_fp32.h +0 -41
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/gather_nd_parameter.h +0 -26
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/gelu_parameter.h +0 -28
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/grid_sampler_parameter.h +0 -28
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/group_norm_parameter.h +0 -41
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/cast_gather_reduce_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensor_array_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensor_array_read_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensor_array_write_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensorlist_fromtensor_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensorlist_getitem_infer.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensorlist_reserve_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensorlist_setitem_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensorlist_stack_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/conv3d_infer.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/custom_is_inf_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/custom_masked_fill_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/custom_tensor_scatter_max_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/format_transpose_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/gather_d_infer.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/grid_sampler_infer.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/group_norm_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/reduce_concat_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/sparse_fill_empty_rows_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/sparse_reshape_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/sparse_segment_sum_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/split_reduce_concat_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/string/custom_extract_features_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/string/custom_normalize_infer.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/string/custom_predict_infer.h +0 -36
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/string/hashtable_lookup_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/string/lsh_projection_infer.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/string/skip_gram_infer.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/triu_tril_infer.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/instance_norm_parameter.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/add_int8.h +0 -70
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/arg_min_max_int8.h +0 -41
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/arithmetic_int8.h +0 -51
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/arithmetic_self_int8.h +0 -59
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/batch_to_space_int8.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/batchnorm_int8.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/common_func_int8.h +0 -95
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/concat_int8.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/conv1x1_int8.h +0 -46
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/conv3x3_int8.h +0 -48
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/conv_depthwise_int8.h +0 -49
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/conv_int8.h +0 -44
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/crop_int8.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/deconv_int8.h +0 -46
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/depth_to_space_int8.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/div_int8.h +0 -37
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/dynamic_gather_int8.h +0 -40
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/dynamic_matmul_int8.h +0 -74
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/dynamic_quant_int8.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/gatherNd_int8.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/gather_int8.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/hswish_int8.h +0 -43
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/l2_norm_int8.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/layer_norm_int8.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/leaky_relu_int8.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/matmul_int8.h +0 -93
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/mul_int8.h +0 -39
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/pack_int8.h +0 -56
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/pad_int8.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/pooling_int8.h +0 -50
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/power_int8.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/quant_dtype_cast_int8.h +0 -56
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/reduce_int8.h +0 -70
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/relux_int8.h +0 -43
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/reshape_int8.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/resize_int8.h +0 -50
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/scale_int8.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/sigmoid_int8.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/slice_int8.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/softmax_int8.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/space_to_batch_int8.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/split_int8.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/squeeze_int8.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/sub_int8.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/tanh_int8.h +0 -43
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/topk_int8.h +0 -36
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/transpose_int8.h +0 -36
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/unsqueeze_int8.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/intrinsics/avx/common_utils.h +0 -157
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/intrinsics/sse/sse_common.h +0 -390
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/activation.h +0 -25
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/addn.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/arg_min_max.h +0 -63
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/arithmetic_compare.h +0 -26
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/arithmetic_self.h +0 -48
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/batch_norm.h +0 -38
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/batch_to_space.h +0 -33
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/biasadd.h +0 -25
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/cast.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/clip.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/concat.h +0 -52
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_1x1.h +0 -42
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_base.h +0 -63
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_delegate.h +0 -39
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_depthwise.h +0 -36
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_depthwise_3x3.h +0 -37
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_depthwise_indirect.h +0 -39
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_depthwise_sw.h +0 -36
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_depthwise_sw_avx.h +0 -40
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_im2col.h +0 -28
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_im2col_arm32.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_im2col_arm64.h +0 -29
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_im2col_avx.h +0 -29
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_im2col_avx512.h +0 -29
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_im2col_base.h +0 -52
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_im2col_sse.h +0 -29
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_slidewindow.h +0 -46
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_sw_1x1.h +0 -36
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_sw_arm64.h +0 -28
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_sw_avx.h +0 -28
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_winograd.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_winograd_arm32.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_winograd_arm64.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_winograd_avx.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_winograd_base.h +0 -65
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_winograd_sse.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/crop.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/crop_and_resize.h +0 -41
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/deconvolution.h +0 -39
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/deconvolution_depthwise.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/deconvolution_winograd.h +0 -52
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/depth_to_space.h +0 -42
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/f16/arithmetic_compare_f16.h +0 -26
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/f16/arithmetic_f16.h +0 -42
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/f16/concat_f16.h +0 -25
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/f16/reduce_f16.h +0 -27
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/f16/stack_f16.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/fill.h +0 -36
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/fullconnection.h +0 -25
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/fused_batch_norm.h +0 -37
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/gather.h +0 -46
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/gather_d.h +0 -25
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/gather_nd.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/group_convolution.h +0 -49
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/group_norm.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/init_vs_kernels.h +0 -20
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/layer_norm.h +0 -49
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/local_response_norm.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/log_softmax.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul.h +0 -25
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul_arm32.h +0 -28
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul_arm64.h +0 -28
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul_avx.h +0 -28
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul_avx512.h +0 -27
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul_base.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul_create.h +0 -24
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul_sse.h +0 -27
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/non_max_suppression.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/non_zero.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/ones_like.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/pad.h +0 -51
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/pow.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/prelu.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/prior_box.h +0 -36
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/range.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/rank.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/reduce.h +0 -72
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/reverse.h +0 -36
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/scale.h +0 -41
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/shape.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/size.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/softmax.h +0 -39
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/splice.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/stack.h +0 -41
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/transpose.h +0 -49
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/tril.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/triu.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/unique.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/where.h +0 -44
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/zeros_like.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/l2_norm_parameter.h +0 -41
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/local_response_norm_parameter.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/lsh_projection_parameter.h +0 -35
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/mul_parameter.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/non_max_suppression_parameter.h +0 -28
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/pack.h +0 -23
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/partial_fusion_parameter.h +0 -29
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/predict_parameter.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/prelu_parameter.h +0 -26
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/random_parameter.h +0 -34
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/reverse_parameter.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/reverse_sequence_parameter.h +0 -45
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/scale_parameter.h +0 -39
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/scatter_elements_parameter.h +0 -25
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/scatter_nd_parameter.h +0 -29
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/sigmoid_parameter.h +0 -41
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/skip_gram_parameter.h +0 -30
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/sparse_to_dense_parameter.h +0 -32
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/tensor_array_parameter.h +0 -29
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/triu_tril_parameter.h +0 -31
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/upsample_parameter.h +0 -29
- mindspore/include/mindspore/ops/kernel/cpu/nnacl/where_parameter.h +0 -25
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gmm_backward.h +0 -38
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gmm_v2_backward.h +0 -38
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/select_ext.h +0 -38
- mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/transpose_ext.h +0 -38
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gmm_backward.h +0 -38
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gmm_v2_backward.h +0 -38
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/select_ext.h +0 -38
- mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/transpose_ext.h +0 -38
- mindspore/include/mindspore/ops/ops_utils/ms_extension.h +0 -39
- mindspore/include/mindspore/ops/view/select_ext_strides_calc.h +0 -30
- mindspore/include/mindspore/ops/view/transpose_ext_strides_calc.h +0 -32
- mindspore/include/mindspore/ops/view/unstack_ext_strides_calc.h +0 -28
- mindspore/include/third_party/securec/src/secinput.h +0 -181
- mindspore/include/third_party/securec/src/securecutil.h +0 -574
- mindspore/include/third_party/securec/src/secureprintoutput.h +0 -153
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +0 -123
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -152
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +0 -2048
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910_93/aic-ascend910_93-ops-info.json +0 -2048
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +0 -2048
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +0 -46
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +0 -51
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/build_tbe_kernel.py +0 -529
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/compiler.py +0 -56
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/custom.py +0 -1109
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/get_file_path.py +0 -36
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -241
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +0 -212
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/tbe_topi.py +0 -556
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +0 -46
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +0 -51
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -241
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +0 -212
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/framework/npu_supported_ops.json +0 -10
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_api/include/aclnn_decoder_kv_cache.h +0 -59
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_api/include/aclnn_prompt_kv_cache.h +0 -59
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_api/lib/libcust_opapi.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +0 -182
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl/dynamic/decoder_kv_cache.cpp +0 -192
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl/dynamic/decoder_kv_cache.py +0 -215
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl/dynamic/prompt_kv_cache.cpp +0 -274
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl/dynamic/prompt_kv_cache.py +0 -215
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/binary_info_config.json +0 -302
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/decoder_kv_cache.json +0 -892
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/prompt_kv_cache.json +0 -892
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/inc/op_proto.h +0 -33
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/version.info +0 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910b/framework/npu_supported_ops.json +0 -14
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_decoder_kv_cache.h +0 -59
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_prompt_kv_cache.h +0 -59
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.cpp +0 -192
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.py +0 -215
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.cpp +0 -274
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.py +0 -215
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +0 -158
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +0 -167
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +0 -156
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +0 -165
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/decoder_kv_cache.json +0 -892
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/prompt_kv_cache.json +0 -892
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910_93/decoder_kv_cache.json +0 -892
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910_93/prompt_kv_cache.json +0 -892
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/decoder_kv_cache.json +0 -892
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/prompt_kv_cache.json +0 -892
- mindspore/profiler/common/validator/__init__.py +0 -14
- mindspore/profiler/common/validator/validate_path.py +0 -84
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- mindspore/utils/hooks.py +0 -81
- mindspore-2.6.0rc1.dist-info/METADATA +0 -367
- mindspore-2.6.0rc1.dist-info/RECORD +0 -12184
- mindspore-2.6.0rc1.dist-info/WHEEL +0 -5
- /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
- /mindspore/include/mindspore/ops/kernel/ascend/ascendc/{op_host → all_finite/op_host}/all_finite_tiling.h +0 -0
- /mindspore/include/third_party/{securec/include → include}/securec.h +0 -0
- /mindspore/include/third_party/{securec/include → include}/securectype.h +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/infer_op_params.h
CHANGED
|
@@ -1,13 +1,11 @@
|
|
|
1
1
|
/*
|
|
2
2
|
* Copyright (c) 2024 Huawei Technologies Co., Ltd.
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
* You may
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
10
|
-
* See the Mulan PSL v2 for more details.
|
|
3
|
+
* This file is a part of the CANN Open Software.
|
|
4
|
+
* Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
|
|
5
|
+
* Please refer to the License for details. You may not use this file except in compliance with the License.
|
|
6
|
+
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
|
|
7
|
+
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
|
|
8
|
+
* See LICENSE in the root of the software repository for the full text of the License.
|
|
11
9
|
*/
|
|
12
10
|
#ifndef ATB_INFEROPPARAM_H
|
|
13
11
|
#define ATB_INFEROPPARAM_H
|
|
@@ -33,6 +31,16 @@ namespace atb {
|
|
|
33
31
|
|
|
34
32
|
namespace infer {
|
|
35
33
|
|
|
34
|
+
//!
|
|
35
|
+
//! \enum InputLayout
|
|
36
|
+
//!
|
|
37
|
+
//! \brief 数据排布类型
|
|
38
|
+
//!
|
|
39
|
+
enum InputLayout : int {
|
|
40
|
+
TYPE_BSND = 0, //!< 默认值,表示数据排布为BSND
|
|
41
|
+
TYPE_BNSD //!< 表示数据排布为BNSD
|
|
42
|
+
};
|
|
43
|
+
|
|
36
44
|
//!
|
|
37
45
|
//! \enum QuantType
|
|
38
46
|
//!
|
|
@@ -40,11 +48,12 @@ namespace infer {
|
|
|
40
48
|
//!
|
|
41
49
|
enum QuantType : int {
|
|
42
50
|
QUANT_UNDEFINED = 0, //!< 不量化
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
51
|
+
QUANT_UNQUANT = 0, //!< 不量化
|
|
52
|
+
QUANT_INT4 = 1, //!< 当前不支持
|
|
53
|
+
QUANT_INT8 = 2, //!< int8量化
|
|
54
|
+
QUANT_INT16 = 3, //!< 当前不支持
|
|
55
|
+
QUANT_FLOAT8 = 4, //!< 当前不支持
|
|
56
|
+
QUANT_FLOAT16 = 5, //!< 当前不支持
|
|
48
57
|
};
|
|
49
58
|
|
|
50
59
|
//!
|
|
@@ -62,22 +71,23 @@ enum DynamicQuantType : int {
|
|
|
62
71
|
//! \enum ActivationType
|
|
63
72
|
//!
|
|
64
73
|
//! \brief 激活支持的类型
|
|
65
|
-
//!
|
|
66
|
-
//! ACTIVATION_SWIGLU_FORWARD: Atlas
|
|
67
|
-
//!
|
|
68
|
-
//!
|
|
74
|
+
//! ACTIVATION_FAST_GELU:快速运算的Gelu激活函数,对Tensor内每个element做Gelu激活函数近似计算,计算速度更快,同时保持较高的准确性。
|
|
75
|
+
//! ACTIVATION_SWIGLU_FORWARD: Swiglu正向激活函数。Atlas 推理系列产品中只支持32位对齐的数据。
|
|
76
|
+
//! ACTIVATION_FASTER_GELU_FORWARD: 简化后的FastGelu激活函数,计算速度更快。
|
|
77
|
+
//! ACTIVATION_SWIGLU_BACKWARD: Swiglu正向激活函数的反向,求梯度时使用。只支持Atlas 800I A2推理产品。
|
|
69
78
|
//!
|
|
70
79
|
enum ActivationType : int {
|
|
71
|
-
ACTIVATION_UNDEFINED = 0,
|
|
72
|
-
ACTIVATION_RELU,
|
|
73
|
-
ACTIVATION_GELU,
|
|
74
|
-
ACTIVATION_FAST_GELU,
|
|
75
|
-
ACTIVATION_SWISH,
|
|
76
|
-
ACTIVATION_LOG,
|
|
77
|
-
ACTIVATION_SWIGLU_FORWARD,
|
|
78
|
-
ACTIVATION_SWIGLU_BACKWARD,
|
|
79
|
-
ACTIVATION_SIGMOID,
|
|
80
|
-
|
|
80
|
+
ACTIVATION_UNDEFINED = 0, //!< 未定义
|
|
81
|
+
ACTIVATION_RELU, //!< RELU激活类型
|
|
82
|
+
ACTIVATION_GELU, //!< GELU激活类型
|
|
83
|
+
ACTIVATION_FAST_GELU, //!< FAST_GELU激活类型
|
|
84
|
+
ACTIVATION_SWISH, //!< SWISH激活类型
|
|
85
|
+
ACTIVATION_LOG, //!< LOG激活类型
|
|
86
|
+
ACTIVATION_SWIGLU_FORWARD, //!< SWIGLU_FORWARD激活类型
|
|
87
|
+
ACTIVATION_SWIGLU_BACKWARD, //!< SWIGLU_BACKWARD激活类型
|
|
88
|
+
ACTIVATION_SIGMOID, //!< SIGMOID激活类型
|
|
89
|
+
ACTIVATION_FASTER_GELU_FORWARD, //!< FASTER_GELU_FORWARD激活类型
|
|
90
|
+
ACTIVATION_MAX, //!< 枚举最大值, 非激活类型
|
|
81
91
|
};
|
|
82
92
|
|
|
83
93
|
//!
|
|
@@ -98,8 +108,8 @@ struct ActivationParam {
|
|
|
98
108
|
//! \enum GeLUMode
|
|
99
109
|
//! \brief GeLU激活函数可选的计算模式
|
|
100
110
|
enum GeLUMode : int {
|
|
101
|
-
TANH_MODE = 0,
|
|
102
|
-
NONE_MODE,
|
|
111
|
+
TANH_MODE = 0, //!< 默认值,使用tanh估算
|
|
112
|
+
NONE_MODE, //!< 原GeLU计算公式
|
|
103
113
|
};
|
|
104
114
|
//! 激活函数类型,ActivationType类型枚举值.
|
|
105
115
|
ActivationType activationType = ACTIVATION_UNDEFINED;
|
|
@@ -109,6 +119,10 @@ struct ActivationParam {
|
|
|
109
119
|
int32_t dim = -1;
|
|
110
120
|
//! GeLU模式选择参数
|
|
111
121
|
GeLUMode geluMode = TANH_MODE;
|
|
122
|
+
//!
|
|
123
|
+
//! \brief 预留参数
|
|
124
|
+
//!
|
|
125
|
+
uint8_t rsv[8] = {0};
|
|
112
126
|
};
|
|
113
127
|
|
|
114
128
|
//!
|
|
@@ -135,6 +149,10 @@ struct AsStridedParam {
|
|
|
135
149
|
//! \warning offset的长度要求为1且元素要求大于或等于0.
|
|
136
150
|
//!
|
|
137
151
|
SVector<int64_t> offset;
|
|
152
|
+
//!
|
|
153
|
+
//! \brief 预留参数
|
|
154
|
+
//!
|
|
155
|
+
uint8_t rsv[8] = {0};
|
|
138
156
|
};
|
|
139
157
|
|
|
140
158
|
//!
|
|
@@ -156,9 +174,27 @@ struct CumsumParam {
|
|
|
156
174
|
//!
|
|
157
175
|
//! \brief 正向累加或逆向累加,默认为false.
|
|
158
176
|
//!
|
|
159
|
-
//! \note true
|
|
177
|
+
//! \note true:输出逆向累加(暂不支持) false:输出正向累加.
|
|
160
178
|
//!
|
|
161
179
|
bool reverse = false;
|
|
180
|
+
//!
|
|
181
|
+
//! \brief 预留参数
|
|
182
|
+
//!
|
|
183
|
+
uint8_t rsv[14] = {0};
|
|
184
|
+
};
|
|
185
|
+
|
|
186
|
+
//!
|
|
187
|
+
//! \brief 推理的长度大于训练长度时,embedding需要进行特殊处理。
|
|
188
|
+
//! 推理长度小于等于训练长度时,不进行插值;推理长度大于训练长度时,放大base动态插值。
|
|
189
|
+
//! 将输入的token序列的位置信息positionIds和inv_freq进行外积,再cos/sin运算得到最终的Rotary embedding的结果。
|
|
190
|
+
//!
|
|
191
|
+
struct DynamicNTKParam {
|
|
192
|
+
//! 选择输出数据类型的参数
|
|
193
|
+
aclDataType outDataType = ACL_DT_UNDEFINED;
|
|
194
|
+
//!
|
|
195
|
+
//! \brief 预留参数
|
|
196
|
+
//!
|
|
197
|
+
uint8_t rsv[12] = {0};
|
|
162
198
|
};
|
|
163
199
|
|
|
164
200
|
//!
|
|
@@ -177,6 +213,10 @@ struct GatherParam {
|
|
|
177
213
|
//! \warning 该参数必须大于或等于0,且小于或等于axis.
|
|
178
214
|
//!
|
|
179
215
|
int64_t batchDims = 0;
|
|
216
|
+
//!
|
|
217
|
+
//! \brief 预留参数
|
|
218
|
+
//!
|
|
219
|
+
uint8_t rsv[16] = {0};
|
|
180
220
|
};
|
|
181
221
|
|
|
182
222
|
//!
|
|
@@ -193,25 +233,41 @@ struct MultinomialParam {
|
|
|
193
233
|
uint32_t numSamples = 1;
|
|
194
234
|
//! \brief 随机数种子.
|
|
195
235
|
uint32_t randSeed = 0;
|
|
236
|
+
//!
|
|
237
|
+
//! \brief 预留参数
|
|
238
|
+
//!
|
|
239
|
+
uint8_t rsv[8] = {0};
|
|
196
240
|
};
|
|
197
241
|
|
|
198
242
|
//!
|
|
199
|
-
//! \brief
|
|
243
|
+
//! \brief 对输入张量指定维度切成多个张量。
|
|
200
244
|
//!
|
|
201
245
|
struct SplitParam {
|
|
202
246
|
//!
|
|
203
247
|
//! \brief 指定切分的维度索引
|
|
204
248
|
//!
|
|
205
|
-
//! splitDim须位于输入张量x的维度范围内,即如果x的维度为xDim
|
|
249
|
+
//! splitDim须位于输入张量x的维度范围内,即如果x的维度为xDim,则等长切分下splitDim的取值范围为[-xDim, xDim - 1]。
|
|
206
250
|
//! 当splitDim为负数时,其含义是从最高维度开始访问,如splitDim = -1,x维度数为dimNum,则拆分维度为dimNum - 1。
|
|
251
|
+
//! \warning 当使用不等长切分时,splitDim的取值范围为[0, xDim - 1]。
|
|
207
252
|
//!
|
|
208
253
|
int32_t splitDim = 0;
|
|
209
254
|
//!
|
|
210
|
-
//! \brief
|
|
255
|
+
//! \brief 切分次数,当前支持2或3.
|
|
211
256
|
//!
|
|
212
|
-
//! \warning
|
|
257
|
+
//! \warning 等长切分下输入张量x的维度须能够被splitNum整除,且当splitNum = 3时输入x要求是float16或者bf16数据类型。
|
|
213
258
|
//!
|
|
214
259
|
int32_t splitNum = 2;
|
|
260
|
+
//!
|
|
261
|
+
//! \brief 指定每个输出tensor在切分维度上的大小
|
|
262
|
+
//!
|
|
263
|
+
//! 不传入此参数时使用等长切分,传入此参数时使用splitV不等长切分
|
|
264
|
+
//! \warning splitSizes中的每一个元素要求大于等于1。splitSizes中的元素之和等于切分维度的大小。
|
|
265
|
+
//!
|
|
266
|
+
SVector<int32_t> splitSizes = {};
|
|
267
|
+
//!
|
|
268
|
+
//! \brief 预留参数
|
|
269
|
+
//!
|
|
270
|
+
uint8_t rsv[8] = {0};
|
|
215
271
|
};
|
|
216
272
|
|
|
217
273
|
//!
|
|
@@ -223,15 +279,18 @@ struct ConcatParam {
|
|
|
223
279
|
//!
|
|
224
280
|
//! 当concatDim为负数时,其含义是从最高维度开始访问,如concatDim = -1,输入张量维度数为dimNum,则拼接维度为dimNum - 1。
|
|
225
281
|
//!
|
|
226
|
-
//! \warning 输入x和y的维数要求一致。输入x或y的维度大小,除了concatDim
|
|
282
|
+
//! \warning 输入x和y的维数要求一致。输入x或y的维度大小,除了concatDim维外,其他维度要求相同。Atlas 推理系列产品中不支持bf16类型数据。
|
|
227
283
|
//!
|
|
228
284
|
int concatDim = 0;
|
|
285
|
+
//!
|
|
286
|
+
//! \brief 预留参数
|
|
287
|
+
//!
|
|
288
|
+
uint8_t rsv[12] = {0};
|
|
229
289
|
};
|
|
230
290
|
|
|
231
291
|
//!
|
|
232
292
|
//! \brief 从输入张量某个起始位置中提取指定大小的切片
|
|
233
293
|
//!
|
|
234
|
-
//!
|
|
235
294
|
struct SliceParam {
|
|
236
295
|
//!
|
|
237
296
|
//! \brief 每个维度切片的起始位置
|
|
@@ -249,10 +308,14 @@ struct SliceParam {
|
|
|
249
308
|
//! \warning size中元素要求大于等于-1。对应维度offsets,以及offsets + size须在x的对应维度的大小范围内。
|
|
250
309
|
//!
|
|
251
310
|
SVector<int64_t> size;
|
|
311
|
+
//!
|
|
312
|
+
//! \brief 预留参数
|
|
313
|
+
//!
|
|
314
|
+
uint8_t rsv[8] = {0};
|
|
252
315
|
};
|
|
253
316
|
|
|
254
317
|
//!
|
|
255
|
-
//! \brief Softmax多分类激活函数,将多维(最大8维)Tensor数据在指定轴上映射到0到1
|
|
318
|
+
//! \brief Softmax多分类激活函数,将多维(最大8维)Tensor数据在指定轴上映射到0到1之间,且softmax轴数值之和为1。
|
|
256
319
|
//!
|
|
257
320
|
struct SoftmaxParam {
|
|
258
321
|
//!
|
|
@@ -262,6 +325,10 @@ struct SoftmaxParam {
|
|
|
262
325
|
//! \warning axes的元素要求大于或等于-1且小于输入x的维度
|
|
263
326
|
//!
|
|
264
327
|
SVector<int64_t> axes;
|
|
328
|
+
//!
|
|
329
|
+
//! \brief 预留参数
|
|
330
|
+
//!
|
|
331
|
+
uint8_t rsv[8] = {0};
|
|
265
332
|
};
|
|
266
333
|
|
|
267
334
|
//!
|
|
@@ -270,6 +337,10 @@ struct SoftmaxParam {
|
|
|
270
337
|
struct TransposeParam {
|
|
271
338
|
//! 指示输入维度的重排结果, 需要保证输入正确,维度和输入x一致
|
|
272
339
|
SVector<int32_t> perm;
|
|
340
|
+
//!
|
|
341
|
+
//! \brief 预留参数
|
|
342
|
+
//!
|
|
343
|
+
uint8_t rsv[8] = {0};
|
|
273
344
|
};
|
|
274
345
|
|
|
275
346
|
//!
|
|
@@ -287,27 +358,28 @@ struct ElewiseParam {
|
|
|
287
358
|
//! \brief 计算类型
|
|
288
359
|
//!
|
|
289
360
|
enum ElewiseType : int {
|
|
290
|
-
ELEWISE_UNDEFINED = 0,
|
|
291
|
-
ELEWISE_CAST,
|
|
292
|
-
ELEWISE_MULS,
|
|
293
|
-
ELEWISE_COS,
|
|
294
|
-
ELEWISE_SIN,
|
|
295
|
-
ELEWISE_NEG,
|
|
296
|
-
ELEWISE_QUANT,
|
|
297
|
-
ELEWISE_LOGICAL_NOT,
|
|
298
|
-
ELEWISE_ADD,
|
|
299
|
-
ELEWISE_MUL,
|
|
300
|
-
ELEWISE_REALDIV,
|
|
301
|
-
ELEWISE_LOGICAL_AND,
|
|
302
|
-
ELEWISE_LOGICAL_OR,
|
|
303
|
-
ELEWISE_LESS,
|
|
304
|
-
ELEWISE_GREATER,
|
|
305
|
-
ELEWISE_SUB,
|
|
306
|
-
ELEWISE_EQUAL,
|
|
307
|
-
ELEWISE_QUANT_PER_CHANNEL,
|
|
308
|
-
ELEWISE_DEQUANT_PER_CHANNEL,
|
|
309
|
-
ELEWISE_DYNAMIC_QUANT,
|
|
310
|
-
ELEWISE_TANH,
|
|
361
|
+
ELEWISE_UNDEFINED = 0, //!< 默认值,未定义
|
|
362
|
+
ELEWISE_CAST, //!< 数据类型转换
|
|
363
|
+
ELEWISE_MULS, //!< 向量逐元素乘值
|
|
364
|
+
ELEWISE_COS, //!< 逐元素计算余弦值
|
|
365
|
+
ELEWISE_SIN, //!< 逐元素计算正弦值
|
|
366
|
+
ELEWISE_NEG, //!< 逐元素取相反数
|
|
367
|
+
ELEWISE_QUANT, //!< 量化, 仅在Atlas 800I A2推理产品上支持
|
|
368
|
+
ELEWISE_LOGICAL_NOT, //!< 逐元素逻辑非
|
|
369
|
+
ELEWISE_ADD, //!< 逐元素相加
|
|
370
|
+
ELEWISE_MUL, //!< 向量与向量逐元素相乘
|
|
371
|
+
ELEWISE_REALDIV, //!< 向量与向量逐元素相除
|
|
372
|
+
ELEWISE_LOGICAL_AND, //!< 逐元素逻辑与
|
|
373
|
+
ELEWISE_LOGICAL_OR, //!< 逐元素逻辑或
|
|
374
|
+
ELEWISE_LESS, //!< 逐元素判断是否小于
|
|
375
|
+
ELEWISE_GREATER, //!< 逐元素判断是否大于
|
|
376
|
+
ELEWISE_SUB, //!< 逐元素相减
|
|
377
|
+
ELEWISE_EQUAL, //!< 逐元素判断是否相等
|
|
378
|
+
ELEWISE_QUANT_PER_CHANNEL, //!< 每个通道量化
|
|
379
|
+
ELEWISE_DEQUANT_PER_CHANNEL, //!< 每个通道反量化
|
|
380
|
+
ELEWISE_DYNAMIC_QUANT, //!< 逐行动态量化
|
|
381
|
+
ELEWISE_TANH, //!< 逐元素计算双曲正切值
|
|
382
|
+
ELEWISE_TYPE_MAX //!< 边界值,仅用于判断是否出界,所有情况不能取该值
|
|
311
383
|
};
|
|
312
384
|
|
|
313
385
|
//! 量化(非每通道)所需参数
|
|
@@ -318,12 +390,20 @@ struct ElewiseParam {
|
|
|
318
390
|
bool asymmetric = false; //!< false : symmetric,true : asymmetric
|
|
319
391
|
//! 量化的偏移度
|
|
320
392
|
int inputOffset = 0;
|
|
393
|
+
//!
|
|
394
|
+
//! \brief 预留参数
|
|
395
|
+
//!
|
|
396
|
+
uint8_t rsv[20] = {0};
|
|
321
397
|
};
|
|
322
398
|
|
|
323
399
|
//! 向量乘值所需参数
|
|
324
400
|
struct MulsParam {
|
|
325
401
|
//! 向量乘的值
|
|
326
402
|
float varAttr = 0.0f;
|
|
403
|
+
//!
|
|
404
|
+
//! \brief 预留参数
|
|
405
|
+
//!
|
|
406
|
+
uint8_t rsv[12] = {0};
|
|
327
407
|
};
|
|
328
408
|
|
|
329
409
|
//! 计算方式
|
|
@@ -334,6 +414,10 @@ struct ElewiseParam {
|
|
|
334
414
|
MulsParam mulsParam;
|
|
335
415
|
//! 指定数据类型转换输出的数据类型
|
|
336
416
|
aclDataType outTensorType = ACL_DT_UNDEFINED;
|
|
417
|
+
//!
|
|
418
|
+
//! \brief 预留参数
|
|
419
|
+
//!
|
|
420
|
+
uint8_t rsv[8] = {0};
|
|
337
421
|
};
|
|
338
422
|
|
|
339
423
|
//!
|
|
@@ -341,18 +425,61 @@ struct ElewiseParam {
|
|
|
341
425
|
//!
|
|
342
426
|
//! \brief KVCache处理。
|
|
343
427
|
//!
|
|
344
|
-
struct KvCacheParam {
|
|
428
|
+
struct KvCacheParam {
|
|
429
|
+
//!
|
|
430
|
+
//! \brief 预留参数
|
|
431
|
+
//!
|
|
432
|
+
uint8_t rsv[8] = {0};
|
|
433
|
+
};
|
|
345
434
|
|
|
346
435
|
//!
|
|
347
436
|
//! \struct GatingParam
|
|
348
437
|
//!
|
|
349
438
|
//! \brief 主要功能为将token和专家的映射关系反转为专家与token的映射关系。算子输入为MoE模型每个token选中专家的索引,算子输出为MoE模型每个专家对应的token的索引。
|
|
350
439
|
//!
|
|
440
|
+
//! \note 该算子支持TP和EP场景,当参数deviceExpert为空时,为TP场景,否则为EP场景。
|
|
441
|
+
//!
|
|
442
|
+
//! \warning 非Atlas 800I A2推理产品仅支持TP场景。
|
|
443
|
+
//!
|
|
351
444
|
struct GatingParam {
|
|
352
|
-
//!
|
|
353
|
-
|
|
354
|
-
//!
|
|
445
|
+
//!
|
|
446
|
+
//! \brief 每个token选中的专家数。
|
|
447
|
+
//!
|
|
448
|
+
//! \note 默认值为1。
|
|
449
|
+
//!
|
|
450
|
+
//! \warning 当cumSumNum为0时,取值为1;否则,取值范围为(0, cumSumNum]。
|
|
451
|
+
//!
|
|
452
|
+
int32_t topkExpertNum = 1;
|
|
453
|
+
//!
|
|
454
|
+
//! \brief 专家总数。
|
|
455
|
+
//!
|
|
456
|
+
//! \note 默认值为0。
|
|
457
|
+
//!
|
|
458
|
+
//! \warning 取值范围为[0, 200]。
|
|
459
|
+
//!
|
|
355
460
|
int32_t cumSumNum = 0;
|
|
461
|
+
//!
|
|
462
|
+
//! \brief 输出的cumSum的类型是否为int64。
|
|
463
|
+
//!
|
|
464
|
+
//! \note 默认值为false。
|
|
465
|
+
//!
|
|
466
|
+
//! \warning 当为false时,输出的cumSum类型为int32.
|
|
467
|
+
//!
|
|
468
|
+
bool cumSumInt64 = false;
|
|
469
|
+
//!
|
|
470
|
+
//! \brief 当前device上的专家索引列表。
|
|
471
|
+
//!
|
|
472
|
+
//! \note 默认为空。
|
|
473
|
+
//!
|
|
474
|
+
//! \warning 列表中各个元素取值范围为[0, cumSumNum),且其中元素值不可重复。
|
|
475
|
+
//!
|
|
476
|
+
//! \warning 当cumSumNum为0时,不可为空。
|
|
477
|
+
//!
|
|
478
|
+
std::vector<int32_t> deviceExpert;
|
|
479
|
+
//!
|
|
480
|
+
//! \brief 预留参数
|
|
481
|
+
//!
|
|
482
|
+
uint8_t rsv[16] = {0};
|
|
356
483
|
};
|
|
357
484
|
|
|
358
485
|
//!
|
|
@@ -364,15 +491,179 @@ struct ReshapeAndCacheParam {
|
|
|
364
491
|
//!
|
|
365
492
|
//! \brief 压缩类型
|
|
366
493
|
//!
|
|
494
|
+
//! \note 默认值为COMPRESS_TYPE_UNDEFINED(0),不开启压缩功能。
|
|
495
|
+
//!
|
|
496
|
+
//! \warning 仅在Atlas 800I A2推理产品上支持设置为非COMPRESS_TYPE_UNDEFINED(0)的值
|
|
497
|
+
//!
|
|
498
|
+
enum CompressType : int {
|
|
499
|
+
COMPRESS_TYPE_UNDEFINED = 0, //!< 默认值,不压缩
|
|
500
|
+
COMPRESS_TYPE_KVHEAD, //!< alibi场景下压缩key_cache, value_cache的kvHead维度
|
|
501
|
+
COMPRESS_TYPE_KVHEAD_ROPE //!< rope场景下压缩key_cache, value_cache的kvHead维度
|
|
502
|
+
};
|
|
503
|
+
//!
|
|
504
|
+
//! \enum KvCacheCfg
|
|
505
|
+
//!
|
|
506
|
+
//! \brief KvCache配置
|
|
507
|
+
//!
|
|
508
|
+
//! \note 默认值为K_CACHE_V_CACHE(0),传入key_cache和value_cache
|
|
509
|
+
//!
|
|
510
|
+
//! \warning 仅在Atlas 800I A2推理产品上支持设置为K_CACHE_V_BYPASS(1)
|
|
511
|
+
//!
|
|
512
|
+
enum KvCacheCfg : int {
|
|
513
|
+
K_CACHE_V_CACHE = 0, //!< 默认值,传入key_cache和value_cache
|
|
514
|
+
K_CACHE_V_BYPASS, //!< 只传入key_cache
|
|
515
|
+
K_CACHE_V_CACHE_NZ //!< 传入key_cache和value_cache,且为NZ格式
|
|
516
|
+
};
|
|
517
|
+
|
|
518
|
+
//! 压缩方式
|
|
519
|
+
CompressType compressType = COMPRESS_TYPE_UNDEFINED;
|
|
520
|
+
//! kvcache配置
|
|
521
|
+
KvCacheCfg kvCacheCfg = K_CACHE_V_CACHE;
|
|
522
|
+
//!
|
|
523
|
+
//! \brief 预留参数
|
|
524
|
+
//!
|
|
525
|
+
uint8_t rsv[16] = {0};
|
|
526
|
+
};
|
|
527
|
+
|
|
528
|
+
//!
|
|
529
|
+
//! \brief 遍历每个key和value,将key和value(num_heads, head_size)按照slotmapping填入key_cache/value_cache指定位置
|
|
530
|
+
//!
|
|
531
|
+
struct ReshapeAndCacheWithStrideParam {
|
|
532
|
+
//!
|
|
533
|
+
//! \enum CompressType
|
|
534
|
+
//!
|
|
535
|
+
//! \brief 压缩类型
|
|
536
|
+
//!
|
|
537
|
+
//! \note 默认值为COMPRESS_TYPE_UNDEFINED(0),不开启压缩功能。
|
|
538
|
+
//!
|
|
539
|
+
//! \warning 仅在Atlas 800I A2推理产品上支持设置为非COMPRESS_TYPE_UNDEFINED(0)的值
|
|
540
|
+
//!
|
|
367
541
|
enum CompressType : int {
|
|
368
|
-
COMPRESS_TYPE_UNDEFINED = 0,
|
|
369
|
-
COMPRESS_TYPE_KVHEAD
|
|
542
|
+
COMPRESS_TYPE_UNDEFINED = 0, //!< 默认值,不压缩
|
|
543
|
+
COMPRESS_TYPE_KVHEAD, //!< alibi场景下压缩key_cache, value_cache的kvHead维度
|
|
544
|
+
COMPRESS_TYPE_KVHEAD_ROPE //!< rope场景下压缩key_cache, value_cache的kvHead维度
|
|
545
|
+
};
|
|
546
|
+
//!
|
|
547
|
+
//! \enum KvCacheCfg
|
|
548
|
+
//!
|
|
549
|
+
//! \brief KvCache配置
|
|
550
|
+
//!
|
|
551
|
+
//! \note 默认值为K_CACHE_V_CACHE(0),传入key_cache和value_cache
|
|
552
|
+
//!
|
|
553
|
+
//! \warning 仅在Atlas 800I A2推理产品上支持设置为K_CACHE_V_BYPASS(1)
|
|
554
|
+
//!
|
|
555
|
+
enum KvCacheCfg : int {
|
|
556
|
+
K_CACHE_V_CACHE = 0, //!< 默认值,传入key_cache和value_cache
|
|
557
|
+
K_CACHE_V_BYPASS, //!< 只传入key_cache
|
|
370
558
|
};
|
|
371
559
|
|
|
372
560
|
//! 压缩方式
|
|
373
561
|
CompressType compressType = COMPRESS_TYPE_UNDEFINED;
|
|
562
|
+
//! kvcache配置
|
|
563
|
+
KvCacheCfg kvCacheCfg = K_CACHE_V_CACHE;
|
|
564
|
+
//!
|
|
565
|
+
//! \brief 预留参数
|
|
566
|
+
//!
|
|
567
|
+
uint8_t rsv[16] = {0};
|
|
568
|
+
};
|
|
569
|
+
|
|
570
|
+
//!
|
|
571
|
+
//! \struct LayerNormWithStrideParam
|
|
572
|
+
//!
|
|
573
|
+
//! \brief LayerNormWithStrideParam归一化处理。当前支持:NORM。
|
|
574
|
+
//!
|
|
575
|
+
//! \warning beginNormAxis维度小于等于输入x的维度。
|
|
576
|
+
//! 所有输入输出Tensor的最后一维大小相等。
|
|
577
|
+
//! Atlas 推理系列产品中不支持bf16类型数据。
|
|
578
|
+
//!
|
|
579
|
+
struct LayerNormWithStrideParam {
|
|
580
|
+
//!
|
|
581
|
+
//! \enum LayerNormType
|
|
582
|
+
//!
|
|
583
|
+
//! \brief 归一化类型:NORM、PRENORM、POSTNORM。
|
|
584
|
+
//!
|
|
585
|
+
enum LayerNormType : int {
|
|
586
|
+
LAYER_NORM_UNDEFINED = 0, //!< 默认值,未定义
|
|
587
|
+
LAYER_NORM_NORM, //!< norm
|
|
588
|
+
LAYER_NORM_PRENORM, //!< prenorm
|
|
589
|
+
LAYER_NORM_POSTNORM, //!< postnorm
|
|
590
|
+
LAYER_NORM_MAX,
|
|
591
|
+
};
|
|
592
|
+
//!
|
|
593
|
+
//! \brief NORM参数。
|
|
594
|
+
//!
|
|
595
|
+
struct NormParam {
|
|
596
|
+
//! \brief 量化类型。
|
|
597
|
+
//! 当前支持以下类型。
|
|
598
|
+
//! QUANT_UNQUANT;
|
|
599
|
+
//! QUANT_INT8
|
|
600
|
+
QuantType quantType = QUANT_UNQUANT;
|
|
601
|
+
//! \brief Epsilon,归一化时加在分母上防止除零。
|
|
602
|
+
float epsilon = 1e-5;
|
|
603
|
+
//! \brief 归一化的维度,默认值为0,从第几维开始norm,同时决定输入gamma和beta维度。
|
|
604
|
+
int32_t beginNormAxis = 0;
|
|
605
|
+
//! \brief 归一化的维度,默认值为0,决定从第几维开始把后面的维度按轴合并。
|
|
606
|
+
int32_t beginParamsAxis = 0;
|
|
607
|
+
//! \brief 动态量化类型。默认为DYNAMIC_QUANT_UNDEFINED非动态量化。当前版本暂不支持非对称动态量化。
|
|
608
|
+
DynamicQuantType dynamicQuantType = DYNAMIC_QUANT_UNDEFINED;
|
|
609
|
+
//!
|
|
610
|
+
//! \brief 预留参数
|
|
611
|
+
//!
|
|
612
|
+
uint8_t rsv[20] = {0};
|
|
613
|
+
};
|
|
614
|
+
//!
|
|
615
|
+
//! \brief PRENORM参数
|
|
616
|
+
//!
|
|
617
|
+
struct PreNormParam {
|
|
618
|
+
//! \brief 量化类型。
|
|
619
|
+
//! 当前仅支持QUANT_UNQUANT。
|
|
620
|
+
QuantType quantType = QUANT_UNQUANT;
|
|
621
|
+
//! \brief Epsilon,归一化时加在分母上防止除零。
|
|
622
|
+
float epsilon = 1e-5;
|
|
623
|
+
//! \brief 0:高精度 1:高性能(暂不支持)。
|
|
624
|
+
uint64_t opMode = 0;
|
|
625
|
+
//! \brief 缩放因子。
|
|
626
|
+
float zoomScaleValue = 1.0f;
|
|
627
|
+
//!
|
|
628
|
+
//! \brief 预留参数
|
|
629
|
+
//!
|
|
630
|
+
uint8_t rsv[20] = {0};
|
|
631
|
+
};
|
|
632
|
+
//!
|
|
633
|
+
//! \brief POSTNORM参数。
|
|
634
|
+
//!
|
|
635
|
+
struct PostNormParam {
|
|
636
|
+
//! \brief 量化类型。
|
|
637
|
+
//! 当前支持以下类型。
|
|
638
|
+
//! QUANT_UNQUANT;
|
|
639
|
+
//! QUANT_INT8
|
|
640
|
+
QuantType quantType = QUANT_UNQUANT;
|
|
641
|
+
//! \brief Epsilon,归一化时加在分母上防止除零。
|
|
642
|
+
float epsilon = 1e-5;
|
|
643
|
+
//! \brief 0:高精度 1:高性能(暂不支持)。
|
|
644
|
+
uint64_t opMode = 0;
|
|
645
|
+
//! \brief 缩放因子。
|
|
646
|
+
float zoomScaleValue = 1.0f;
|
|
647
|
+
//!
|
|
648
|
+
//! \brief 预留参数
|
|
649
|
+
//!
|
|
650
|
+
uint8_t rsv[20] = {0};
|
|
651
|
+
};
|
|
652
|
+
//! \brief layerType
|
|
653
|
+
LayerNormType layerType = LAYER_NORM_UNDEFINED;
|
|
654
|
+
//! \brief normParam
|
|
655
|
+
NormParam normParam;
|
|
656
|
+
//! \brief preNormParam
|
|
657
|
+
PreNormParam preNormParam;
|
|
658
|
+
//! \brief postNormParam
|
|
659
|
+
PostNormParam postNormParam;
|
|
660
|
+
//!
|
|
661
|
+
//! \brief 预留参数
|
|
662
|
+
//!
|
|
663
|
+
uint8_t rsv[8] = {0};
|
|
374
664
|
};
|
|
375
665
|
|
|
666
|
+
|
|
376
667
|
//!
|
|
377
668
|
//! \struct LayerNormParam
|
|
378
669
|
//!
|
|
@@ -380,6 +671,7 @@ struct ReshapeAndCacheParam {
|
|
|
380
671
|
//!
|
|
381
672
|
//! \warning beginNormAxis维度小于等于输入x的维度。
|
|
382
673
|
//! 所有输入输出Tensor的最后一维大小相等。
|
|
674
|
+
//! Atlas 推理系列产品中不支持bf16类型数据。
|
|
383
675
|
//!
|
|
384
676
|
struct LayerNormParam {
|
|
385
677
|
//!
|
|
@@ -388,10 +680,10 @@ struct LayerNormParam {
|
|
|
388
680
|
//! \brief 归一化类型:NORM、PRENORM、POSTNORM。
|
|
389
681
|
//!
|
|
390
682
|
enum LayerNormType : int {
|
|
391
|
-
LAYER_NORM_UNDEFINED = 0,
|
|
392
|
-
LAYER_NORM_NORM,
|
|
393
|
-
LAYER_NORM_PRENORM,
|
|
394
|
-
LAYER_NORM_POSTNORM,
|
|
683
|
+
LAYER_NORM_UNDEFINED = 0, //!< 默认值,未定义
|
|
684
|
+
LAYER_NORM_NORM, //!< norm
|
|
685
|
+
LAYER_NORM_PRENORM, //!< prenorm
|
|
686
|
+
LAYER_NORM_POSTNORM, //!< postnorm
|
|
395
687
|
LAYER_NORM_MAX,
|
|
396
688
|
};
|
|
397
689
|
//!
|
|
@@ -400,9 +692,9 @@ struct LayerNormParam {
|
|
|
400
692
|
struct NormParam {
|
|
401
693
|
//! \brief 量化类型。
|
|
402
694
|
//! 当前支持以下类型。
|
|
403
|
-
//!
|
|
695
|
+
//! QUANT_UNQUANT;
|
|
404
696
|
//! QUANT_INT8
|
|
405
|
-
QuantType quantType =
|
|
697
|
+
QuantType quantType = QUANT_UNQUANT;
|
|
406
698
|
//! \brief Epsilon,归一化时加在分母上防止除零。
|
|
407
699
|
float epsilon = 1e-5;
|
|
408
700
|
//! \brief 归一化的维度,默认值为0,从第几维开始norm,同时决定输入gamma和beta维度。
|
|
@@ -411,20 +703,28 @@ struct LayerNormParam {
|
|
|
411
703
|
int32_t beginParamsAxis = 0;
|
|
412
704
|
//! \brief 动态量化类型。默认为DYNAMIC_QUANT_UNDEFINED非动态量化。当前版本暂不支持非对称动态量化。
|
|
413
705
|
DynamicQuantType dynamicQuantType = DYNAMIC_QUANT_UNDEFINED;
|
|
706
|
+
//!
|
|
707
|
+
//! \brief 预留参数
|
|
708
|
+
//!
|
|
709
|
+
uint8_t rsv[20] = {0};
|
|
414
710
|
};
|
|
415
711
|
//!
|
|
416
712
|
//! \brief PRENORM参数
|
|
417
713
|
//!
|
|
418
714
|
struct PreNormParam {
|
|
419
715
|
//! \brief 量化类型。
|
|
420
|
-
//! 当前仅支持
|
|
421
|
-
QuantType quantType =
|
|
716
|
+
//! 当前仅支持QUANT_UNQUANT。
|
|
717
|
+
QuantType quantType = QUANT_UNQUANT;
|
|
422
718
|
//! \brief Epsilon,归一化时加在分母上防止除零。
|
|
423
719
|
float epsilon = 1e-5;
|
|
424
720
|
//! \brief 0:高精度 1:高性能(暂不支持)。
|
|
425
|
-
|
|
721
|
+
uint64_t opMode = 0;
|
|
426
722
|
//! \brief 缩放因子。
|
|
427
723
|
float zoomScaleValue = 1.0f;
|
|
724
|
+
//!
|
|
725
|
+
//! \brief 预留参数
|
|
726
|
+
//!
|
|
727
|
+
uint8_t rsv[20] = {0};
|
|
428
728
|
};
|
|
429
729
|
//!
|
|
430
730
|
//! \brief POSTNORM参数。
|
|
@@ -432,15 +732,19 @@ struct LayerNormParam {
|
|
|
432
732
|
struct PostNormParam {
|
|
433
733
|
//! \brief 量化类型。
|
|
434
734
|
//! 当前支持以下类型。
|
|
435
|
-
//!
|
|
735
|
+
//! QUANT_UNQUANT;
|
|
436
736
|
//! QUANT_INT8
|
|
437
|
-
QuantType quantType =
|
|
737
|
+
QuantType quantType = QUANT_UNQUANT;
|
|
438
738
|
//! \brief Epsilon,归一化时加在分母上防止除零。
|
|
439
739
|
float epsilon = 1e-5;
|
|
440
740
|
//! \brief 0:高精度 1:高性能(暂不支持)。
|
|
441
|
-
|
|
741
|
+
uint64_t opMode = 0;
|
|
442
742
|
//! \brief 缩放因子。
|
|
443
743
|
float zoomScaleValue = 1.0f;
|
|
744
|
+
//!
|
|
745
|
+
//! \brief 预留参数
|
|
746
|
+
//!
|
|
747
|
+
uint8_t rsv[20] = {0};
|
|
444
748
|
};
|
|
445
749
|
//! \brief layerType
|
|
446
750
|
LayerNormType layerType = LAYER_NORM_UNDEFINED;
|
|
@@ -450,6 +754,10 @@ struct LayerNormParam {
|
|
|
450
754
|
PreNormParam preNormParam;
|
|
451
755
|
//! \brief postNormParam
|
|
452
756
|
PostNormParam postNormParam;
|
|
757
|
+
//!
|
|
758
|
+
//! \brief 预留参数
|
|
759
|
+
//!
|
|
760
|
+
uint8_t rsv[8] = {0};
|
|
453
761
|
};
|
|
454
762
|
|
|
455
763
|
//!
|
|
@@ -458,30 +766,31 @@ struct LayerNormParam {
|
|
|
458
766
|
//! \brief RMS归一化处理。
|
|
459
767
|
//!
|
|
460
768
|
//! \warning 所有输入输出Tensor的最后一维大小相等。
|
|
769
|
+
//! Atlas 推理系列产品中不支持bf16类型数据。
|
|
461
770
|
//!
|
|
462
771
|
struct RmsNormParam {
|
|
463
772
|
//!
|
|
464
773
|
//! \brief RmsNormType
|
|
465
774
|
//!
|
|
466
775
|
enum RmsNormType : int {
|
|
467
|
-
RMS_NORM_UNDEFINED = 0,
|
|
468
|
-
RMS_NORM_NORM,
|
|
469
|
-
RMS_NORM_PRENORM,
|
|
470
|
-
RMS_NORM_POSTNORM,
|
|
776
|
+
RMS_NORM_UNDEFINED = 0, //!< 默认值,未定义
|
|
777
|
+
RMS_NORM_NORM, //!< NORM参数。
|
|
778
|
+
RMS_NORM_PRENORM, //!< PRENORM参数。
|
|
779
|
+
RMS_NORM_POSTNORM, //!< POSTNORM参数
|
|
471
780
|
};
|
|
472
781
|
//!
|
|
473
782
|
//! \brief PrecisionMode
|
|
474
783
|
//!
|
|
475
784
|
enum PrecisionMode : int {
|
|
476
|
-
HIGH_PRECISION_MODE = 0,
|
|
477
|
-
HIGH_PERFORMANCE_MODE,
|
|
785
|
+
HIGH_PRECISION_MODE = 0, //!< 中间计算使用float类型
|
|
786
|
+
HIGH_PERFORMANCE_MODE, //!< 中间计算使用float16类型
|
|
478
787
|
};
|
|
479
788
|
//!
|
|
480
789
|
//! \brief ModelType
|
|
481
790
|
//!
|
|
482
791
|
enum ModelType : int {
|
|
483
|
-
LLAMA_MODEL = 0,
|
|
484
|
-
GEMMA_MODEL,
|
|
792
|
+
LLAMA_MODEL = 0, //!< 默认值,使用Llama rmsnorm的公式
|
|
793
|
+
GEMMA_MODEL, //!< 使用Gemma rmsnorm的公式
|
|
485
794
|
};
|
|
486
795
|
//!
|
|
487
796
|
//! \brief NormParam
|
|
@@ -489,8 +798,8 @@ struct RmsNormParam {
|
|
|
489
798
|
struct NormParam {
|
|
490
799
|
//! \brief 量化类型。
|
|
491
800
|
//! 当前支持以下类型。
|
|
492
|
-
//!
|
|
493
|
-
QuantType quantType =
|
|
801
|
+
//! QUANT_UNQUANT, QUANT_INT8
|
|
802
|
+
QuantType quantType = QUANT_UNQUANT;
|
|
494
803
|
//! \brief Epsilon,归一化时加在分母上防止除零。
|
|
495
804
|
float epsilon = 1e-5;
|
|
496
805
|
//! \brief Epsilon,默认为1e-5,暂时不使用。
|
|
@@ -500,9 +809,9 @@ struct RmsNormParam {
|
|
|
500
809
|
bool rstd = false;
|
|
501
810
|
//! \brief 默认为HIGH_PRECISION_MODE。
|
|
502
811
|
//! 支持参数如下:
|
|
503
|
-
//! HIGH_PRECISION_MODE:默认值,中间计算使用
|
|
504
|
-
//! HIGH_PERFORMANCE_MODE: 中间计算使用
|
|
505
|
-
//! 不支持和“rstd”,“modelType
|
|
812
|
+
//! HIGH_PRECISION_MODE:默认值,中间计算使用float类型
|
|
813
|
+
//! HIGH_PERFORMANCE_MODE: 中间计算使用float16类型
|
|
814
|
+
//! 不支持和“rstd”,“modelType”同时设置。输入类型只支持float16。
|
|
506
815
|
//! 量化场景下不支持使用“precisionMode”,该场景下配置该参数将返回报错ERROR_INVALID_PARAM。
|
|
507
816
|
PrecisionMode precisionMode = HIGH_PRECISION_MODE;
|
|
508
817
|
//! \brief 默认为LLAMA_MODEL,设置为GEMMA_MODEL时使用gemma模型的rmsnorm计算公式。
|
|
@@ -514,6 +823,10 @@ struct RmsNormParam {
|
|
|
514
823
|
ModelType modelType = LLAMA_MODEL;
|
|
515
824
|
//! \brief 动态量化类型。默认为DYNAMIC_QUANT_UNDEFINED非动态量化。当前版本暂不支持非对称动态量化。
|
|
516
825
|
DynamicQuantType dynamicQuantType = DYNAMIC_QUANT_UNDEFINED;
|
|
826
|
+
//!
|
|
827
|
+
//! \brief 预留参数
|
|
828
|
+
//!
|
|
829
|
+
uint8_t rsv[32] = {0};
|
|
517
830
|
};
|
|
518
831
|
//!
|
|
519
832
|
//! \brief PreNormParam
|
|
@@ -521,25 +834,33 @@ struct RmsNormParam {
|
|
|
521
834
|
struct PreNormParam {
|
|
522
835
|
//! \brief 量化类型。
|
|
523
836
|
//! 当前支持以下类型。
|
|
524
|
-
//!
|
|
837
|
+
//! QUANT_UNQUANT
|
|
525
838
|
//! QUANT_INT8
|
|
526
|
-
QuantType quantType =
|
|
839
|
+
QuantType quantType = QUANT_UNQUANT;
|
|
527
840
|
//! \brief Epsilon,归一化时加在分母上防止除零。
|
|
528
841
|
float epsilon = 1e-5;
|
|
529
842
|
//! \brief 是否叠加偏置。默认为False,当需要输入beta时设置为True。量化场景下不支持使用“hasBias”,该场景下配置该参数将返回报错ERROR_INVALID_PARAM。
|
|
530
843
|
bool hasBias = false;
|
|
844
|
+
//!
|
|
845
|
+
//! \brief 预留参数
|
|
846
|
+
//!
|
|
847
|
+
uint8_t rsv[23] = {0};
|
|
531
848
|
};
|
|
532
849
|
//!
|
|
533
850
|
//! \brief PostNormParam
|
|
534
851
|
//!
|
|
535
852
|
struct PostNormParam {
|
|
536
853
|
//! \brief 量化类型。
|
|
537
|
-
//! 当前仅支持
|
|
538
|
-
QuantType quantType =
|
|
854
|
+
//! 当前仅支持QUANT_UNQUANT。
|
|
855
|
+
QuantType quantType = QUANT_UNQUANT;
|
|
539
856
|
//! \brief Epsilon,归一化时加在分母上防止除零。
|
|
540
857
|
float epsilon = 1e-5;
|
|
541
858
|
//! \brief 是否叠加偏置。默认为False,当需要输入beta时设置为True。
|
|
542
859
|
bool hasBias = false;
|
|
860
|
+
//!
|
|
861
|
+
//! \brief 预留参数
|
|
862
|
+
//!
|
|
863
|
+
uint8_t rsv[23] = {0};
|
|
543
864
|
};
|
|
544
865
|
//! \brief 归一化类型,参数如下:
|
|
545
866
|
//! RMS_NORM_UNDEFINED:默认值,未定义。
|
|
@@ -553,123 +874,206 @@ struct RmsNormParam {
|
|
|
553
874
|
PreNormParam preNormParam;
|
|
554
875
|
//! \brief POSTNORM参数。
|
|
555
876
|
PostNormParam postNormParam;
|
|
877
|
+
//!
|
|
878
|
+
//! \brief 预留参数
|
|
879
|
+
//!
|
|
880
|
+
uint8_t rsv[8] = {0};
|
|
556
881
|
};
|
|
557
882
|
|
|
558
883
|
//!
|
|
559
|
-
//! \struct
|
|
560
|
-
//!
|
|
561
|
-
//! \brief 将指定位置设置为value值或者生成一个指定Shape的Tensor并填充为value。
|
|
562
|
-
//!
|
|
563
|
-
//! \warning 输入x不可以被broadcast。输入mask的元素只能是0或者1,且可以被broadcast。
|
|
564
|
-
//!
|
|
565
|
-
struct FillParam {
|
|
566
|
-
//! \brief 是否Masked Fill。
|
|
567
|
-
bool withMask = true;
|
|
568
|
-
//! \brief 填充的元素,value是一个只含有一个元素的SVector。
|
|
569
|
-
SVector<float> value;
|
|
570
|
-
//! \brief withMask = false时,表示输出Tensor的Shape。
|
|
571
|
-
SVector<int64_t> outDim;
|
|
572
|
-
};
|
|
573
|
-
|
|
574
|
-
//!
|
|
575
|
-
//! \struct AllGatherParam
|
|
576
|
-
//!
|
|
577
|
-
//! \brief 将多个通信卡上的数据按所属rank号的顺序在第一维进行聚合,然后发送到每张卡上.
|
|
578
|
-
//!
|
|
579
|
-
//! rank、rankSize、rankRoot需满足以下条件:
|
|
580
|
-
//! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
|
|
884
|
+
//! \struct RmsNormWithStrideParam
|
|
581
885
|
//!
|
|
582
|
-
//! \
|
|
583
|
-
//! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
|
|
886
|
+
//! \brief RMS归一化处理。
|
|
584
887
|
//!
|
|
585
|
-
//! \
|
|
586
|
-
//!
|
|
587
|
-
//! rm -rf /dev/shm/sem.hccl*
|
|
588
|
-
//! ipcrm -a
|
|
589
|
-
//! \endcode
|
|
888
|
+
//! \warning 所有输入输出Tensor的最后一维大小相等。
|
|
889
|
+
//! Atlas 推理系列产品中不支持bf16类型数据。
|
|
590
890
|
//!
|
|
591
|
-
struct
|
|
592
|
-
//! \brief 每张卡所属通信编号
|
|
593
|
-
int rank = 0;
|
|
594
|
-
//! \brief 通信的卡的数量
|
|
595
|
-
int rankSize = 0;
|
|
596
|
-
//! \brief 主通信编号
|
|
597
|
-
int rankRoot = 0;
|
|
598
|
-
//! \brief 通信后端指示,仅支持"hccl"和"lccl",Atlas 推理系列产品(配置Atlas 300I DUO)仅支持backend为"hccl"。
|
|
891
|
+
struct RmsNormWithStrideParam {
|
|
599
892
|
//!
|
|
600
|
-
//!
|
|
601
|
-
//! 当backend为"lccl"时,且若机器拓扑为Atlas 800I A2单机16卡机器的拓扑时,只支持16卡全量拓扑通信或单节点内任意卡通信。
|
|
602
|
-
//!
|
|
603
|
-
std::string backend = "hccl";
|
|
604
|
-
//! \brief HCCL通信域指针
|
|
605
|
-
HcclComm hcclComm = nullptr;
|
|
606
|
-
//! \brief 通信模式,CommMode类型枚举值。hccl多线程只支持外部传入通信域方式
|
|
607
|
-
CommMode commMode = COMM_MULTI_PROCESS;
|
|
893
|
+
//! \brief RmsNormType
|
|
608
894
|
//!
|
|
609
|
-
|
|
895
|
+
enum RmsNormType : int {
|
|
896
|
+
RMS_NORM_UNDEFINED = 0, //!< 默认值,未定义
|
|
897
|
+
RMS_NORM_NORM, //!< NORM参数。
|
|
898
|
+
RMS_NORM_PRENORM, //!< PRENORM参数。
|
|
899
|
+
RMS_NORM_POSTNORM, //!< POSTNORM参数
|
|
900
|
+
};
|
|
610
901
|
//!
|
|
611
|
-
//!
|
|
612
|
-
//! https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC1alpha002/devguide/moddevg/tfmigr1/tfmigr1_000029.html
|
|
902
|
+
//! \brief PrecisionMode
|
|
613
903
|
//!
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
};
|
|
618
|
-
|
|
619
|
-
//!
|
|
620
|
-
//! \struct AllReduceParam
|
|
621
|
-
//!
|
|
622
|
-
//! \brief 将多个通信卡上的数据进行计算,支持相加、取最大、最小、相乘四种计算,然后发送到每张卡上.
|
|
623
|
-
//!
|
|
624
|
-
//! rank、rankSize、rankRoot需满足以下条件:
|
|
625
|
-
//! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
|
|
626
|
-
//!
|
|
627
|
-
//! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
|
|
628
|
-
//! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
|
|
629
|
-
//!
|
|
630
|
-
//! \code
|
|
631
|
-
//! rm -rf /dev/shm/sem.lccl*
|
|
632
|
-
//! rm -rf /dev/shm/sem.hccl*
|
|
633
|
-
//! ipcrm -a
|
|
634
|
-
//! \endcode
|
|
635
|
-
//!
|
|
636
|
-
struct AllReduceParam {
|
|
637
|
-
//! \brief 每张卡所属通信编号.
|
|
638
|
-
int rank = 0;
|
|
639
|
-
//! \brief 通信的卡的数量.
|
|
640
|
-
int rankSize = 0;
|
|
641
|
-
//! \brief 主通信编号.
|
|
642
|
-
int rankRoot = 0;
|
|
643
|
-
//! \brief 通信计算类型,支持"sum","prod","max"和"min".
|
|
644
|
-
std::string allReduceType = "sum";
|
|
904
|
+
enum PrecisionMode : int {
|
|
905
|
+
HIGH_PRECISION_MODE = 0, //!< 中间计算使用float类型
|
|
906
|
+
HIGH_PERFORMANCE_MODE, //!< 中间计算使用float16类型
|
|
907
|
+
};
|
|
645
908
|
//!
|
|
646
|
-
//! \brief
|
|
909
|
+
//! \brief ModelType
|
|
647
910
|
//!
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
911
|
+
enum ModelType : int {
|
|
912
|
+
LLAMA_MODEL = 0, //!< 默认值,使用Llama rmsnorm的公式
|
|
913
|
+
GEMMA_MODEL, //!< 使用Gemma rmsnorm的公式
|
|
914
|
+
};
|
|
915
|
+
//!
|
|
916
|
+
//! \brief NormParam
|
|
917
|
+
//!
|
|
918
|
+
struct NormParam {
|
|
919
|
+
//! \brief 量化类型。
|
|
920
|
+
//! 当前支持以下类型。
|
|
921
|
+
//! QUANT_UNQUANT, QUANT_INT8
|
|
922
|
+
QuantType quantType = QUANT_UNQUANT;
|
|
923
|
+
//! \brief Epsilon,归一化时加在分母上防止除零。
|
|
924
|
+
float epsilon = 1e-5;
|
|
925
|
+
//! \brief Epsilon,默认为1e-5,暂时不使用。
|
|
926
|
+
double layerNormEps = 1e-5;
|
|
927
|
+
//! \brief 默认为False,设置为true时会使用训练的rmsnormforward算子。仅在Atlas 800I A2推理产品上支持该设置。
|
|
928
|
+
//! 不支持和“precisionMode”,“modelType”同时设置。量化场景下不支持使用“rstd”。
|
|
929
|
+
bool rstd = false;
|
|
930
|
+
//! \brief 默认为HIGH_PRECISION_MODE。
|
|
931
|
+
//! 支持参数如下:
|
|
932
|
+
//! HIGH_PRECISION_MODE:默认值,中间计算使用float类型
|
|
933
|
+
//! HIGH_PERFORMANCE_MODE: 中间计算使用float16类型
|
|
934
|
+
//! 不支持和“rstd”,“modelType”同时设置。输入类型只支持float16。
|
|
935
|
+
//! 量化场景下不支持使用“precisionMode”,该场景下配置该参数将返回报错ERROR_INVALID_PARAM。
|
|
936
|
+
PrecisionMode precisionMode = HIGH_PRECISION_MODE;
|
|
937
|
+
//! \brief 默认为LLAMA_MODEL,设置为GEMMA_MODEL时使用gemma模型的rmsnorm计算公式。
|
|
938
|
+
//! 支持参数如下:
|
|
939
|
+
//! LLAMA_MODEL:默认值, Llama的rms norm计算公式。
|
|
940
|
+
//! GEMMA_MODEL:Gemma的rms norm计算公式。
|
|
941
|
+
//! 不支持和“rstd”,“precisionMode”同时启用。
|
|
942
|
+
//! 量化场景下不支持使用“modelType”,该场景下配置该参数将返回报错ERROR_INVALID_PARAM。
|
|
943
|
+
ModelType modelType = LLAMA_MODEL;
|
|
944
|
+
//! \brief 动态量化类型。默认为DYNAMIC_QUANT_UNDEFINED非动态量化。当前版本暂不支持非对称动态量化。
|
|
945
|
+
DynamicQuantType dynamicQuantType = DYNAMIC_QUANT_UNDEFINED;
|
|
946
|
+
//!
|
|
947
|
+
//! \brief 预留参数
|
|
948
|
+
//!
|
|
949
|
+
uint8_t rsv[32] = {0};
|
|
950
|
+
};
|
|
951
|
+
//!
|
|
952
|
+
//! \brief PreNormParam
|
|
953
|
+
//!
|
|
954
|
+
struct PreNormParam {
|
|
955
|
+
//! \brief 量化类型。
|
|
956
|
+
//! 当前支持以下类型。
|
|
957
|
+
//! QUANT_UNQUANT
|
|
958
|
+
//! QUANT_INT8
|
|
959
|
+
QuantType quantType = QUANT_UNQUANT;
|
|
960
|
+
//! \brief Epsilon,归一化时加在分母上防止除零。
|
|
961
|
+
float epsilon = 1e-5;
|
|
962
|
+
//! \brief 是否叠加偏置。默认为False,当需要输入beta时设置为True。量化场景下不支持使用“hasBias”,该场景下配置该参数将返回报错ERROR_INVALID_PARAM。
|
|
963
|
+
bool hasBias = false;
|
|
964
|
+
//!
|
|
965
|
+
//! \brief 预留参数
|
|
966
|
+
//!
|
|
967
|
+
uint8_t rsv[23] = {0};
|
|
968
|
+
};
|
|
969
|
+
//!
|
|
970
|
+
//! \brief PostNormParam
|
|
971
|
+
//!
|
|
972
|
+
struct PostNormParam {
|
|
973
|
+
//! \brief 量化类型。
|
|
974
|
+
//! 当前仅支持QUANT_UNQUANT。
|
|
975
|
+
QuantType quantType = QUANT_UNQUANT;
|
|
976
|
+
//! \brief Epsilon,归一化时加在分母上防止除零。
|
|
977
|
+
float epsilon = 1e-5;
|
|
978
|
+
//! \brief 是否叠加偏置。默认为False,当需要输入beta时设置为True。
|
|
979
|
+
bool hasBias = false;
|
|
980
|
+
//!
|
|
981
|
+
//! \brief 预留参数
|
|
982
|
+
//!
|
|
983
|
+
uint8_t rsv[23] = {0};
|
|
984
|
+
};
|
|
985
|
+
//! \brief 归一化类型,参数如下:
|
|
986
|
+
//! RMS_NORM_UNDEFINED:默认值,未定义。
|
|
987
|
+
//! RMS_NORM_NORM:NORM参数。
|
|
988
|
+
//! RMS_NORM_PRENORM:PRENORM参数。
|
|
989
|
+
//! RMS_NORM_POSTNORM:POSTNORM参数。
|
|
990
|
+
RmsNormType layerType = RMS_NORM_UNDEFINED;
|
|
991
|
+
//! \brief NORM参数。
|
|
992
|
+
NormParam normParam;
|
|
993
|
+
//! \brief PRENORM参数。
|
|
994
|
+
PreNormParam preNormParam;
|
|
995
|
+
//! \brief POSTNORM参数。
|
|
996
|
+
PostNormParam postNormParam;
|
|
997
|
+
//!
|
|
998
|
+
//! \brief 预留参数
|
|
999
|
+
//!
|
|
1000
|
+
uint8_t rsv[8] = {0};
|
|
1001
|
+
};
|
|
1002
|
+
|
|
1003
|
+
//!
|
|
1004
|
+
//! \struct FillParam
|
|
1005
|
+
//!
|
|
1006
|
+
//! \brief 将指定位置设置为value值或者生成一个指定Shape的Tensor并填充为value。
|
|
1007
|
+
//!
|
|
1008
|
+
//! \warning 输入x不可以被broadcast。输入mask的元素只能是0或者1,且可以被broadcast。
|
|
1009
|
+
//!
|
|
1010
|
+
struct FillParam {
|
|
1011
|
+
//! \brief 是否Masked Fill。
|
|
1012
|
+
bool withMask = true;
|
|
1013
|
+
//! \brief 填充的元素,value是一个只含有一个元素的SVector。
|
|
1014
|
+
SVector<float> value;
|
|
1015
|
+
//! \brief withMask = false时,表示输出Tensor的Shape。
|
|
1016
|
+
SVector<int64_t> outDim;
|
|
1017
|
+
//!
|
|
1018
|
+
//! \brief 预留参数
|
|
1019
|
+
//!
|
|
1020
|
+
uint8_t rsv[8] = {0};
|
|
1021
|
+
};
|
|
1022
|
+
|
|
1023
|
+
//!
|
|
1024
|
+
//! \struct AllGatherParam
|
|
1025
|
+
//!
|
|
1026
|
+
//! \brief 将多个通信卡上的数据按所属rank号的顺序在第一维进行聚合,然后发送到每张卡上.
|
|
1027
|
+
//!
|
|
1028
|
+
//! rank、rankSize、rankRoot需满足以下条件:
|
|
1029
|
+
//! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
|
|
1030
|
+
//!
|
|
1031
|
+
//! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
|
|
1032
|
+
//! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
|
|
1033
|
+
//!
|
|
1034
|
+
//! \code
|
|
1035
|
+
//! rm -rf /dev/shm/sem.lccl*
|
|
1036
|
+
//! rm -rf /dev/shm/sem.hccl*
|
|
1037
|
+
//! ipcrm -a
|
|
1038
|
+
//! \endcode
|
|
1039
|
+
//!
|
|
1040
|
+
struct AllGatherParam {
|
|
1041
|
+
//! \brief 当前卡所属通信编号
|
|
1042
|
+
int rank = 0;
|
|
1043
|
+
//! \brief 通信的卡的数量
|
|
1044
|
+
int rankSize = 0;
|
|
1045
|
+
//! \brief 主通信编号
|
|
1046
|
+
int rankRoot = 0;
|
|
1047
|
+
//! \brief 通信后端指示,仅支持"hccl"和"lccl",Atlas 推理系列产品仅支持backend为"hccl"。
|
|
1048
|
+
//!
|
|
1049
|
+
//! 当backend为"lccl"时,且若机器拓扑为Atlas 800I A2推理产品单机16卡机器的拓扑时,只支持16卡全量拓扑通信或单节点内任意卡通信。
|
|
652
1050
|
//!
|
|
653
1051
|
std::string backend = "hccl";
|
|
654
|
-
//! \brief HCCL
|
|
1052
|
+
//! \brief HCCL通信域指针
|
|
1053
|
+
//! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
|
|
655
1054
|
HcclComm hcclComm = nullptr;
|
|
656
|
-
//! \brief 通信模式,CommMode
|
|
1055
|
+
//! \brief 通信模式,CommMode类型枚举值。hccl多线程只支持外部传入通信域方式
|
|
657
1056
|
CommMode commMode = COMM_MULTI_PROCESS;
|
|
658
1057
|
//!
|
|
659
1058
|
//! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
|
|
660
1059
|
//!
|
|
661
|
-
//! ranktable配置参考
|
|
662
|
-
//! https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC1alpha002/devguide/moddevg/tfmigr1/tfmigr1_000029.html
|
|
663
|
-
//!
|
|
664
1060
|
std::string rankTableFile;
|
|
665
|
-
//! \brief 通信device
|
|
1061
|
+
//! \brief 通信device组用通信域名标识,多通信域时使用。
|
|
1062
|
+
//! 当backend为"lccl"时,commMode为多进程时,commDomain需要设置为0-63的数字。
|
|
1063
|
+
//! commMode为多线程时,不支持确定性计算,"LCCL_DETERMINISTIC"需要为0或者false。
|
|
1064
|
+
//! LCCL在多进程/多线程多通信域并发场景下,"LCCL_PARALLEL"需要设置为1或者true。
|
|
1065
|
+
//! 多通信域并行功能使用结束后,"LCCL_PARALLEL"需要设置为0或者false,否则会导致基础场景性能下降。
|
|
666
1066
|
std::string commDomain;
|
|
1067
|
+
//!
|
|
1068
|
+
//! \brief 预留参数
|
|
1069
|
+
//!
|
|
1070
|
+
uint8_t rsv[64] = {0};
|
|
667
1071
|
};
|
|
668
1072
|
|
|
669
1073
|
//!
|
|
670
|
-
//! \struct
|
|
1074
|
+
//! \struct AllGatherVParam
|
|
671
1075
|
//!
|
|
672
|
-
//! \brief
|
|
1076
|
+
//! \brief 将多个通信卡上的数据按所属rank号的顺序在第一维进行聚合,然后发送到每张卡上.支持每张卡的数据不等长
|
|
673
1077
|
//!
|
|
674
1078
|
//! rank、rankSize、rankRoot需满足以下条件:
|
|
675
1079
|
//! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
|
|
@@ -683,19 +1087,23 @@ struct AllReduceParam {
|
|
|
683
1087
|
//! ipcrm -a
|
|
684
1088
|
//! \endcode
|
|
685
1089
|
//!
|
|
686
|
-
struct
|
|
687
|
-
//! \brief
|
|
688
|
-
int rank =
|
|
689
|
-
//! \brief
|
|
1090
|
+
struct AllGatherVParam {
|
|
1091
|
+
//! \brief 当前卡所属通信编号, 默认值为-1, 代表没传rank参数
|
|
1092
|
+
int rank = -1;
|
|
1093
|
+
//! \brief 通信的卡的数量
|
|
690
1094
|
int rankSize = 0;
|
|
691
|
-
//! \brief
|
|
1095
|
+
//! \brief 主通信编号
|
|
692
1096
|
int rankRoot = 0;
|
|
693
|
-
//! \brief
|
|
1097
|
+
//! \brief 通信后端指示,仅支持"hccl"和"lccl",Atlas 推理系列产品(Ascend 310P AI处理器)仅支持backend为"hccl"。
|
|
1098
|
+
//!
|
|
1099
|
+
//! 当backend为"lccl"时,且若机器拓扑为Atlas 800I A2推理产品单机16卡机器的拓扑时,只支持16卡全量拓扑通信或单节点内任意卡通信。
|
|
1100
|
+
//!
|
|
1101
|
+
std::string backend = "hccl";
|
|
1102
|
+
//! \brief HCCL通信域指针
|
|
1103
|
+
//! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
|
|
694
1104
|
HcclComm hcclComm = nullptr;
|
|
695
|
-
//! \brief 通信模式,CommMode
|
|
1105
|
+
//! \brief 通信模式,CommMode类型枚举值。hccl多线程只支持外部传入通信域方式
|
|
696
1106
|
CommMode commMode = COMM_MULTI_PROCESS;
|
|
697
|
-
//! \brief 通信后端指示,仅支持"hccl"和"lccl"。
|
|
698
|
-
std::string backend = "hccl";
|
|
699
1107
|
//!
|
|
700
1108
|
//! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
|
|
701
1109
|
//!
|
|
@@ -705,178 +1113,1338 @@ struct BroadcastParam {
|
|
|
705
1113
|
std::string rankTableFile;
|
|
706
1114
|
//! \brief 通信device组用通信域名标识,多通信域时使用,当前仅支持hccl
|
|
707
1115
|
std::string commDomain;
|
|
1116
|
+
//!
|
|
1117
|
+
//! \brief 预留参数
|
|
1118
|
+
//!
|
|
1119
|
+
uint8_t rsv[64] = {0};
|
|
708
1120
|
};
|
|
709
1121
|
|
|
710
1122
|
//!
|
|
711
|
-
//! \
|
|
712
|
-
//!
|
|
713
|
-
//! \brief 将A、B两个矩阵进行矩阵乘运算,同时可以选择对矩阵乘的运算结果添加偏置或进行反量化操作。
|
|
714
|
-
//!
|
|
715
|
-
//! 算子本质上是接收x和weight两个输入tensor作为A矩阵和B矩阵进行矩阵乘运算,可通过参数transposeA与transposeB控制做矩阵乘前是否需要对A矩阵和B矩阵进行行列转置,
|
|
716
|
-
//! 根据参数转置后的A矩阵和B矩阵需满足矩阵乘维度关系,即A矩阵最后一维与B矩阵第0维相等。该算子分为浮点和量化两类,可通过输出数据类型进行选择。
|
|
1123
|
+
//! \brief 判断参数是否相同
|
|
717
1124
|
//!
|
|
718
|
-
//! \
|
|
1125
|
+
//! \param left
|
|
1126
|
+
//! \param right
|
|
1127
|
+
//! \return bool
|
|
719
1128
|
//!
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
//! \brief 是否转置B矩阵,默认转置。
|
|
727
|
-
//!
|
|
728
|
-
//! 在Atlas 推理系列产品(配置Atlas 300I DUO)中,量化情况下,transposeB必须为true。
|
|
729
|
-
bool transposeB = true;
|
|
730
|
-
//! \brief 是否叠加偏置。
|
|
731
|
-
//!
|
|
732
|
-
//! 在Atlas 推理系列产品(配置Atlas 300I DUO)中,量化情况下,hasBias必须为true。
|
|
733
|
-
bool hasBias = true;
|
|
734
|
-
//! \brief 输出数据类型.
|
|
735
|
-
//!
|
|
736
|
-
//! 若为浮点linear,参数outDataType配置为ACL_DT_UNDEFINED,表示输出tensor的数据类型与输入tensor一致;
|
|
737
|
-
//! 若为量化linear,输出tensor的数据类型与输入tensor不一致,则参数outDataType配置为用户预期输出tensor的数据类型,
|
|
738
|
-
//! 目前仅支持ACL_FLOAT16/ACL_BF16,在Atlas 推理系列产品(配置Atlas 300I DUO)中,不支持ACL_BF16。
|
|
739
|
-
aclDataType outDataType = ACL_DT_UNDEFINED;
|
|
740
|
-
};
|
|
1129
|
+
inline bool operator==(const AllGatherVParam &left, const AllGatherVParam &right)
|
|
1130
|
+
{
|
|
1131
|
+
return left.rank == right.rank && left.rankSize == right.rankSize && left.rankRoot == right.rankRoot &&
|
|
1132
|
+
left.hcclComm == right.hcclComm && left.commMode == right.commMode && left.backend == right.backend &&
|
|
1133
|
+
left.rankTableFile == right.rankTableFile && left.commDomain == right.commDomain;
|
|
1134
|
+
}
|
|
741
1135
|
|
|
742
1136
|
//!
|
|
743
|
-
//! \struct
|
|
1137
|
+
//! \struct AllReduceParam
|
|
744
1138
|
//!
|
|
745
|
-
//! \brief
|
|
1139
|
+
//! \brief 将多个通信卡上的数据进行计算,支持相加、取最大、最小、相乘四种计算,然后发送到每张卡上.
|
|
746
1140
|
//!
|
|
747
|
-
//!
|
|
1141
|
+
//! rank、rankSize、rankRoot需满足以下条件:
|
|
1142
|
+
//! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
|
|
748
1143
|
//!
|
|
749
|
-
//! \
|
|
1144
|
+
//! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
|
|
1145
|
+
//! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
|
|
750
1146
|
//!
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
LINEAR_ALL_REDUCE = 0, //!< linear+AllReduce
|
|
760
|
-
LINEAR_REDUCE_SCATTER = 1, //!< linear+reduce_scatter
|
|
761
|
-
ALL_GATHER_LINEAR = 2, //!< AllGather+linear
|
|
762
|
-
PURE_LINEAR = 3, //!< linear
|
|
763
|
-
MAX = 4, //!< 枚举类型最大值
|
|
764
|
-
};
|
|
765
|
-
//!
|
|
766
|
-
//! \enum QuantType
|
|
767
|
-
//!
|
|
768
|
-
//! \brief QuantType类型
|
|
769
|
-
//!
|
|
1147
|
+
//! \code
|
|
1148
|
+
//! rm -rf /dev/shm/sem.lccl*
|
|
1149
|
+
//! rm -rf /dev/shm/sem.hccl*
|
|
1150
|
+
//! ipcrm -a
|
|
1151
|
+
//! \endcode
|
|
1152
|
+
//!
|
|
1153
|
+
struct AllReduceParam {
|
|
1154
|
+
//! \brief 量化类型
|
|
770
1155
|
enum QuantType : int {
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
1156
|
+
QUANT_TYPE_UNQUANT = 0, //!< 默认值
|
|
1157
|
+
QUANT_TYPE_UNDEFINED = 0, //!< 默认值
|
|
1158
|
+
QUANT_TYPE_PER_TENSOR = 1, //!< 对整个张量进行量化
|
|
1159
|
+
QUANT_TYPE_PER_CHANNEL = 2, //!< 对张量中每个channel分别进行量化
|
|
775
1160
|
QUANT_TYPE_MAX = 3, //!< 枚举类型最大值
|
|
776
1161
|
};
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
//! \brief 每张卡所属通信编号.
|
|
1162
|
+
|
|
1163
|
+
//! \brief 当前卡所属通信编号.
|
|
780
1164
|
int rank = 0;
|
|
781
|
-
//! \brief
|
|
1165
|
+
//! \brief 通信的卡的数量.
|
|
782
1166
|
int rankSize = 0;
|
|
783
|
-
//! \brief
|
|
1167
|
+
//! \brief 主通信编号.
|
|
784
1168
|
int rankRoot = 0;
|
|
785
|
-
//! \brief
|
|
786
|
-
|
|
787
|
-
//!
|
|
1169
|
+
//! \brief 通信计算类型,支持"sum","prod","max"和"min".
|
|
1170
|
+
std::string allReduceType = "sum";
|
|
1171
|
+
//!
|
|
1172
|
+
//! \brief 通信后端指示,仅支持"hccl"和"lccl".Atlas 推理系列产品仅支持backend为"hccl"。
|
|
1173
|
+
//!
|
|
1174
|
+
//! backend为"hccl"时,支持"sum","prod","max"和"min"; backend为"lccl"时,支持"sum","max"和"min".
|
|
1175
|
+
//! 当backend为"hccl"时,allReduceType为"prod"时,不支持数据类型为int16和bf16。
|
|
1176
|
+
//! 当backend为"hccl"时,Atlas 推理系列产品不支持int64,bf16,int16只有allReduceType为"sum"时支持
|
|
1177
|
+
//! 当backend为"lccl"时,不支持数据类型int64,且若机器拓扑为Atlas 800I A2推理产品单机16卡机器的拓扑时,只支持16卡全量拓扑通信或单节点内任意卡通信。
|
|
1178
|
+
//!
|
|
788
1179
|
std::string backend = "hccl";
|
|
789
|
-
//! \brief HCCL
|
|
1180
|
+
//! \brief HCCL通信域指针.
|
|
1181
|
+
//! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
|
|
790
1182
|
HcclComm hcclComm = nullptr;
|
|
791
|
-
//! \brief 通信模式,CommMode
|
|
1183
|
+
//! \brief 通信模式,CommMode类型枚举值.hccl多线程只支持外部传入通信域方式
|
|
792
1184
|
CommMode commMode = COMM_MULTI_PROCESS;
|
|
793
|
-
//! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景。
|
|
794
|
-
std::string rankTableFile;
|
|
795
|
-
//! \brief 权重并行类型。
|
|
796
|
-
ParallelType type = LINEAR_ALL_REDUCE;
|
|
797
|
-
//! \brief 是否返回中间结果,仅在使用ALL_GATHER_LINEAR时生效。
|
|
798
|
-
bool keepIntermediate = false;
|
|
799
|
-
//! \brief 量化类型。
|
|
800
|
-
QuantType quantType = QUANT_TYPE_UNDEFINED;
|
|
801
|
-
//! \brief 量化类型为QUANT_TYPE_PER_GROUP时生效。
|
|
802
|
-
int32_t quantGroupSize = 0;
|
|
803
1185
|
//!
|
|
804
|
-
//!
|
|
805
|
-
//!
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
//!
|
|
1186
|
+
//! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
|
|
1187
|
+
//!
|
|
1188
|
+
std::string rankTableFile;
|
|
1189
|
+
//! \brief 通信device组用通信域名标识,多通信域时使用。
|
|
1190
|
+
//! 当backend为"lccl"时,commMode为多进程时,commDomain需要设置为0-63的数字。
|
|
1191
|
+
//! commMode为多线程时,不支持确定性计算,"LCCL_DETERMINISTIC"需要为0或者false。
|
|
1192
|
+
//! LCCL在多进程/多线程多通信域并发场景下,"LCCL_PARALLEL"需要设置为1或者true。
|
|
1193
|
+
//! 多通信域并行功能使用结束后,"LCCL_PARALLEL"需要设置为0或者false,否则会导致基础场景性能下降。
|
|
809
1194
|
std::string commDomain;
|
|
1195
|
+
//! \brief 量化类型
|
|
1196
|
+
QuantType quantType = QUANT_TYPE_UNQUANT;
|
|
1197
|
+
//! 若为浮点AllReduce,参数outDataType配置为ACL_DT_UNDEFINED,表示输出tensor的数据类型与输入tensor一致;
|
|
1198
|
+
//! 若为量化AllReduce,输出tensor的数据类型与输入tensor不一致,则参数outDataType配置为用户预期输出tensor的数据类型,
|
|
1199
|
+
//! 量化只支持配置ACL_FLOAT16
|
|
1200
|
+
aclDataType outDataType = ACL_DT_UNDEFINED;
|
|
1201
|
+
//!
|
|
1202
|
+
//! \brief 预留参数
|
|
1203
|
+
//!
|
|
1204
|
+
uint8_t rsv[64] = {0};
|
|
810
1205
|
};
|
|
811
1206
|
|
|
812
1207
|
//!
|
|
813
|
-
//! \struct
|
|
814
|
-
//!
|
|
815
|
-
//! \brief 稀疏量化linear
|
|
1208
|
+
//! \struct BlockCopyParam
|
|
816
1209
|
//!
|
|
817
|
-
//!
|
|
818
|
-
//! 以此提升算子性能。参数tilingK和tilingN由压缩算法决定,目前均只支持取值为8.
|
|
819
|
-
//! 目前该算子仅支持在Atlas 推理系列产品(配置Atlas 300I DUO)中进行运算。
|
|
1210
|
+
//! \brief 将KVCache里通过src indices指定的block数据copy到dst indices指定的block位置上。
|
|
820
1211
|
//!
|
|
821
|
-
struct
|
|
822
|
-
//!
|
|
823
|
-
|
|
824
|
-
//!
|
|
825
|
-
|
|
826
|
-
//! \brief 压缩参数,由外部压缩算法决定,默认为1,目前仅支持取值为8。
|
|
827
|
-
uint32_t tilingK = 1;
|
|
828
|
-
//! \brief 压缩参数,由外部压缩算法决定,默认为1,目前仅支持取值为8。
|
|
829
|
-
uint32_t tilingN = 1;
|
|
1212
|
+
struct BlockCopyParam {
|
|
1213
|
+
//!
|
|
1214
|
+
//! \brief 预留参数
|
|
1215
|
+
//!
|
|
1216
|
+
uint8_t rsv[16] = {0};
|
|
830
1217
|
};
|
|
831
1218
|
|
|
832
1219
|
//!
|
|
833
|
-
//! \struct
|
|
834
|
-
//!
|
|
835
|
-
//! \brief 暂不支持
|
|
1220
|
+
//! \struct BroadcastParam
|
|
836
1221
|
//!
|
|
837
|
-
|
|
838
|
-
//! \brief 暂不支持
|
|
839
|
-
bool firstTransposeA = false;
|
|
840
|
-
//! \brief 暂不支持
|
|
841
|
-
bool firstTransposeB = false;
|
|
842
|
-
//! \brief 暂不支持
|
|
843
|
-
bool firstHasBias = true;
|
|
844
|
-
//! \brief 暂不支持
|
|
845
|
-
ActivationType activationType = ACTIVATION_FAST_GELU;
|
|
846
|
-
//! \brief 暂不支持
|
|
847
|
-
bool secondTransposeA = false;
|
|
848
|
-
//! \brief 暂不支持
|
|
849
|
-
bool secondTransposeB = false;
|
|
850
|
-
//! \brief 暂不支持
|
|
851
|
-
bool secondHasBias = true;
|
|
852
|
-
};
|
|
853
|
-
|
|
1222
|
+
//! \brief 将通信主卡上的数据广播到其他每张卡上, 该算子不支持Atlas 推理系列产品。
|
|
854
1223
|
//!
|
|
855
|
-
//!
|
|
1224
|
+
//! rank、rankSize、rankRoot需满足以下条件:
|
|
1225
|
+
//! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
|
|
856
1226
|
//!
|
|
857
|
-
//! \
|
|
1227
|
+
//! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
|
|
1228
|
+
//! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
|
|
858
1229
|
//!
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
1230
|
+
//! \code
|
|
1231
|
+
//! rm -rf /dev/shm/sem.lccl*
|
|
1232
|
+
//! rm -rf /dev/shm/sem.hccl*
|
|
1233
|
+
//! ipcrm -a
|
|
1234
|
+
//! \endcode
|
|
1235
|
+
//!
|
|
1236
|
+
|
|
1237
|
+
struct BroadcastParam {
|
|
1238
|
+
//! \brief 当前卡所属通信编号.
|
|
1239
|
+
int rank = 0;
|
|
1240
|
+
//! \brief 通信的卡的数量.
|
|
1241
|
+
int rankSize = 0;
|
|
1242
|
+
//! \brief 主通信编号.
|
|
1243
|
+
int rankRoot = 0;
|
|
1244
|
+
//! \brief HCCL通信域指针.
|
|
1245
|
+
//! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
|
|
1246
|
+
HcclComm hcclComm = nullptr;
|
|
1247
|
+
//! \brief 通信模式,CommMode类型枚举值.hccl多线程只支持外部传入通信域方式
|
|
1248
|
+
CommMode commMode = COMM_MULTI_PROCESS;
|
|
1249
|
+
//! \brief 通信后端指示,仅支持"hccl"和"lccl"。
|
|
1250
|
+
std::string backend = "hccl";
|
|
1251
|
+
//!
|
|
1252
|
+
//! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
|
|
1253
|
+
//!
|
|
1254
|
+
std::string rankTableFile;
|
|
1255
|
+
//! \brief 通信device组用通信域名标识,多通信域时使用。
|
|
1256
|
+
//! 当backend为"lccl"时,commMode为多进程时,commDomain需要设置为0-63的数字。
|
|
1257
|
+
//! commMode为多线程时,不支持确定性计算,"LCCL_DETERMINISTIC"需要为0或者false。
|
|
1258
|
+
//! LCCL在多进程/多线程多通信域并发场景下,"LCCL_PARALLEL"需要设置为1或者true。
|
|
1259
|
+
//! 多通信域并行功能使用结束后,"LCCL_PARALLEL"需要设置为0或者false,否则会导致基础场景性能下降。
|
|
1260
|
+
std::string commDomain;
|
|
1261
|
+
//!
|
|
1262
|
+
//! \brief 预留参数
|
|
1263
|
+
//!
|
|
1264
|
+
uint8_t rsv[64] = {0};
|
|
1265
|
+
};
|
|
1266
|
+
|
|
1267
|
+
//!
|
|
1268
|
+
//! \struct ReduceScatterParam
|
|
1269
|
+
//!
|
|
1270
|
+
//!
|
|
1271
|
+
//! rank、rankSize、rankRoot需满足以下条件:
|
|
1272
|
+
//! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
|
|
1273
|
+
//!
|
|
1274
|
+
//! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
|
|
1275
|
+
//! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
|
|
1276
|
+
//!
|
|
1277
|
+
//! \code
|
|
1278
|
+
//! rm -rf /dev/shm/sem.lccl*
|
|
1279
|
+
//! rm -rf /dev/shm/sem.hccl*
|
|
1280
|
+
//! ipcrm -a
|
|
1281
|
+
//! \endcode
|
|
1282
|
+
//!
|
|
1283
|
+
struct ReduceScatterParam {
|
|
1284
|
+
//! \brief 当前卡所属通信编号.
|
|
1285
|
+
int rank = 0;
|
|
1286
|
+
//! \brief 通信的卡的数量.
|
|
1287
|
+
int rankSize = 0;
|
|
1288
|
+
//! \brief 主通信编号.
|
|
1289
|
+
int rankRoot = 0;
|
|
1290
|
+
//! \brief 当前通信计算类型仅支持"sum","max"和"min",不支持"prod"。
|
|
1291
|
+
std::string reduceType = "sum";
|
|
1292
|
+
//! \brief HCCL通信域指针。
|
|
1293
|
+
//! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子。
|
|
1294
|
+
HcclComm hcclComm = nullptr;
|
|
1295
|
+
//! \brief 通信模式,CommMode类型枚举值。
|
|
1296
|
+
CommMode commMode = COMM_MULTI_PROCESS;
|
|
1297
|
+
//! \brief 通信后端指示,当"backend"为lccl且机器拓扑为Atlas 800I A2推理产品单机16卡机器的拓扑时,只支持16卡全量拓扑通信或单节点内任意卡通信。
|
|
1298
|
+
std::string backend = "lccl";
|
|
1299
|
+
//! \brief 集群信息的配置文件路径。
|
|
1300
|
+
std::string rankTableFile;
|
|
1301
|
+
//! \brief 通信device组用通信域名标识,多通信域时使用。
|
|
1302
|
+
//! 当backend为"lccl"时,commMode为多进程时,commDomain需要设置为0-63的数字。
|
|
1303
|
+
//! commMode为多线程时,不支持确定性计算,"LCCL_DETERMINISTIC"需要为0或者false。
|
|
1304
|
+
//! LCCL在多进程/多线程多通信域并发场景下,"LCCL_PARALLEL"需要设置为1或者true。
|
|
1305
|
+
//! 多通信域并行功能使用结束后,"LCCL_PARALLEL"需要设置为0或者false,否则会导致基础场景性能下降。
|
|
1306
|
+
std::string commDomain;
|
|
1307
|
+
//!
|
|
1308
|
+
//! \brief 预留参数
|
|
1309
|
+
//!
|
|
1310
|
+
uint8_t rsv[64] = {0};
|
|
1311
|
+
};
|
|
1312
|
+
|
|
1313
|
+
//!
|
|
1314
|
+
//! \struct ReduceScatterVParam
|
|
1315
|
+
//!
|
|
1316
|
+
//!
|
|
1317
|
+
//! rank、rankSize、rankRoot需满足以下条件:
|
|
1318
|
+
//! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
|
|
1319
|
+
//!
|
|
1320
|
+
//! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
|
|
1321
|
+
//! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
|
|
1322
|
+
//!
|
|
1323
|
+
//! \code
|
|
1324
|
+
//! rm -rf /dev/shm/sem.lccl*
|
|
1325
|
+
//! rm -rf /dev/shm/sem.hccl*
|
|
1326
|
+
//! ipcrm -a
|
|
1327
|
+
//! \endcode
|
|
1328
|
+
//!
|
|
1329
|
+
struct ReduceScatterVParam {
|
|
1330
|
+
//! \brief 当前卡所属通信编号.
|
|
1331
|
+
int rank = 0;
|
|
1332
|
+
//! \brief 通信的卡的数量.
|
|
1333
|
+
int rankSize = 0;
|
|
1334
|
+
//! \brief 主通信编号.
|
|
1335
|
+
int rankRoot = 0;
|
|
1336
|
+
//! \brief 表示发送数据量的数组.
|
|
1337
|
+
//! 例如,若发送的数据类型为float32,sendCounts[i] = n 表示本rank发给rank i n个float32数据。
|
|
1338
|
+
std::vector<int64_t> sendCounts;
|
|
1339
|
+
//! \brief 表示发送偏移量的数组.
|
|
1340
|
+
//! sdispls[i] = n表示本rank从相对于输入起始位置的偏移量为n的位置开始发送数据给rank i
|
|
1341
|
+
std::vector<int64_t> sdispls;
|
|
1342
|
+
//! \brief 表示接收数据量.
|
|
1343
|
+
std::int64_t recvCount = 0;
|
|
1344
|
+
//!
|
|
1345
|
+
//! \brief 当前通信计算类型仅支持"sum","max"和"min",不支持"prod"。
|
|
1346
|
+
std::string reduceType = "sum";
|
|
1347
|
+
//! \brief HCCL通信域指针。 当前算子仅支持lccl,此参数为预留参数。
|
|
1348
|
+
//! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子。
|
|
1349
|
+
HcclComm hcclComm = nullptr;
|
|
1350
|
+
//! \brief 通信模式,CommMode类型枚举值。
|
|
1351
|
+
CommMode commMode = COMM_MULTI_PROCESS;
|
|
1352
|
+
//! \brief 通信后端指示,当前算子仅支持"hccl"
|
|
1353
|
+
std::string backend = "hccl";
|
|
1354
|
+
//! \brief 集群信息的配置文件路径。
|
|
1355
|
+
std::string rankTableFile;
|
|
1356
|
+
//! \brief 通信device组用通信域名标识。
|
|
1357
|
+
std::string commDomain;
|
|
1358
|
+
//!
|
|
1359
|
+
//! \brief 预留参数
|
|
1360
|
+
//!
|
|
1361
|
+
uint8_t rsv[64] = {0};
|
|
1362
|
+
};
|
|
1363
|
+
|
|
1364
|
+
//!
|
|
1365
|
+
//! \brief 判断参数是否相同
|
|
1366
|
+
//!
|
|
1367
|
+
//! \param left
|
|
1368
|
+
//! \param right
|
|
1369
|
+
//! \return bool
|
|
1370
|
+
//!
|
|
1371
|
+
inline bool operator==(const ReduceScatterVParam &left, const ReduceScatterVParam &right)
|
|
1372
|
+
{
|
|
1373
|
+
return left.rank == right.rank && left.rankSize == right.rankSize && left.rankRoot == right.rankRoot &&
|
|
1374
|
+
left.sendCounts == right.sendCounts && left.sdispls == right.sdispls && left.recvCount == right.recvCount &&
|
|
1375
|
+
left.reduceType == right.reduceType && left.hcclComm == right.hcclComm && left.commMode == right.commMode &&
|
|
1376
|
+
left.backend == right.backend && left.rankTableFile == right.rankTableFile &&
|
|
1377
|
+
left.commDomain == right.commDomain;
|
|
1378
|
+
}
|
|
1379
|
+
|
|
1380
|
+
//!
|
|
1381
|
+
//! \struct LinearParam
|
|
1382
|
+
//!
|
|
1383
|
+
//! \brief 将A、B两个矩阵进行矩阵乘运算,同时可以选择对矩阵乘的运算结果进行叠加偏置、InplaceAdd融合或反量化操作。
|
|
1384
|
+
//!
|
|
1385
|
+
//! \note 算子本质上是接收x和weight两个输入tensor作为A矩阵和B矩阵进行矩阵乘运算,可通过参数transposeA与transposeB控制做矩
|
|
1386
|
+
//! 阵乘前是否需要对A矩阵和B矩阵进行行列转置,根据参数转置后的A矩阵和B矩阵需满足矩阵乘维度关系。例如,当transposeA为false,
|
|
1387
|
+
//! transposeB为true时,x和weight的shape可以分别为[m, k]和[n, k]。
|
|
1388
|
+
//!
|
|
1389
|
+
//! \note 该算子支持浮点和量化场景,当参数outDataType值为ACL_DT_UNDEFINED时为浮点场景,否则为量化场景。
|
|
1390
|
+
//!
|
|
1391
|
+
struct LinearParam {
|
|
1392
|
+
//! \brief Matmul所有计算类型。
|
|
1393
|
+
enum MatmulType : uint8_t {
|
|
1394
|
+
MATMUL_UNDEFINED = 0,
|
|
1395
|
+
MATMUL_EIN_SUM
|
|
1396
|
+
};
|
|
1397
|
+
//!
|
|
1398
|
+
//! \brief 是否转置A矩阵。
|
|
1399
|
+
//!
|
|
1400
|
+
//! \note 默认值为false,不转置。
|
|
1401
|
+
//!
|
|
1402
|
+
//! \warning 在量化场景下,非Atlas 800I A2推理产品仅支持配置为false。
|
|
1403
|
+
//!
|
|
1404
|
+
bool transposeA = false;
|
|
1405
|
+
//!
|
|
1406
|
+
//! \brief 是否转置B矩阵。
|
|
1407
|
+
//!
|
|
1408
|
+
//! \note 默认值为true,转置。
|
|
1409
|
+
//!
|
|
1410
|
+
//! \warning 在量化场景下,非Atlas 800I A2推理产品仅支持配置为true。
|
|
1411
|
+
//!
|
|
1412
|
+
bool transposeB = true;
|
|
1413
|
+
//!
|
|
1414
|
+
//! \brief 是否叠加偏置。
|
|
1415
|
+
//!
|
|
1416
|
+
//! \note 默认值为true,叠加偏置。
|
|
1417
|
+
//!
|
|
1418
|
+
//! \warning 在量化场景下,非Atlas 800I A2推理产品仅支持配置为true。
|
|
1419
|
+
//!
|
|
1420
|
+
//! \warning enAccum为true时,仅支持配置为false。
|
|
1421
|
+
//!
|
|
1422
|
+
bool hasBias = true;
|
|
1423
|
+
//!
|
|
1424
|
+
//! \brief 输出数据类型。
|
|
1425
|
+
//!
|
|
1426
|
+
//! \note 默认值为ACL_DT_UNDEFINED。
|
|
1427
|
+
//!
|
|
1428
|
+
//! \warning 浮点场景下:支持配置为ACL_DT_UNDEFINED。
|
|
1429
|
+
//!
|
|
1430
|
+
//! \warning 量化场景下:Atlas 800I A2推理产品支持配置为ACL_FLOAT16/ACL_BF16,否则,仅支持配置为ACL_FLOAT16。
|
|
1431
|
+
//!
|
|
1432
|
+
aclDataType outDataType = ACL_DT_UNDEFINED;
|
|
1433
|
+
//!
|
|
1434
|
+
//! \brief 是否使能累加。
|
|
1435
|
+
//!
|
|
1436
|
+
//! \note 默认值为false,不使能累加。
|
|
1437
|
+
//!
|
|
1438
|
+
//! \warning 仅在Atlas 800I A2推理产品支持配置为true。
|
|
1439
|
+
//!
|
|
1440
|
+
//! \warning hasBias为true时,仅支持配置为false。
|
|
1441
|
+
//!
|
|
1442
|
+
//! \warning 量化场景下,仅支持配置为false。
|
|
1443
|
+
//!
|
|
1444
|
+
bool enAccum = false;
|
|
1445
|
+
//!
|
|
1446
|
+
//! \brief matmul类型
|
|
1447
|
+
//!
|
|
1448
|
+
//! \note 默认值为MATMUL_UNDEFINED,非爱因斯坦乘场景。
|
|
1449
|
+
//!
|
|
1450
|
+
//! \warning 取值范围为MATMUL_UNDEFINED/MATMUL_EIN_SUM。
|
|
1451
|
+
//!
|
|
1452
|
+
MatmulType matmulType = MATMUL_UNDEFINED;
|
|
1453
|
+
//!
|
|
1454
|
+
//! \brief 预留参数
|
|
1455
|
+
//!
|
|
1456
|
+
uint8_t rsv[22] = {0};
|
|
1457
|
+
};
|
|
1458
|
+
|
|
1459
|
+
//!
|
|
1460
|
+
//! \struct LinearParallelParam
|
|
1461
|
+
//!
|
|
1462
|
+
//! \brief 通信计算并行算子,该算子功能为linear和通信算子组合
|
|
1463
|
+
//!
|
|
1464
|
+
//! 通信和计算是并行处理,和串行相比存在大幅度性能提升.
|
|
1465
|
+
//!
|
|
1466
|
+
//! \see LinearParam,AllReduceParam,AllGatherParam
|
|
1467
|
+
//!
|
|
1468
|
+
struct LinearParallelParam {
|
|
1469
|
+
//!
|
|
1470
|
+
//! \enum ParallelType
|
|
1471
|
+
//!
|
|
1472
|
+
//! \brief 通信类型
|
|
1473
|
+
//!
|
|
1474
|
+
enum ParallelType : int {
|
|
1475
|
+
UNDEFINED = -1, //!< 默认值
|
|
1476
|
+
LINEAR_ALL_REDUCE = 0, //!< linear+AllReduce
|
|
1477
|
+
LINEAR_REDUCE_SCATTER = 1, //!< linear+reduce_scatter
|
|
1478
|
+
ALL_GATHER_LINEAR = 2, //!< AllGather+linear
|
|
1479
|
+
PURE_LINEAR = 3, //!< linear
|
|
1480
|
+
ALL_GATHER_LINEAR_REDUCE_SCATTER = 4, //!< AllGather+linear+reduce_scatter
|
|
1481
|
+
MAX = 5, //!< 枚举类型最大值
|
|
1482
|
+
};
|
|
1483
|
+
//!
|
|
1484
|
+
//! \enum QuantType
|
|
1485
|
+
//!
|
|
1486
|
+
//! \brief QuantType类型
|
|
1487
|
+
//!
|
|
1488
|
+
enum QuantType : int {
|
|
1489
|
+
QUANT_TYPE_UNDEFINED = -1, //!< 默认值
|
|
1490
|
+
QUANT_TYPE_UNQUANT = -1, //!< 默认值
|
|
1491
|
+
QUANT_TYPE_PER_TENSOR = 0, //!< 对整个张量进行量化
|
|
1492
|
+
QUANT_TYPE_PER_CHANNEL = 1, //!< 对张量中每个channel分别进行量化
|
|
1493
|
+
QUANT_TYPE_PER_GROUP = 2, //!< 将张量按quantGroupSize划分后,分别进行量化
|
|
1494
|
+
QUANT_TYPE_MAX = 3, //!< 枚举类型最大值
|
|
1495
|
+
};
|
|
1496
|
+
//! \brief 权重是否需要转置,默认为true。
|
|
1497
|
+
bool transWeight = true;
|
|
1498
|
+
//! \brief 当前卡所属通信编号.
|
|
1499
|
+
int rank = 0;
|
|
1500
|
+
//! \brief 通信的卡的数量
|
|
1501
|
+
int rankSize = 0;
|
|
1502
|
+
//! \brief 主通信编号
|
|
1503
|
+
int rankRoot = 0;
|
|
1504
|
+
//! \brief 是否叠加残差。配置为false时不叠加残差,为true时叠加残差。默认不叠加残差。
|
|
1505
|
+
bool hasResidual = false;
|
|
1506
|
+
//! \brief 通信后端指示。支持"hccl","lccl","lcoc"。
|
|
1507
|
+
std::string backend = "hccl";
|
|
1508
|
+
//! \brief HCCL通信域接口获取的地址指针,仅当"hcclComm"不为nullptr时可用。
|
|
1509
|
+
HcclComm hcclComm = nullptr;
|
|
1510
|
+
//! \brief 通信模式,CommMode类型枚举值
|
|
1511
|
+
CommMode commMode = COMM_MULTI_PROCESS;
|
|
1512
|
+
//! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景。
|
|
1513
|
+
std::string rankTableFile;
|
|
1514
|
+
//! \brief 权重并行类型。
|
|
1515
|
+
ParallelType type = LINEAR_ALL_REDUCE;
|
|
1516
|
+
//! \brief 是否返回中间结果,仅在使用ALL_GATHER_LINEAR时生效。
|
|
1517
|
+
bool keepIntermediate = false;
|
|
1518
|
+
//! \brief 量化类型。
|
|
1519
|
+
QuantType quantType = QUANT_TYPE_UNQUANT;
|
|
1520
|
+
//! \brief 量化类型为QUANT_TYPE_PER_GROUP时生效。
|
|
1521
|
+
int32_t quantGroupSize = 0;
|
|
1522
|
+
//!
|
|
1523
|
+
//! 若为浮点linear,参数outDataType配置为ACL_DT_UNDEFINED,表示输出tensor的数据类型与输入tensor一致,
|
|
1524
|
+
//! 若为量化linear,输出tensor的数据类型与输入tensor不一致,则参数outDataType配置为用户预期输出tensor的数据类型,
|
|
1525
|
+
//! 如ACL_FLOAT16/ACL_BF16
|
|
1526
|
+
aclDataType outDataType = ACL_DT_UNDEFINED;
|
|
1527
|
+
//! \brief 通信device组用通信域名标识,多通信域时使用。
|
|
1528
|
+
//! 当backend为"lccl"时,commMode为多进程时,commDomain需要设置为0-63的数字。
|
|
1529
|
+
//! commMode为多线程时,不支持确定性计算,"LCCL_DETERMINISTIC"需要为0或者false。
|
|
1530
|
+
//! LCCL在多进程/多线程多通信域并发场景下,"LCCL_PARALLEL"需要设置为1或者true。
|
|
1531
|
+
//! 多通信域并行功能使用结束后,"LCCL_PARALLEL"需要设置为0或者false,否则会导致基础场景性能下降。
|
|
1532
|
+
std::string commDomain;
|
|
1533
|
+
//! \brief AllGather_Matmul_ReduceScatter算子参数结构体
|
|
1534
|
+
struct TwoDimTPInfo {
|
|
1535
|
+
//! \brief 表示ag轴卡数,规定x轴方向是非连续卡号
|
|
1536
|
+
uint16_t agDim = 0;
|
|
1537
|
+
//! \brief 表示rs轴卡数,规定y轴方向是连续卡号
|
|
1538
|
+
uint16_t rsDim = 0;
|
|
1539
|
+
//! \brief 是否沿着内轴进行allgather通信
|
|
1540
|
+
uint8_t innerDimIsAg = 1;
|
|
1541
|
+
//! \brief 填充满8字节
|
|
1542
|
+
uint8_t rsv[3] = {0};
|
|
1543
|
+
};
|
|
1544
|
+
//! \brief AllGather_Matmul_ReduceScatter算子参数
|
|
1545
|
+
TwoDimTPInfo twoDimTPInfo;
|
|
1546
|
+
//!
|
|
1547
|
+
//! \brief 预留参数
|
|
1548
|
+
//!
|
|
1549
|
+
uint8_t rsv[56] = {0};
|
|
1550
|
+
};
|
|
1551
|
+
|
|
1552
|
+
//!
|
|
1553
|
+
//! \struct LinearSparseParam
|
|
1554
|
+
//!
|
|
1555
|
+
//! \brief 稀疏量化linear
|
|
1556
|
+
//!
|
|
1557
|
+
//! 该算子实现功能与量化linear类似。不同点在于稀疏量化算子会使用压缩工具提前对weight输入进行压缩,
|
|
1558
|
+
//! 以此提升算子性能。参数tilingK和tilingN由压缩算法决定,目前均只支持取值为8.
|
|
1559
|
+
//! 目前该算子仅支持在Atlas 推理系列产品中进行运算。
|
|
1560
|
+
//!
|
|
1561
|
+
struct LinearSparseParam {
|
|
1562
|
+
//! \brief 是否转置A矩阵,默认不转置。当前仅支持transposeA = false。
|
|
1563
|
+
bool transposeA = false;
|
|
1564
|
+
//! \brief 是否转置B矩阵,默认转置。当前仅支持transposeB = true。
|
|
1565
|
+
bool transposeB = true;
|
|
1566
|
+
//! \brief 压缩参数,由外部压缩算法决定,默认为8,目前仅支持取值为8。
|
|
1567
|
+
uint32_t tilingK = 8;
|
|
1568
|
+
//! \brief 压缩参数,由外部压缩算法决定,默认为8,目前仅支持取值为8。
|
|
1569
|
+
uint32_t tilingN = 8;
|
|
1570
|
+
//!
|
|
1571
|
+
//! \brief 预留参数
|
|
1572
|
+
//!
|
|
1573
|
+
uint8_t rsv[12] = {0};
|
|
1574
|
+
};
|
|
1575
|
+
|
|
1576
|
+
//!
|
|
1577
|
+
//! \brief 旋转位置编码。hiddenSizeQ必须是hiddenSizeK的整数倍且满足hiddenSizeQ = headDim * headNum。
|
|
1578
|
+
//!
|
|
1579
|
+
struct RopeParam {
|
|
1580
|
+
//! \brief rope,旋转系数,对半旋转是2,支持配置2、4或headDim / 2。
|
|
1581
|
+
int32_t rotaryCoeff = 4;
|
|
1582
|
+
//! \brief 训练用参数,支持配置0或1
|
|
1583
|
+
int32_t cosFormat = 0;
|
|
1584
|
+
//!
|
|
1585
|
+
//! \brief 预留参数
|
|
1586
|
+
//!
|
|
1587
|
+
uint8_t rsv[8] = {0};
|
|
1588
|
+
};
|
|
1589
|
+
|
|
1590
|
+
//!
|
|
1591
|
+
//! \brief 判断参数是否相同
|
|
1592
|
+
//!
|
|
1593
|
+
//! \param left
|
|
1594
|
+
//! \param right
|
|
1595
|
+
//! \return bool
|
|
1596
|
+
//!
|
|
1597
|
+
inline bool operator==(const RopeParam &left, const RopeParam &right)
|
|
1598
|
+
{
|
|
1599
|
+
return left.rotaryCoeff == right.rotaryCoeff && left.cosFormat == right.cosFormat;
|
|
1600
|
+
}
|
|
1601
|
+
|
|
1602
|
+
//!
|
|
1603
|
+
//! \brief 旋转位置编码后进行concat操作。hiddenSizeQ必须是hiddenSizeK的整数倍且满足hiddenSizeQ = headDim * headNum。
|
|
1604
|
+
//!
|
|
1605
|
+
struct RopeQConcatParam {
|
|
1606
|
+
//!
|
|
1607
|
+
//! \brief 预留参数
|
|
1608
|
+
//!
|
|
1609
|
+
uint8_t rsv[16] = {0};
|
|
1610
|
+
};
|
|
1611
|
+
|
|
1612
|
+
//!
|
|
1613
|
+
//! \brief 判断参数是否相同
|
|
1614
|
+
//!
|
|
1615
|
+
//! \param left
|
|
1616
|
+
//! \param right
|
|
1617
|
+
//! \return bool
|
|
1618
|
+
//!
|
|
1619
|
+
inline bool operator==(const RopeQConcatParam &left, const RopeQConcatParam &right)
|
|
1620
|
+
{
|
|
1621
|
+
(void)left;
|
|
1622
|
+
(void)right;
|
|
1623
|
+
return true;
|
|
1624
|
+
}
|
|
1625
|
+
|
|
1626
|
+
//!
|
|
1627
|
+
//! \struct RelayAttentionParam
|
|
1628
|
+
//!
|
|
1629
|
+
//! \brief 通过减少共享组的kv搬运来优化模型吞吐量
|
|
1630
|
+
//!
|
|
1631
|
+
//!
|
|
1632
|
+
struct RelayAttentionParam {
|
|
1633
|
+
//!
|
|
1634
|
+
//! \brief head数量
|
|
1635
|
+
//!
|
|
1636
|
+
//! \note 默认值为0
|
|
1637
|
+
//!
|
|
1638
|
+
int32_t headNum = 0;
|
|
1639
|
+
//!
|
|
1640
|
+
//! \brief 算子tor值
|
|
1641
|
+
//!
|
|
1642
|
+
//! \note 默认值为1.0
|
|
1643
|
+
//!
|
|
1644
|
+
float qkScale = 1;
|
|
1645
|
+
//!
|
|
1646
|
+
//! \brief kv头数量
|
|
1647
|
+
//! \warning 取值范围为[0,8]
|
|
1648
|
+
//! \note 默认值为0
|
|
1649
|
+
//!
|
|
1650
|
+
int32_t kvHeadNum = 0;
|
|
1651
|
+
//!
|
|
1652
|
+
//! \enum MaskType
|
|
1653
|
+
//!
|
|
1654
|
+
//! \brief mask类型
|
|
1655
|
+
//!
|
|
1656
|
+
enum MaskType : int {
|
|
1657
|
+
MASK_TYPE_UNDEFINED = 0, //!< 默认值,全0mask
|
|
1658
|
+
MASK_TYPE_NORM, //!< 倒三角mask
|
|
1659
|
+
};
|
|
1660
|
+
//!
|
|
1661
|
+
//! \brief mask类型
|
|
1662
|
+
//!
|
|
1663
|
+
//! \note 默认值为MASK_TYPE_UNDEFINED
|
|
1664
|
+
//!
|
|
1665
|
+
MaskType maskType = MASK_TYPE_UNDEFINED;
|
|
1666
|
+
//!
|
|
1667
|
+
//! \brief 预留参数
|
|
1668
|
+
//!
|
|
1669
|
+
uint8_t rsv[32] = {0};
|
|
1670
|
+
};
|
|
1671
|
+
|
|
1672
|
+
//!
|
|
1673
|
+
//! \brief KVCache+KVCache+Muls+FlashAttention.
|
|
1674
|
+
//!
|
|
1675
|
+
struct SelfAttentionParam {
|
|
1676
|
+
//!
|
|
1677
|
+
//! \enum CalcType
|
|
1678
|
+
//!
|
|
1679
|
+
//! \brief 计算类型
|
|
1680
|
+
//!
|
|
1681
|
+
enum CalcType : int {
|
|
1682
|
+
UNDEFINED = 0, //!< decoder&encoder for flashAttention
|
|
1683
|
+
ENCODER, //!< encoder for flashAttention
|
|
1684
|
+
DECODER, //!< decoder for flashAttention
|
|
1685
|
+
PA_ENCODER, //!< encoder for pagedAttention
|
|
1686
|
+
PREFIX_ENCODER, //!< prefix encoder for flashAttention
|
|
1687
|
+
};
|
|
1688
|
+
//!
|
|
1689
|
+
//! \enum KernelType
|
|
1690
|
+
//!
|
|
1691
|
+
//! \brief 算子内核精度类型
|
|
1692
|
+
//!
|
|
1693
|
+
enum KernelType : int {
|
|
1694
|
+
KERNELTYPE_DEFAULT = 0, //!< i:float16, bmm:float16, o:float16
|
|
1695
|
+
KERNELTYPE_HIGH_PRECISION //!< i:float16, bmm:float, o:float16
|
|
1696
|
+
};
|
|
1697
|
+
//!
|
|
1698
|
+
//! \enum ClampType
|
|
1699
|
+
//!
|
|
1700
|
+
//! \brief clamp类型
|
|
1701
|
+
//!
|
|
1702
|
+
enum ClampType : int {
|
|
1703
|
+
CLAMP_TYPE_UNDEFINED = 0, //!< 不做clamp
|
|
1704
|
+
CLAMP_TYPE_MIN_MAX //!< 做clamp,同时指定最大最小值
|
|
1705
|
+
};
|
|
1706
|
+
//!
|
|
1707
|
+
//! \enum MaskType
|
|
1708
|
+
//!
|
|
1709
|
+
//! \brief mask类型
|
|
1710
|
+
//!
|
|
1711
|
+
enum MaskType : int {
|
|
1712
|
+
MASK_TYPE_UNDEFINED = 0, //!< 默认值,全0mask
|
|
1713
|
+
MASK_TYPE_NORM, //!< 倒三角mask
|
|
1714
|
+
MASK_TYPE_ALIBI, //!< alibi mask
|
|
1715
|
+
MASK_TYPE_NORM_COMPRESS, //!< 倒三角压缩mask
|
|
1716
|
+
MASK_TYPE_ALIBI_COMPRESS, //!< alibi压缩mask
|
|
1717
|
+
MASK_TYPE_ALIBI_COMPRESS_SQRT, //!< alibi压缩开平方mask
|
|
1718
|
+
MASK_TYPE_ALIBI_COMPRESS_LEFT_ALIGN, //!< alibi压缩mask左对齐,只支持Atlas 800I A2推理产品
|
|
1719
|
+
MASK_TYPE_SLIDING_WINDOW_NORM, //!< sliding window attention mask
|
|
1720
|
+
MASK_TYPE_SLIDING_WINDOW_COMPRESS //!< sliding window attention压缩mask
|
|
1721
|
+
};
|
|
1722
|
+
//!
|
|
1723
|
+
//! \enum KvCacheCfg
|
|
1724
|
+
//!
|
|
1725
|
+
//! \brief KvCache配置,不支持calcType为PA_ENCODER
|
|
1726
|
+
//!
|
|
1727
|
+
enum KvCacheCfg : int {
|
|
1728
|
+
K_CACHE_V_CACHE = 0, //!< 默认值,进行kvcache处理
|
|
1729
|
+
K_BYPASS_V_BYPASS, //!< 直接传入kvcache
|
|
1730
|
+
};
|
|
1731
|
+
//!
|
|
1732
|
+
//! \enum ScaleType
|
|
1733
|
+
//!
|
|
1734
|
+
//! \brief The type values of ScaleType.
|
|
1735
|
+
//!
|
|
1736
|
+
enum ScaleType : int {
|
|
1737
|
+
SCALE_TYPE_TOR = 0, //!< 默认值,不开启LogN缩放
|
|
1738
|
+
SCALE_TYPE_LOGN, //!< 注意力使用LogN缩放,quantType只能是0
|
|
1739
|
+
SCALE_TYPE_MAX //!< 边界值,仅用于判断是否出界
|
|
1740
|
+
};
|
|
1741
|
+
|
|
1742
|
+
//! \enum QuantType
|
|
1743
|
+
//!
|
|
1744
|
+
//! \brief quant类型
|
|
1745
|
+
//!
|
|
1746
|
+
enum QuantType : int {
|
|
1747
|
+
TYPE_QUANT_UNDEFINED = 0, //!< 默认值,不与量化融合,此时q,k,v为bf16/float16
|
|
1748
|
+
TYPE_QUANT_UNQUANT = 0, //!< 默认值,不与量化融合,此时q,k,v为bf16/float16
|
|
1749
|
+
TYPE_DEQUANT_FUSION = 1, //!< 与反量化融合, 预留类型,当前不能够取此值。
|
|
1750
|
+
TYPE_QUANT_QKV_OFFLINE = 2, //!< 离线INT8量化, 只支持Atlas 800I A2推理产品
|
|
1751
|
+
TYPE_QUANT_QKV_ONLINE = 3 //!< 在线INT8量化, 只支持Atlas 800I A2推理产品
|
|
1752
|
+
};
|
|
1753
|
+
//!
|
|
1754
|
+
//! \enum CacheType
|
|
1755
|
+
//!
|
|
1756
|
+
//! \brief cache内部排布类型, 为CACHE_TYPE_SWA开启SWA KVCache优化,只储存后windowSize个token的KVCache,
|
|
1757
|
+
//! 控制KVCache的长度不超过windowSize, 以此减少显存占用
|
|
1758
|
+
//!
|
|
1759
|
+
enum CacheType : int8_t {
|
|
1760
|
+
CACHE_TYPE_NORM = 0, //!< 正常cache
|
|
1761
|
+
CACHE_TYPE_SWA = 1 //!< 固定长度cache
|
|
1762
|
+
};
|
|
1763
|
+
//!
|
|
1764
|
+
//! 量化类型(只支持PA_ENCODER):
|
|
1765
|
+
//! 当值为TYPE_QUANT_QKV_OFFLINE或TYPE_QUANT_QKV_ONLINE时q,k,v为int8。key,value的headsize等长,范围为(0, 256],
|
|
1766
|
+
//! 且32对齐。outdatatype需要配置,只能是ACL_FLOAT16或ACL_BF16。inputLayout只支持TYPE_BSND,calcType只能为PA_ENCODER。
|
|
1767
|
+
QuantType quantType = TYPE_QUANT_UNQUANT;
|
|
1768
|
+
|
|
1769
|
+
//! output数据类型:只支持PA_ENCODER,且QuantType不为TYPE_QUANT_UNQUANT(格式为aclDataType)
|
|
1770
|
+
aclDataType outDataType = ACL_DT_UNDEFINED;
|
|
1771
|
+
|
|
1772
|
+
//! query头数量, 需大于0
|
|
1773
|
+
int32_t headNum = 0;
|
|
1774
|
+
//! kv头数量, 该值需要用户根据使用的模型实际情况传入
|
|
1775
|
+
//! kvHeadNum = 0时,keyCache的k_head_num,valueCache的v_head_num与query的num_heads一致,均为num_heads的数值
|
|
1776
|
+
//! kvHeadNum != 0时,keyCache的k_head_num, valueCache的v_head_num与kvHeadNum值相同
|
|
1777
|
+
int32_t kvHeadNum = 0;
|
|
1778
|
+
//! query缩放系数
|
|
1779
|
+
float qScale = 1;
|
|
1780
|
+
//! 算子tor值, 在Q*K^T后乘
|
|
1781
|
+
float qkScale = 1;
|
|
1782
|
+
//! 是否开启动态batch
|
|
1783
|
+
bool batchRunStatusEnable = false;
|
|
1784
|
+
//! 是否开启倒三角优化, 只有mask为倒三角的时候才能开启优化
|
|
1785
|
+
uint32_t isTriuMask = 0;
|
|
1786
|
+
//! 计算类型
|
|
1787
|
+
CalcType calcType = UNDEFINED;
|
|
1788
|
+
//! 内核精度类型
|
|
1789
|
+
KernelType kernelType = KERNELTYPE_DEFAULT;
|
|
1790
|
+
//! clamp类型
|
|
1791
|
+
ClampType clampType = CLAMP_TYPE_UNDEFINED;
|
|
1792
|
+
//! clamp功能最小值
|
|
1793
|
+
float clampMin = 0;
|
|
1794
|
+
//! clamp功能最大值
|
|
1795
|
+
float clampMax = 0;
|
|
1796
|
+
//! mask类型
|
|
1797
|
+
MaskType maskType = MASK_TYPE_UNDEFINED;
|
|
1798
|
+
//! kvcache配置
|
|
1799
|
+
KvCacheCfg kvcacheCfg = K_CACHE_V_CACHE;
|
|
1800
|
+
//! scale类型
|
|
1801
|
+
ScaleType scaleType = SCALE_TYPE_TOR;
|
|
1802
|
+
//! 数据排布格式默认为BSND
|
|
1803
|
+
InputLayout inputLayout = TYPE_BSND;
|
|
1804
|
+
//! \brief 大于0时开启MLA合并kvcache功能,表示kv合并传入时v的head_size
|
|
1805
|
+
//! \note 默认值为0
|
|
1806
|
+
//! \warning 取值范围为[0,576]
|
|
1807
|
+
uint32_t mlaVHeadSize = 0;
|
|
1808
|
+
//! \brief cache内部排布,开启SWA特性并设置为CACHE_TYPE_SWA可以开启SWA cache优化
|
|
1809
|
+
//! \note 默认值为CACHE_TYPE_NORM
|
|
1810
|
+
//! \warning 只有开启SWA特性后才可以是CACHE_TYPE_SWA
|
|
1811
|
+
CacheType cacheType = CACHE_TYPE_NORM;
|
|
1812
|
+
//! \brief windowSize大于0时开启SWA特性,开启SWA特性后表示sliding window 大小
|
|
1813
|
+
//! \note 默认值为0
|
|
1814
|
+
//! \warning windowSize大于0时需要将maskType设置为MASK_TYPE_SLIDING_WINDOW_NORM或MASK_TYPE_SLIDING_WINDOW_COMPRESS
|
|
1815
|
+
uint32_t windowSize = 0;
|
|
1816
|
+
//!
|
|
1817
|
+
//! \brief 预留参数
|
|
1818
|
+
//!
|
|
1819
|
+
uint8_t rsv[64] = {0};
|
|
1820
|
+
};
|
|
1821
|
+
|
|
1822
|
+
//!
|
|
1823
|
+
//! \brief PagedAttention.
|
|
1824
|
+
//!
|
|
1825
|
+
//! 一个Q有多个token,一个token对应多个KV的token,以token0为例,block_table代表其对应的KV的block_id,-1代表截止,
|
|
1826
|
+
//! 所以第二行和第四行为其目标block,context_lens则表示KV有多少个token,则代表仅有block_id为(3,4,5,9,10)是需要与Q进行计算的。
|
|
1827
|
+
//!
|
|
1828
|
+
struct PagedAttentionParam {
|
|
1829
|
+
//! query 头数量
|
|
1830
|
+
int32_t headNum = 0;
|
|
1831
|
+
//! 算子tor值, 在Q*K^T后乘
|
|
1832
|
+
float qkScale = 1.0;
|
|
1833
|
+
//! kv头数量
|
|
1834
|
+
int32_t kvHeadNum = 0;
|
|
1835
|
+
//!
|
|
1836
|
+
//! \enum MaskType
|
|
1837
|
+
//!
|
|
1838
|
+
//! \brief The type values of MaskType.
|
|
1839
|
+
//!
|
|
1840
|
+
enum MaskType : int {
|
|
1841
|
+
UNDEFINED = 0, //!< 默认值,全0的mask
|
|
1842
|
+
MASK_TYPE_NORM, //!< 倒三角mask
|
|
1843
|
+
MASK_TYPE_ALIBI, //!< alibi mask
|
|
1844
|
+
MASK_TYPE_SPEC //!< 并行解码mask
|
|
1845
|
+
};
|
|
1846
|
+
//! mask类型
|
|
1847
|
+
MaskType maskType = UNDEFINED;
|
|
1848
|
+
//! 是否开启动态batch
|
|
1849
|
+
bool batchRunStatusEnable = false;
|
|
1850
|
+
//!
|
|
1851
|
+
//! \enum QuantType
|
|
1852
|
+
//!
|
|
1853
|
+
//! \brief quant类型
|
|
1854
|
+
//!
|
|
1855
|
+
enum QuantType : int {
|
|
1856
|
+
TYPE_QUANT_UNDEFINED = 0, //!< 默认值,不与量化融合,此时q,k,v为bf16/float16
|
|
1857
|
+
TYPE_QUANT_UNQUANT = 0, //!< 默认值,不与量化融合,此时q,k,v为bf16/float16
|
|
1858
|
+
TYPE_DEQUANT_FUSION = 1, //!< 与反量化融合, 只支持Atlas 800I A2推理产品
|
|
1859
|
+
TYPE_QUANT_QKV_OFFLINE = 2, //!< 离线INT8量化, 只支持Atlas 800I A2推理产品
|
|
1860
|
+
TYPE_QUANT_QKV_ONLINE = 3 //!< 在线INT8量化, 只支持Atlas 800I A2推理产品
|
|
1861
|
+
};
|
|
1862
|
+
//!
|
|
1863
|
+
//! 量化类型:
|
|
1864
|
+
//! 为TYPE_QUANT_UNQUANT时q,keyCache,valueCache为bf16/float16。
|
|
1865
|
+
//! 为TYPE_DEQUANT_FUSION时q为bf16/float16,keyCache,valueCache为int8。
|
|
1866
|
+
//! 为TYPE_QUANT_QKV_OFFLINE或TYPE_QUANT_QKV_ONLINE时q,keyCache,valueCache为int8。
|
|
1867
|
+
//! keyCache,valueCache的headsize等长,范围为(0, 256],且block_size * head_size ≤ 128 * 128。
|
|
1868
|
+
//! outDataType需要配置,只能是ACL_FLOAT16或ACL_BF16。inputLayout只支持TYPE_BSND。
|
|
1869
|
+
QuantType quantType = TYPE_QUANT_UNQUANT;
|
|
1870
|
+
|
|
1871
|
+
//! output数据类型(格式为aclDataType)
|
|
1872
|
+
aclDataType outDataType = ACL_DT_UNDEFINED;
|
|
1873
|
+
|
|
1874
|
+
//! 开启量化功能后是否使用offset
|
|
1875
|
+
bool hasQuantOffset = false;
|
|
1876
|
+
//!
|
|
1877
|
+
//! \enum CompressType
|
|
1878
|
+
//!
|
|
1879
|
+
//! \brief 压缩类型
|
|
1880
|
+
//!
|
|
1881
|
+
enum CompressType : int {
|
|
1882
|
+
COMPRESS_TYPE_UNDEFINED = 0, //!< 默认值,不压缩
|
|
1883
|
+
COMPRESS_TYPE_KVHEAD, //!< 压缩key_cache, value_cache的kvHead维度, 只支持Atlas 800I A2推理产品。
|
|
1884
|
+
COMPRESS_TYPE_KVHEAD_ROPE, //!< rope场景压缩key_cache, value_cache的kvHead维度, 只支持Atlas 800I A2推理产品。
|
|
1885
|
+
COMPRESS_TYPE_MAX //!< 压缩类型边界值,仅用于判断是否出界,所有情况不能取该值。
|
|
1886
|
+
};
|
|
1887
|
+
//!
|
|
1888
|
+
//! 压缩方式
|
|
1889
|
+
//! 为COMPRESS_TYPE_KVHEAD时,不支持quantType为TYPE_QUANT_QKV_OFFLINE(2)和TYPE_QUANT_QKV_ONLINE(3)。
|
|
1890
|
+
//! 为COMPRESS_TYPE_KVHEAD_ROPE时,maskType需传UNDEFINED(0)。不支持quantType为TYPE_QUANT_QKV_OFFLINE(2)和TYPE_QUANT_QKV_ONLINE(3)。
|
|
1891
|
+
CompressType compressType = COMPRESS_TYPE_UNDEFINED;
|
|
1892
|
+
//!
|
|
1893
|
+
//! \enum CalcType
|
|
1894
|
+
//!
|
|
1895
|
+
//! \brief The type values of CalcType.
|
|
1896
|
+
//!
|
|
1897
|
+
enum CalcType : int {
|
|
1898
|
+
CALC_TYPE_UNDEFINED = 0, //!< 默认值,不开启并行解码
|
|
1899
|
+
CALC_TYPE_SPEC //!< 此计算模式支持传入长度大于1的qseqlen,启用并行解码功能
|
|
1900
|
+
};
|
|
1901
|
+
//! 计算类型
|
|
1902
|
+
CalcType calcType = CALC_TYPE_UNDEFINED;
|
|
1903
|
+
|
|
1904
|
+
//!
|
|
1905
|
+
//! \enum ScaleType
|
|
1906
|
+
//!
|
|
1907
|
+
//! \brief The type values of ScaleType.
|
|
1908
|
+
//!
|
|
1909
|
+
enum ScaleType : int {
|
|
1910
|
+
SCALE_TYPE_TOR = 0, //!< 默认值,不开启LogN缩放
|
|
1911
|
+
SCALE_TYPE_LOGN, //!< 注意力使用LogN缩放
|
|
1912
|
+
SCALE_TYPE_MAX //!< 边界值,仅用于判断是否出界
|
|
1913
|
+
};
|
|
1914
|
+
//! scale类型
|
|
1915
|
+
//! 为SCALE_TYPE_LOGN时,不支持quantType为TYPE_QUANT_QKV_OFFLINE(2)和TYPE_QUANT_QKV_ONLINE(3)。
|
|
1916
|
+
ScaleType scaleType = SCALE_TYPE_TOR;
|
|
1917
|
+
|
|
1918
|
+
//! 数据排布格式默认为BSND
|
|
1919
|
+
InputLayout inputLayout = TYPE_BSND;
|
|
1920
|
+
//! \brief 大于0时开启MLA合并kvcache功能,表示kv合并传入时v的head_size
|
|
1921
|
+
//! \note 默认值为0
|
|
1922
|
+
//! \warning 取值范围为[0,576]
|
|
1923
|
+
uint32_t mlaVHeadSize = 0;
|
|
1924
|
+
//!
|
|
1925
|
+
//! \brief 预留参数
|
|
1926
|
+
//!
|
|
1927
|
+
uint8_t rsv[68] = {0};
|
|
1928
|
+
};
|
|
1929
|
+
|
|
1930
|
+
//!
|
|
1931
|
+
//! \brief 数据格式转换处理。
|
|
1932
|
+
//!
|
|
1933
|
+
//! 使用的NZ的dims约定表示方式:{b, n1, m1m0, n0},对应的ND的dims是{b, m, n},
|
|
1934
|
+
//! 其中:b表示batch,如果batch为1,该维度为1,不可省略。如果batch有多个,该维度为所有batch维度合轴的结果。
|
|
1935
|
+
//! m0/n0表示对齐位,float16时,n0与m0都为16, int8时,n0为32,m0为16,m1m0表示原始ND的m维度经过对齐位向上对齐,
|
|
1936
|
+
//! n1表示原始ND的n维度经过对齐位向上对齐后,除以n0的商。例如原始ND的dims为{8, 100, 30},则其对应的NZ的dims为{8, 2, 112, 16}。
|
|
1937
|
+
//!
|
|
1938
|
+
//! \warning outCrops的长度要求是2,其值须满足以下要求:
|
|
1939
|
+
//! - 如果m1m0落在区间(k1 × 16, (k1 + 1) × 16](其中k1为正整数)内,那么该区间即为outCrops[0]的取值范围要求。
|
|
1940
|
+
//! - 如果n0*n1落在区间(k2 × 16, (k2 + 1) × 16](其中k2为正整数)内,那么该区间即为outCrops[1]的取值范围要求。
|
|
1941
|
+
//!
|
|
1942
|
+
struct TransdataParam {
|
|
1943
|
+
//!
|
|
1944
|
+
//! \enum TransdataType
|
|
1945
|
+
//!
|
|
1946
|
+
//! \brief TransdataType类型值
|
|
1947
|
+
//!
|
|
1948
|
+
enum TransdataType : int {
|
|
1949
|
+
UNDEFINED = 0, //!< 默认
|
|
1950
|
+
FRACTAL_NZ_TO_ND, //!< FRACTAL_NZ转ND
|
|
1951
|
+
ND_TO_FRACTAL_NZ //!< ND转FRACTAL_NZ
|
|
1952
|
+
};
|
|
1953
|
+
//! \brief 数据格式转换类型,支持FRACTAL_NZ和ND互相转换。
|
|
1954
|
+
TransdataType transdataType = UNDEFINED;
|
|
1955
|
+
//! \brief 仅当FRACTAL_NZ转ND时使用,表示原ND数据格式Shape的最后两维。
|
|
1956
|
+
SVector<int64_t> outCrops = {0, 0};
|
|
1957
|
+
//!
|
|
1958
|
+
//! \brief 预留参数
|
|
1959
|
+
//!
|
|
1960
|
+
uint8_t rsv[8] = {0};
|
|
1961
|
+
};
|
|
1962
|
+
|
|
1963
|
+
//!
|
|
1964
|
+
//! \brief 三目运算。
|
|
1965
|
+
//!
|
|
1966
|
+
//! 输入张量为cond,x,y, 输出张量 z = cond ? x : y;
|
|
1967
|
+
//! 输入cond的元素只能是0或者1
|
|
1968
|
+
//! 输出z的维度为输入x与y广播后的结果。要求cond, x, y必须是可广播的。
|
|
1969
|
+
//!
|
|
1970
|
+
struct WhereParam {
|
|
1971
|
+
//!
|
|
1972
|
+
//! \brief 预留参数
|
|
1973
|
+
//!
|
|
1974
|
+
uint8_t rsv[8] = {0};
|
|
1975
|
+
};
|
|
1976
|
+
|
|
1977
|
+
//!
|
|
1978
|
+
//! \brief 将输入Tensor的Shape,按指定轴扩展指定的倍数。
|
|
1979
|
+
//!
|
|
1980
|
+
//! \warning 输出y的维度和multiples维度一致,每个维度大小为输入x广播到multiples维度后和multiples对应维度的乘积。
|
|
1981
|
+
//!
|
|
1982
|
+
struct RepeatParam {
|
|
1983
|
+
//!
|
|
1984
|
+
//! \brief 每一维度上扩展的倍数。
|
|
1985
|
+
//!
|
|
1986
|
+
//! \warning
|
|
1987
|
+
//! - 支持在不超过两个维度上进行扩展
|
|
1988
|
+
//! - multiples的维度小于等于8且需大于或等于输入x的维度,每一个元素要求大于0。
|
|
1989
|
+
//!
|
|
1990
|
+
SVector<int64_t> multiples;
|
|
1991
|
+
//!
|
|
1992
|
+
//! \brief 预留参数
|
|
1993
|
+
//!
|
|
1994
|
+
uint8_t rsv[8] = {0};
|
|
1995
|
+
};
|
|
1996
|
+
|
|
1997
|
+
//!
|
|
1998
|
+
//! \struct SetValueParam
|
|
1999
|
+
//!
|
|
2000
|
+
//! \brief 将输入源张量中的内容拷贝到输入目标张量指定位置中.
|
|
2001
|
+
//!
|
|
2002
|
+
//! 该拷贝为原地拷贝,最终结果修改在输入目标张量中.<br>
|
|
2003
|
+
//! 输入目标张量 dst: [a,b,c], 输入源张量src: [d,e,f].
|
|
2004
|
+
//! dst[starts[0]: ends[0], starts[1]: ends[1], starts[2]: ends[2]] = src.<br>
|
|
2005
|
+
//! 其中 ends[0]-starts[0]需为src第0维的维度大小,ends[1]-starts[1]需为src第1维的维度大小,ends[2]-starts[2]需为src第2维的维度大小。
|
|
2006
|
+
//!
|
|
2007
|
+
//! \warning 输入src和输入dst的维数须相同.<br>
|
|
2008
|
+
//! 输入src的各维度大小要求小于或等于输入dst对应维度大小.<br>
|
|
2009
|
+
//! 输入src和输入dst的各维度要求有一个或两个维度不相同,且需要满足:
|
|
2010
|
+
//! - 如果有一个维度不相同,则这个维度不能是最高维(第0维)。
|
|
2011
|
+
//! - 如果有两个维度不相同,则其中一个不同的维度必须是最高维(第0维)。
|
|
2012
|
+
//!
|
|
2013
|
+
struct SetValueParam {
|
|
2014
|
+
//! \brief 每一维拷贝起始位置
|
|
2015
|
+
SVector<int64_t> starts;
|
|
2016
|
+
//! \brief 每一维拷贝结束位置后一个位置,拷贝到该位置前一个位置为止
|
|
2017
|
+
SVector<int64_t> ends;
|
|
2018
|
+
//! \brief 每一维拷贝步长,当前仅支持strides为全1.
|
|
2019
|
+
SVector<int64_t> strides;
|
|
2020
|
+
//!
|
|
2021
|
+
//! \brief 预留参数
|
|
2022
|
+
//!
|
|
2023
|
+
uint8_t rsv[8] = {0};
|
|
2024
|
+
};
|
|
2025
|
+
|
|
2026
|
+
//!
|
|
2027
|
+
//! \brief 在指定维度上求和、取最大值或最小值,并消除这个维度。
|
|
2028
|
+
//!
|
|
2029
|
+
struct ReduceParam {
|
|
2030
|
+
//!
|
|
2031
|
+
//! \enum ReduceType
|
|
2032
|
+
//!
|
|
2033
|
+
//! \brief ReduceType支持的值
|
|
2034
|
+
//!
|
|
2035
|
+
enum ReduceType {
|
|
2036
|
+
REDUCE_UNDEFINED = 0, //!< 未定义。
|
|
2037
|
+
REDUCE_MAX, //!< 求最大值。
|
|
2038
|
+
REDUCE_MIN, //!< 求最小值。
|
|
2039
|
+
REDUCE_SUM, //!< 求和。
|
|
2040
|
+
};
|
|
2041
|
+
//! \brief reduceType
|
|
2042
|
+
ReduceType reduceType = REDUCE_UNDEFINED;
|
|
2043
|
+
//!
|
|
2044
|
+
//! \brief 指定轴(维度)。
|
|
2045
|
+
//!
|
|
2046
|
+
//! \warning axis不能为空且长度要求小于等于输入x的维度。<br>
|
|
2047
|
+
//! axis可以支持多个轴上进行处理,各元素要求小于x的维度且大于等于0
|
|
2048
|
+
//!
|
|
2049
|
+
SVector<int64_t> axis;
|
|
2050
|
+
//!
|
|
2051
|
+
//! \brief 预留参数
|
|
2052
|
+
//!
|
|
2053
|
+
uint8_t rsv[8] = {0};
|
|
2054
|
+
};
|
|
2055
|
+
|
|
2056
|
+
//!
|
|
2057
|
+
//! \brief 依据给定的词表概率以及top-p,设置随机种子及top-k保留词数,选择最合适的词及对应概率作为输出。
|
|
2058
|
+
//! 支持batch级别随机种子、top-k取样,支持exponential取样
|
|
2059
|
+
//! \warning probs必须是两维张量。
|
|
2060
|
+
//!
|
|
2061
|
+
struct TopkToppSamplingParam {
|
|
2062
|
+
//! \brief 取样处理类型
|
|
2063
|
+
enum TopkToppSamplingType {
|
|
2064
|
+
SAMPLING_UNDEFINED = -1, //!< 未定义
|
|
2065
|
+
SINGLE_TOPK_SAMPLING, //!< 非batch级别随机种子、Topk的取样
|
|
2066
|
+
BATCH_TOPK_MULTINOMIAL_SAMPLING, //!< batch级别随机种子、Topk的multinomial取样
|
|
2067
|
+
BATCH_TOPK_EXPONENTIAL_SAMPLING, //!< batch级别随机种子、Topk的exponential取样
|
|
2068
|
+
BATCH_TOPK_MULTINOMIAL_LOGPROBS_SAMPLING, //!< batch级别随机种子、Topk的multinomial 增加log_Probs取样
|
|
2069
|
+
BATCH_TOPK_EXPONENTIAL_LOGPROBS_SAMPLING, //!< batch级别随机种子、Topk的exponential 增加log_Probs取样
|
|
2070
|
+
SAMPLING_MAX, //!< 枚举最大值
|
|
2071
|
+
};
|
|
2072
|
+
//! \brief 采样类型,默认为非batch级别随机种子、Topk的取样
|
|
2073
|
+
TopkToppSamplingType topkToppSamplingType = SINGLE_TOPK_SAMPLING;
|
|
2074
|
+
//! \brief 当 topkToppSamplingType为BATCH_TOPK_MULTINOMIAL_SAMPLING时使用
|
|
2075
|
+
//! \brief 每个batch下top-p阶段随机抽样使用的随机数种子。
|
|
2076
|
+
//! \brief 维度与batch大小一致。
|
|
2077
|
+
std::vector<uint32_t> randSeeds;
|
|
2078
|
+
//! \brief 当 topkToppSamplingType为SINGLE_TOPK_SAMPLING时使用
|
|
2079
|
+
//! \brief top-p阶段随机抽样使用的随机数种子。
|
|
2080
|
+
uint32_t randSeed = 0;
|
|
2081
|
+
//! \brief 当 topkToppSamplingType为SINGLE_TOPK_SAMPLING时使用
|
|
2082
|
+
//! \brief top-k阶段保留的词的个数,需要小于词表的词数。
|
|
2083
|
+
//! \brief top-k必须大于0且小于或等于输入probs最后一维的大小。
|
|
2084
|
+
uint32_t topk = 100;
|
|
2085
|
+
//!
|
|
2086
|
+
//! \brief logProb logprobSwitch=true时有效
|
|
2087
|
+
//!
|
|
2088
|
+
int32_t logProbsSize = 0;
|
|
2089
|
+
//!
|
|
2090
|
+
//! \brief 预留参数
|
|
2091
|
+
//!
|
|
2092
|
+
uint8_t rsv[12] = {0};
|
|
2093
|
+
};
|
|
2094
|
+
|
|
2095
|
+
|
|
2096
|
+
//!
|
|
2097
|
+
//! \struct PadParam
|
|
2098
|
+
//!
|
|
2099
|
+
//! \brief 对于输入input_ids,取出每个batch最后一个有效token的embedding向量
|
|
2100
|
+
//!
|
|
2101
|
+
struct PadParam {
|
|
2102
|
+
//!
|
|
2103
|
+
//! \brief 预留参数
|
|
2104
|
+
//!
|
|
2105
|
+
uint8_t rsv[8] = {0};
|
|
2106
|
+
};
|
|
2107
|
+
|
|
2108
|
+
//!
|
|
2109
|
+
//! \struct UnpadParam
|
|
2110
|
+
//!
|
|
2111
|
+
//! \brief 对于输入input_ids,把所有有效的token拼接在一起,并在最后补0
|
|
2112
|
+
//!
|
|
2113
|
+
struct UnpadParam {
|
|
2114
|
+
//!
|
|
2115
|
+
//! \brief 预留参数
|
|
2116
|
+
//!
|
|
2117
|
+
uint8_t rsv[8] = {0};
|
|
2118
|
+
};
|
|
2119
|
+
|
|
2120
|
+
//!
|
|
2121
|
+
//! \struct SortParam
|
|
2122
|
+
//!
|
|
2123
|
+
//! \brief 后处理计算功能。实现输入tensor在最后一维上降序排列,并保留最大的num个元素,输出排序后的tensor及各元素对应的索引。
|
|
2124
|
+
//!
|
|
2125
|
+
struct SortParam {
|
|
2126
|
+
//!
|
|
2127
|
+
//! \brief 排序后保留的最大的元素的数量。
|
|
2128
|
+
//!
|
|
2129
|
+
//! \warning num是一个仅含有一个值的SVector,该值需大于0且小于等于输入x最后一维的大小。
|
|
2130
|
+
//!
|
|
2131
|
+
SVector<int32_t> num;
|
|
2132
|
+
//!
|
|
2133
|
+
//! \brief 预留参数
|
|
2134
|
+
//!
|
|
2135
|
+
uint8_t rsv[8] = {0};
|
|
2136
|
+
};
|
|
2137
|
+
|
|
2138
|
+
//!
|
|
2139
|
+
//! \struct NonzeroParam
|
|
2140
|
+
//!
|
|
2141
|
+
//! \brief 输出非零值索引。
|
|
2142
|
+
//!
|
|
2143
|
+
//! \warning 仅在Atlas 800I A2推理产品上支持
|
|
2144
|
+
//!
|
|
2145
|
+
struct NonzeroParam {
|
|
2146
|
+
//!
|
|
2147
|
+
//! \brief 预留参数
|
|
2148
|
+
//!
|
|
2149
|
+
uint8_t rsv[8] = {0};
|
|
2150
|
+
};
|
|
2151
|
+
|
|
2152
|
+
//!
|
|
2153
|
+
//! \struct SwigluQuantParam
|
|
2154
|
+
//!
|
|
2155
|
+
//! \brief SwiGLU激活与量化的融合计算,量化类型由quantType指定。
|
|
2156
|
+
//!
|
|
2157
|
+
//! \warning 仅在Atlas 800I A2推理产品上支持
|
|
2158
|
+
//!
|
|
2159
|
+
struct SwigluQuantParam {
|
|
2160
|
+
//!
|
|
2161
|
+
//! \enum QuantType
|
|
2162
|
+
//!
|
|
2163
|
+
//! \brief 量化支持的类型
|
|
2164
|
+
//!
|
|
2165
|
+
enum QuantType : int {
|
|
2166
|
+
QUANT_TYPE_PER_TOKEN = 0, //!< PER_TOKEN量化
|
|
2167
|
+
};
|
|
2168
|
+
|
|
2169
|
+
//! \brief 量化类型。默认为QUANT_TYPE_PER_TOKEN量化。
|
|
2170
|
+
QuantType quantType = QUANT_TYPE_PER_TOKEN;
|
|
2171
|
+
|
|
2172
|
+
//!
|
|
2173
|
+
//! \brief 预留参数
|
|
2174
|
+
//!
|
|
2175
|
+
uint8_t rsv[8] = {0};
|
|
2176
|
+
};
|
|
2177
|
+
|
|
2178
|
+
|
|
2179
|
+
//!
|
|
2180
|
+
//! \struct OnehotParam
|
|
2181
|
+
//!
|
|
2182
|
+
//! \brief onehot编码。
|
|
2183
|
+
//!
|
|
2184
|
+
struct OnehotParam {
|
|
2185
|
+
//! \brief depth所在下标。可为负数。
|
|
2186
|
+
int64_t axis = 0;
|
|
2187
|
+
//! \brief 类别数。
|
|
2188
|
+
int64_t depth = 0;
|
|
2189
|
+
//!
|
|
2190
|
+
//! \brief 预留参数
|
|
2191
|
+
//!
|
|
2192
|
+
uint8_t rsv[8] = {0};
|
|
2193
|
+
};
|
|
2194
|
+
|
|
2195
|
+
//!
|
|
2196
|
+
//! \struct IndexAddParam
|
|
2197
|
+
//!
|
|
2198
|
+
//! \brief 固定维度的指定下标加上某个特定值。
|
|
2199
|
+
//!
|
|
2200
|
+
struct IndexAddParam {
|
|
2201
|
+
//!
|
|
2202
|
+
//! \enum IndexType
|
|
2203
|
+
//!
|
|
2204
|
+
//! \brief 指定下标需要执行的操作类型。
|
|
2205
|
+
//!
|
|
2206
|
+
enum IndexType {
|
|
2207
|
+
INDEX_UNDEFINED = 0, //!< 默认值。不支持。
|
|
2208
|
+
INDEX_ADD, //!< 加
|
|
2209
|
+
INDEX_ADD_VALID, //!< 有效长度内加。不支持Atlas 推理系列产品。
|
|
2210
|
+
};
|
|
2211
|
+
//!
|
|
2212
|
+
//! \brief 指定下标需要执行的操作类型。
|
|
2213
|
+
//!
|
|
2214
|
+
//! \note 默认值为INDEX_UNDEFINED。
|
|
2215
|
+
//!
|
|
2216
|
+
//! \warning 目前支持取值为INDEX_ADD/INDEX_ADD_VALID。
|
|
2217
|
+
//!
|
|
2218
|
+
IndexType indexType = INDEX_UNDEFINED;
|
|
2219
|
+
//!
|
|
2220
|
+
//! \brief 输入Tensor需加上updates更新值的轴。
|
|
2221
|
+
//!
|
|
2222
|
+
//! \note 默认值为0。
|
|
2223
|
+
//!
|
|
2224
|
+
//! \warning 当indexType为INDEX_ADD时,可为负数,取值范围为[-varDimNum, varDimNum - 1]。varDimNum为inTensor0的维度数。
|
|
2225
|
+
//!
|
|
2226
|
+
//! \warning 当indexType为INDEX_ADD_VALID时,仅支持取值为0。
|
|
2227
|
+
//!
|
|
2228
|
+
int64_t axis = 0;
|
|
2229
|
+
//!
|
|
2230
|
+
//! \brief 预留参数
|
|
2231
|
+
//!
|
|
2232
|
+
uint8_t rsv[16] = {0};
|
|
2233
|
+
};
|
|
2234
|
+
|
|
2235
|
+
//!
|
|
2236
|
+
//! \struct SendParam
|
|
2237
|
+
//!
|
|
2238
|
+
//! \brief 将当前通信卡的输入发送至指定通信卡上,当前仅支持Atlas 800I A2推理产品。Send和Recv需要配套使用。
|
|
2239
|
+
//!
|
|
2240
|
+
//! rank、rankSize、rankRoot需满足以下条件:
|
|
2241
|
+
//! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize, 0 ≤ destRank < rankSize
|
|
2242
|
+
//!
|
|
2243
|
+
//! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
|
|
2244
|
+
//! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
|
|
2245
|
+
//!
|
|
2246
|
+
//! \code
|
|
2247
|
+
//! rm -rf /dev/shm/sem.lccl*
|
|
2248
|
+
//! rm -rf /dev/shm/sem.hccl*
|
|
2249
|
+
//! ipcrm -a
|
|
2250
|
+
//! \endcode
|
|
2251
|
+
//!
|
|
2252
|
+
struct SendParam {
|
|
2253
|
+
//! \brief 当前卡所属通信编号
|
|
2254
|
+
int rank = 0;
|
|
2255
|
+
//! \brief 通信的卡的数量
|
|
2256
|
+
int rankSize = 0;
|
|
2257
|
+
//! \brief 主通信编号
|
|
2258
|
+
int rankRoot = 0;
|
|
2259
|
+
//! \brief 通信域内数据接收端的rank编号.
|
|
2260
|
+
uint32_t destRank = 1;
|
|
2261
|
+
//! \brief 通信后端指示,仅支持"hccl".
|
|
2262
|
+
std::string backend = "hccl";
|
|
2263
|
+
//! \brief HCCL通信域指针
|
|
2264
|
+
//! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
|
|
2265
|
+
HcclComm hcclComm = nullptr;
|
|
2266
|
+
//! \brief 通信模式,CommMode类型枚举值。hccl多线程只支持外部传入通信域方式
|
|
2267
|
+
CommMode commMode = COMM_MULTI_PROCESS;
|
|
2268
|
+
//!
|
|
2269
|
+
//! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
|
|
2270
|
+
//!
|
|
2271
|
+
std::string rankTableFile;
|
|
2272
|
+
//! \brief 通信device组用通信域名标识,多通信域时使用,当前仅支持hccl
|
|
2273
|
+
std::string commDomain;
|
|
2274
|
+
//!
|
|
2275
|
+
//! \brief 预留参数
|
|
2276
|
+
//!
|
|
2277
|
+
uint8_t rsv[64] = {0};
|
|
2278
|
+
};
|
|
2279
|
+
|
|
2280
|
+
//!
|
|
2281
|
+
//! \struct RecvParam
|
|
2282
|
+
//!
|
|
2283
|
+
//! \brief 在当前通信卡上接收来自指定通信卡的数据,当前仅支持Atlas 800I A2推理产品。Send和Recv需要配套使用。
|
|
2284
|
+
//!
|
|
2285
|
+
//! rank、rankSize、rankRoot需满足以下条件:
|
|
2286
|
+
//! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize, 0 ≤ srcRank < rankSize
|
|
2287
|
+
//!
|
|
2288
|
+
//! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
|
|
2289
|
+
//! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
|
|
2290
|
+
//!
|
|
2291
|
+
//! \code
|
|
2292
|
+
//! rm -rf /dev/shm/sem.lccl*
|
|
2293
|
+
//! rm -rf /dev/shm/sem.hccl*
|
|
2294
|
+
//! ipcrm -a
|
|
2295
|
+
//! \endcode
|
|
2296
|
+
//!
|
|
2297
|
+
struct RecvParam {
|
|
2298
|
+
//! \brief 当前卡所属通信编号
|
|
2299
|
+
int rank = 0;
|
|
2300
|
+
//! \brief 通信的卡的数量
|
|
2301
|
+
int rankSize = 0;
|
|
2302
|
+
//! \brief 主通信编号
|
|
2303
|
+
int rankRoot = 0;
|
|
2304
|
+
//! \brief 通信域内数据发送端的rank编号.
|
|
2305
|
+
uint32_t srcRank = 1;
|
|
2306
|
+
//! \brief 通信后端指示,仅支持"hccl".
|
|
2307
|
+
std::string backend = "hccl";
|
|
2308
|
+
//! \brief HCCL通信域指针
|
|
2309
|
+
//! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
|
|
2310
|
+
HcclComm hcclComm = nullptr;
|
|
2311
|
+
//! \brief 通信模式,CommMode类型枚举值。hccl多线程只支持外部传入通信域方式
|
|
2312
|
+
CommMode commMode = COMM_MULTI_PROCESS;
|
|
2313
|
+
//!
|
|
2314
|
+
//! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
|
|
2315
|
+
//!
|
|
2316
|
+
std::string rankTableFile;
|
|
2317
|
+
//! \brief 通信device组用通信域名标识,多通信域时使用,当前仅支持hccl
|
|
2318
|
+
std::string commDomain;
|
|
2319
|
+
//!
|
|
2320
|
+
//! \brief 预留参数
|
|
2321
|
+
//!
|
|
2322
|
+
uint8_t rsv[64] = {0};
|
|
2323
|
+
};
|
|
2324
|
+
|
|
2325
|
+
//!
|
|
2326
|
+
//! \struct AllToAllParam
|
|
2327
|
+
//!
|
|
2328
|
+
//! \brief 向通信域内所有通信卡发送相同数据量(输入切分成rankSize份)的数据,并从所有通信卡接收相同数据量的数据,当前仅支持Atlas 800I A2推理产品。
|
|
2329
|
+
//!
|
|
2330
|
+
struct AllToAllParam {
|
|
2331
|
+
//! \brief 当前卡所属通信编号.
|
|
2332
|
+
int rank = 0;
|
|
2333
|
+
//! \brief 通信的卡的数量.
|
|
2334
|
+
int rankSize = 0;
|
|
2335
|
+
//! \brief 主通信编号.
|
|
2336
|
+
int rankRoot = 0;
|
|
2337
|
+
//!
|
|
2338
|
+
//! \brief 通信计算类型。仅Atlas 800 A3推理产品支持配置为"lccl"。
|
|
2339
|
+
//!
|
|
2340
|
+
std::string backend = "hccl";
|
|
2341
|
+
//! \brief HCCL通信域指针.
|
|
2342
|
+
//! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
|
|
2343
|
+
HcclComm hcclComm = nullptr;
|
|
2344
|
+
//! \brief 通信模式,CommMode类型枚举值.hccl多线程只支持外部传入通信域方式
|
|
2345
|
+
CommMode commMode = COMM_MULTI_PROCESS;
|
|
2346
|
+
//!
|
|
2347
|
+
//! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
|
|
2348
|
+
//!
|
|
2349
|
+
std::string rankTableFile;
|
|
2350
|
+
//! \brief 通信device组用通信域名标识,多通信域时使用。
|
|
2351
|
+
//! 当backend为"lccl"时,commMode为多进程时,commDomain需要设置为0-63的数字。
|
|
2352
|
+
//! commMode为多线程时,不支持确定性计算,"LCCL_DETERMINISTIC"需要为0或者false。
|
|
2353
|
+
//! LCCL在多进程/多线程多通信域并发场景下,"LCCL_PARALLEL"需要设置为1或者true。
|
|
2354
|
+
//! 多通信域并行功能使用结束后,"LCCL_PARALLEL"需要设置为0或者false,否则会导致基础场景性能下降。
|
|
2355
|
+
std::string commDomain;
|
|
2356
|
+
//! \brief 通信结果对输入进行转置。
|
|
2357
|
+
//! 仅当backend为"lccl"时生效
|
|
2358
|
+
bool transpose = false;
|
|
2359
|
+
//!
|
|
2360
|
+
//! \brief 预留参数
|
|
2361
|
+
//!
|
|
2362
|
+
uint8_t rsv[62] = {0};
|
|
2363
|
+
};
|
|
2364
|
+
|
|
2365
|
+
//!
|
|
2366
|
+
//! \struct AllToAllVParam
|
|
2367
|
+
//!
|
|
2368
|
+
//! \brief 向通信域内所有通信卡发送数据(数据量可以定制),并从所有通信卡接收数据,当前仅支持Atlas 800I A2推理产品。
|
|
2369
|
+
//!
|
|
2370
|
+
struct AllToAllVParam {
|
|
2371
|
+
//! \brief 当前卡所属通信编号.
|
|
2372
|
+
int rank = 0;
|
|
2373
|
+
//! \brief 通信的卡的数量.
|
|
2374
|
+
int rankSize = 0;
|
|
2375
|
+
//! \brief 主通信编号.
|
|
2376
|
+
int rankRoot = 0;
|
|
2377
|
+
//! \brief 表示发送数据量的数组.
|
|
2378
|
+
//! 例如,若发送的数据类型为float32,sendCounts[i] = n 表示本rank发给rank i n个float32数据。
|
|
2379
|
+
std::vector<int64_t> sendCounts;
|
|
2380
|
+
//! \brief 表示发送偏移量的数组.
|
|
2381
|
+
//! sdispls[i] = n表示本rank从相对于输入起始位置的偏移量为n的位置开始发送数据给rank i
|
|
2382
|
+
std::vector<int64_t> sdispls;
|
|
2383
|
+
//! \brief 表示接收数据量的数组.
|
|
2384
|
+
//! 例如,若发送的数据类型为float32,recvCounts[i] = n 表示本rank从rank i收到n个float32数据。
|
|
2385
|
+
std::vector<int64_t> recvCounts;
|
|
2386
|
+
//! \brief 表示接收偏移量的数组.
|
|
2387
|
+
//! rdispls[i] = n表示本rank从相对于输出起始位置的偏移量为n的位置开始接收rank i的数据
|
|
2388
|
+
std::vector<int64_t> rdispls;
|
|
2389
|
+
//!
|
|
2390
|
+
//! \brief 通信计算类型,仅支持"hccl".
|
|
2391
|
+
//!
|
|
2392
|
+
std::string backend = "hccl";
|
|
2393
|
+
//! \brief HCCL通信域指针.
|
|
2394
|
+
//! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
|
|
2395
|
+
HcclComm hcclComm = nullptr;
|
|
2396
|
+
//! \brief 通信模式,CommMode类型枚举值.hccl多线程只支持外部传入通信域方式
|
|
2397
|
+
CommMode commMode = COMM_MULTI_PROCESS;
|
|
2398
|
+
//!
|
|
2399
|
+
//! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
|
|
2400
|
+
//!
|
|
2401
|
+
std::string rankTableFile;
|
|
2402
|
+
//! \brief 通信device组用通信域名标识,多通信域时使用。
|
|
2403
|
+
//! 当backend为"lccl"时,commMode为多进程时,commDomain需要设置为0-63的数字。
|
|
2404
|
+
//! commMode为多线程时,不支持确定性计算,"LCCL_DETERMINISTIC"需要为0或者false。
|
|
2405
|
+
//! LCCL在多进程/多线程多通信域并发场景下,"LCCL_PARALLEL"需要设置为1或者true。
|
|
2406
|
+
//! 多通信域并行功能使用结束后,"LCCL_PARALLEL"需要设置为0或者false,否则会导致基础场景性能下降。
|
|
2407
|
+
std::string commDomain;
|
|
2408
|
+
//!
|
|
2409
|
+
//! \brief 预留参数
|
|
2410
|
+
//!
|
|
2411
|
+
uint8_t rsv[64] = {0};
|
|
2412
|
+
};
|
|
2413
|
+
|
|
872
2414
|
//!
|
|
873
|
-
//! \
|
|
2415
|
+
//! \struct AllToAllVV2Param
|
|
874
2416
|
//!
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
//! \brief
|
|
879
|
-
|
|
2417
|
+
//! \brief 向通信域内所有通信卡发送数据(数据量可以定制),并从所有通信卡接收数据,当前仅支持Atlas 800I A2推理产品。
|
|
2418
|
+
//!
|
|
2419
|
+
struct AllToAllVV2Param {
|
|
2420
|
+
//! \brief 当前卡所属通信编号.
|
|
2421
|
+
int rank = -1;
|
|
2422
|
+
//! \brief 通信的卡的数量.
|
|
2423
|
+
int rankSize = 0;
|
|
2424
|
+
//! \brief 主通信编号.
|
|
2425
|
+
int rankRoot = 0;
|
|
2426
|
+
//!
|
|
2427
|
+
//! \brief 通信计算类型,仅支持"hccl".
|
|
2428
|
+
//!
|
|
2429
|
+
std::string backend = "hccl";
|
|
2430
|
+
//! \brief HCCL通信域指针.
|
|
2431
|
+
//! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
|
|
2432
|
+
HcclComm hcclComm = nullptr;
|
|
2433
|
+
//! \brief 通信模式,CommMode类型枚举值.hccl多线程只支持外部传入通信域方式
|
|
2434
|
+
CommMode commMode = COMM_MULTI_PROCESS;
|
|
2435
|
+
//!
|
|
2436
|
+
//! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
|
|
2437
|
+
//!
|
|
2438
|
+
//! ranktable配置参考
|
|
2439
|
+
//! https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC1alpha002/devguide/moddevg/tfmigr1/tfmigr1_000029.html
|
|
2440
|
+
//!
|
|
2441
|
+
std::string rankTableFile;
|
|
2442
|
+
//! \brief 通信device组用通信域名标识,多通信域时使用,当前仅支持hccl
|
|
2443
|
+
std::string commDomain;
|
|
2444
|
+
//!
|
|
2445
|
+
//! \brief 预留参数
|
|
2446
|
+
//!
|
|
2447
|
+
uint8_t rsv[64] = {0};
|
|
880
2448
|
};
|
|
881
2449
|
|
|
882
2450
|
//!
|
|
@@ -886,454 +2454,546 @@ struct RopeParam {
|
|
|
886
2454
|
//! \param right
|
|
887
2455
|
//! \return bool
|
|
888
2456
|
//!
|
|
889
|
-
inline bool operator==(const
|
|
2457
|
+
inline bool operator==(const AllToAllVV2Param &left, const AllToAllVV2Param &right)
|
|
890
2458
|
{
|
|
891
|
-
return left.
|
|
2459
|
+
return left.rank == right.rank && left.rankSize == right.rankSize && left.rankRoot == right.rankRoot &&
|
|
2460
|
+
left.hcclComm == right.hcclComm && left.commMode == right.commMode && left.backend == right.backend &&
|
|
2461
|
+
left.rankTableFile == right.rankTableFile && left.commDomain == right.commDomain;
|
|
892
2462
|
}
|
|
893
2463
|
|
|
894
2464
|
//!
|
|
895
|
-
//! \
|
|
2465
|
+
//! \struct GroupTopkParam
|
|
896
2466
|
//!
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
UNDEFINED = 0, //!< decoder&encoder for flashAttention
|
|
905
|
-
ENCODER, //!< encoder for flashAttention
|
|
906
|
-
DECODER, //!< decoder for flashAttention
|
|
907
|
-
PA_ENCODER //!< encoder for pagedAttention
|
|
908
|
-
};
|
|
2467
|
+
//! \brief GroupTopk算子超参数。将输入inTensor0中维度1(inTensor0有2个维度:维度0和维度1)数据分groupNum个组,每组取最大值,然后选出每组最大值中前k个,最后将非前k个组的数据全部置零。
|
|
2468
|
+
//!
|
|
2469
|
+
//! \note
|
|
2470
|
+
//!
|
|
2471
|
+
//! \warning
|
|
2472
|
+
//!
|
|
2473
|
+
struct GroupTopkParam {
|
|
909
2474
|
//!
|
|
910
|
-
//! \
|
|
2475
|
+
//! \brief 每个token分组数量。注:“专家总数”为inTensor0Desc.shape.dims[1]的值。
|
|
911
2476
|
//!
|
|
912
|
-
//! \
|
|
2477
|
+
//! \note 必传,默认值为1,取值范围为[1, 专家总数]。
|
|
913
2478
|
//!
|
|
914
|
-
|
|
915
|
-
KERNELTYPE_DEFAULT = 0, //!< i:fp16, bmm:fp16, o:fp16
|
|
916
|
-
KERNELTYPE_HIGH_PRECISION //!< i:fp16, bmm:fp32, o:fp16
|
|
917
|
-
};
|
|
2479
|
+
//! \warning groupNum需要保证可以被inTensor0Desc.shape.dims[1]整除。
|
|
918
2480
|
//!
|
|
919
|
-
|
|
2481
|
+
int32_t groupNum = 1;
|
|
920
2482
|
//!
|
|
921
|
-
//! \brief
|
|
2483
|
+
//! \brief 选择top K组数量。
|
|
922
2484
|
//!
|
|
923
|
-
|
|
924
|
-
CLAMP_TYPE_UNDEFINED = 0, //!< 不做clamp
|
|
925
|
-
CLAMP_TYPE_MIN_MAX //!< 做clamp,同时指定最大最小值
|
|
926
|
-
};
|
|
2485
|
+
//! \note 必传,默认值为0,取值范围为[1, groupNum]。
|
|
927
2486
|
//!
|
|
928
|
-
//! \
|
|
2487
|
+
//! \warning
|
|
929
2488
|
//!
|
|
930
|
-
|
|
2489
|
+
int32_t k = 0;
|
|
931
2490
|
//!
|
|
932
|
-
enum
|
|
933
|
-
MASK_TYPE_UNDEFINED = 0, //!< 默认值,全0mask
|
|
934
|
-
MASK_TYPE_NORM, //!< 倒三角mask
|
|
935
|
-
MASK_TYPE_ALIBI, //!< alibi mask
|
|
936
|
-
MASK_TYPE_NORM_COMPRESS, //!< 倒三角压缩mask
|
|
937
|
-
MASK_TYPE_ALIBI_COMPRESS, //!< alibi压缩mask
|
|
938
|
-
MASK_TYPE_ALIBI_COMPRESS_SQRT, //!< alibi压缩开平方mask
|
|
939
|
-
MASK_TYPE_ALIBI_COMPRESS_LEFT_ALIGN //!< alibi压缩mask左对齐,只支持Atlas 800I A2
|
|
940
|
-
};
|
|
2491
|
+
//! \enum GroupMultiFlag
|
|
941
2492
|
//!
|
|
942
|
-
//! \
|
|
2493
|
+
//! \brief 指定GroupTopk每组中取值计算的方式。
|
|
943
2494
|
//!
|
|
944
|
-
//! \
|
|
2495
|
+
//! \warning
|
|
945
2496
|
//!
|
|
946
|
-
enum
|
|
947
|
-
|
|
948
|
-
|
|
2497
|
+
enum GroupMultiFlag : uint16_t {
|
|
2498
|
+
UNDEFINED = 0, //!< 默认方式,每组内取最大值。
|
|
2499
|
+
SUM_MULTI_MAX //!< 每组内取n个最大值求和,需要设置参数n
|
|
949
2500
|
};
|
|
950
|
-
//! query头大小, 需大于或等于0
|
|
951
|
-
int32_t headNum = 0;
|
|
952
|
-
//! kv头数量, 该值需要用户根据使用的模型实际情况传入
|
|
953
|
-
//! kvHeadNum = 0时,keyCache的k_head_num,valueCache的v_head_num与query的num_heads一致,均为num_heads的数值
|
|
954
|
-
//! kvHeadNum != 0时,keyCache的k_head_num, valueCache的v_head_num与kvHeadNum值相同
|
|
955
|
-
int32_t kvHeadNum = 0;
|
|
956
|
-
//! query缩放系数
|
|
957
|
-
float qScale = 1;
|
|
958
|
-
//! 算子tor值, 在Q*K^T后乘
|
|
959
|
-
float qkScale = 1;
|
|
960
|
-
//! 是否开启动态batch
|
|
961
|
-
bool batchRunStatusEnable = false;
|
|
962
|
-
//! 是否开启倒三角优化, 只有mask为倒三角的时候才能开启优化
|
|
963
|
-
uint32_t isTriuMask = 0;
|
|
964
|
-
//! 计算类型
|
|
965
|
-
CalcType calcType = UNDEFINED;
|
|
966
|
-
//! 内核精度类型
|
|
967
|
-
KernelType kernelType = KERNELTYPE_DEFAULT;
|
|
968
|
-
//! clamp类型
|
|
969
|
-
ClampType clampType = CLAMP_TYPE_UNDEFINED;
|
|
970
|
-
//! clamp功能最小值
|
|
971
|
-
float clampMin = 0;
|
|
972
|
-
//! clamp功能最大值
|
|
973
|
-
float clampMax = 0;
|
|
974
|
-
//! mask类型
|
|
975
|
-
MaskType maskType = MASK_TYPE_UNDEFINED;
|
|
976
|
-
//! kvcache配置
|
|
977
|
-
KvCacheCfg kvcacheCfg = K_CACHE_V_CACHE;
|
|
978
|
-
};
|
|
979
|
-
|
|
980
|
-
//!
|
|
981
|
-
//! \brief PagedAttention.
|
|
982
|
-
//!
|
|
983
|
-
//! 一个Q有多个token,一个token对应多个KV的token,以token0为例,block_table代表其对应的KV的block_id,-1代表截止,
|
|
984
|
-
//! 所以第二行和第四行为其目标block,context_lens则表示KV有多少个token,则代表仅有block_id为(3,4,5,9,10)是需要与Q进行计算的。
|
|
985
|
-
//!
|
|
986
|
-
struct PagedAttentionParam {
|
|
987
|
-
//! query 头大小
|
|
988
|
-
int32_t headNum = 0;
|
|
989
|
-
//! 算子tor值, 在Q*K^T后乘
|
|
990
|
-
float qkScale = 1.0;
|
|
991
|
-
//! kv头数量
|
|
992
|
-
int32_t kvHeadNum = 0;
|
|
993
|
-
//!
|
|
994
|
-
//! \enum MaskType
|
|
995
|
-
//!
|
|
996
|
-
//! \brief The type values of MaskType.
|
|
997
2501
|
//!
|
|
998
|
-
|
|
999
|
-
UNDEFINED = 0, //!< 默认值,全0的mask
|
|
1000
|
-
MASK_TYPE_NORM, //!< 倒三角mask
|
|
1001
|
-
MASK_TYPE_ALIBI, //!< alibi mask
|
|
1002
|
-
MASK_TYPE_SPEC //!< 并行解码mask
|
|
1003
|
-
};
|
|
1004
|
-
//! mask类型
|
|
1005
|
-
MaskType maskType = UNDEFINED;
|
|
1006
|
-
//! 是否开启动态batch
|
|
1007
|
-
bool batchRunStatusEnable = false;
|
|
2502
|
+
//! \brief 指定GroupTopk每组中取值计算的方式。
|
|
1008
2503
|
//!
|
|
1009
|
-
//! \
|
|
2504
|
+
//! \note 默认值为UNDEFINED。
|
|
1010
2505
|
//!
|
|
1011
|
-
//! \
|
|
2506
|
+
//! \warning 取值为SUM_MULTI_MAX时需要传入参数n。
|
|
1012
2507
|
//!
|
|
1013
|
-
|
|
1014
|
-
TYPE_QUANT_UNDEFINED = 0, //!< 默认值,不与量化融合
|
|
1015
|
-
TYPE_DEQUANT_FUSION //!< 与反量化融合, 只支持Atlas 800I A2
|
|
1016
|
-
};
|
|
1017
|
-
//! 量化类型
|
|
1018
|
-
QuantType quantType = TYPE_QUANT_UNDEFINED;
|
|
1019
|
-
//! 开启量化功能后是否使用offset
|
|
1020
|
-
bool hasQuantOffset = false;
|
|
2508
|
+
GroupMultiFlag groupMultiFlag = UNDEFINED;
|
|
1021
2509
|
//!
|
|
1022
|
-
//! \
|
|
2510
|
+
//! \brief 每组内取值的个数。
|
|
1023
2511
|
//!
|
|
1024
|
-
//! \
|
|
2512
|
+
//! \note 默认值为1,取值范围为[1,expert_num/groupNum]。
|
|
1025
2513
|
//!
|
|
1026
|
-
|
|
1027
|
-
COMPRESS_TYPE_UNDEFINED = 0, //!< 默认值,不压缩
|
|
1028
|
-
COMPRESS_TYPE_KVHEAD //!< 压缩key_cache, value_cahe的kvHead维度, 只支持Atlas 800I A2
|
|
1029
|
-
};
|
|
1030
|
-
|
|
1031
|
-
//! 压缩方式
|
|
1032
|
-
CompressType compressType = COMPRESS_TYPE_UNDEFINED;
|
|
2514
|
+
//! \warning 只有当groupMultiFlag为SUM_MULTI_MAX时有效
|
|
1033
2515
|
//!
|
|
1034
|
-
|
|
2516
|
+
uint16_t n = 1;
|
|
1035
2517
|
//!
|
|
1036
|
-
//! \brief
|
|
2518
|
+
//! \brief 预留参数
|
|
1037
2519
|
//!
|
|
1038
|
-
|
|
1039
|
-
CALC_TYPE_UNDEFINED = 0, //!< 默认值,不开启并行解码
|
|
1040
|
-
CALC_TYPE_SPEC //!< 并行解码功能
|
|
1041
|
-
};
|
|
1042
|
-
//! 计算类型
|
|
1043
|
-
CalcType calcType = CALC_TYPE_UNDEFINED;
|
|
2520
|
+
uint8_t rsv[12] = {0};
|
|
1044
2521
|
};
|
|
1045
2522
|
|
|
1046
2523
|
//!
|
|
1047
|
-
//! \
|
|
2524
|
+
//! \struct GroupedMatmulWithRoutingParam
|
|
1048
2525
|
//!
|
|
1049
|
-
//!
|
|
1050
|
-
//! 其中:b表示batch,如果batch为1,该维度为1,不可省略。如果batch有多个,该维度为所有batch维度合轴的结果。
|
|
1051
|
-
//! m0/n0表示对齐位,float16时,n0与m0都为16, int8时,n0为32,m0为16,m1m0表示原始ND的m维度经过对齐位向上对齐,
|
|
1052
|
-
//! n1表示原始ND的n维度经过对齐位向上对齐后,除以n0的商。例如原始ND的dims为{8, 100, 30},则其对应的NZ的dims为{8, 2, 112, 16}。
|
|
2526
|
+
//! \brief 实现了GroupedMatmulWithRouting算子的Up和Down方法,将topK个专家权重与token激活值做矩阵乘法计算。
|
|
1053
2527
|
//!
|
|
1054
|
-
//! \warning
|
|
1055
|
-
//! - 如果m0m1落在区间(k1 × 16, (k1 + 1) × 16](其中k1为正整数)内,那么该区间即为outCrops[0]的取值范围要求。
|
|
1056
|
-
//! - 如果n0*n1落在区间(k2 × 16, (k2 + 1) × 16](其中k2为正整数)内,那么该区间即为outCrops[1]的取值范围要求。
|
|
2528
|
+
//! \warning 仅Atlas 800I A2推理产品支持该算子
|
|
1057
2529
|
//!
|
|
1058
|
-
|
|
2530
|
+
|
|
2531
|
+
struct GroupedMatmulWithRoutingParam {
|
|
1059
2532
|
//!
|
|
1060
|
-
//! \enum
|
|
2533
|
+
//! \enum GroupedMatmulType
|
|
1061
2534
|
//!
|
|
1062
|
-
//! \brief
|
|
2535
|
+
//! \brief 指定GroupedMatmulWithRouting算子需要执行的操作类型。
|
|
1063
2536
|
//!
|
|
1064
|
-
enum
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
ND_TO_FRACTAL_NZ //!< ND转FRACTAL_NZ
|
|
2537
|
+
enum GroupedMatmulType : int {
|
|
2538
|
+
GROUPED_MATMUL_UP = 0, //!< 默认值。up类型。
|
|
2539
|
+
GROUPED_MATMUL_DOWN //!< down类型。
|
|
1068
2540
|
};
|
|
1069
|
-
//! \brief
|
|
1070
|
-
|
|
1071
|
-
//! \brief
|
|
1072
|
-
|
|
1073
|
-
};
|
|
1074
|
-
|
|
1075
|
-
//!
|
|
1076
|
-
//! \brief 三目运算。
|
|
1077
|
-
//!
|
|
1078
|
-
//! 输入张量为cond,x,y, 输出张量 z = cond ? x : y;
|
|
1079
|
-
//! 输入cond的元素只能是0或者1
|
|
1080
|
-
//! 输出z的维度为输入x与y广播后的结果。要求cond, x, y必须是可广播的。
|
|
1081
|
-
//!
|
|
1082
|
-
struct WhereParam {};
|
|
1083
|
-
|
|
1084
|
-
//!
|
|
1085
|
-
//! \brief 将输入Tensor的Shape,按指定轴扩展指定的倍数。
|
|
1086
|
-
//!
|
|
1087
|
-
//! \warning 输出y的维度和multiples维度一致,每个维度大小为输入x广播到multiples维度后和multiples对应维度的乘积。
|
|
1088
|
-
//!
|
|
1089
|
-
struct RepeatParam {
|
|
2541
|
+
//! \brief 是否转置B矩阵(专家权重)。
|
|
2542
|
+
bool transposeB = true;
|
|
2543
|
+
//! \brief 选取的topK专家个数
|
|
2544
|
+
int32_t topK = 0;
|
|
1090
2545
|
//!
|
|
1091
|
-
//! \brief
|
|
2546
|
+
//! \brief 指定GroupedMatmulWithRouting算子需要执行的操作类型。
|
|
1092
2547
|
//!
|
|
1093
|
-
//! \
|
|
1094
|
-
//! - 支持在不超过两个维度上进行扩展
|
|
1095
|
-
//! - multiples的维度小于等于8且需大于或等于输入x的维度,每一个元素要求大于0。
|
|
2548
|
+
//! \note 默认值为GROUPED_MATMUL_UP。
|
|
1096
2549
|
//!
|
|
1097
|
-
|
|
2550
|
+
//! \warning 目前支持取值为GROUPED_MATMUL_UP/GROUPED_MATMUL_DOWN。
|
|
2551
|
+
//!
|
|
2552
|
+
GroupedMatmulType groupedMatmulType = GROUPED_MATMUL_UP;
|
|
2553
|
+
//!
|
|
2554
|
+
//! \brief 指定输出值的反量化类型。
|
|
2555
|
+
//!
|
|
2556
|
+
//! \note 默认值为ACL_DT_UNDEFINED。
|
|
2557
|
+
//!
|
|
2558
|
+
//! \warning 非量化场景下:仅支持配置为ACL_DT_UNDEFINED。量化场景下支持ACL_FLOAT16/ACL_BF16
|
|
2559
|
+
//!
|
|
2560
|
+
aclDataType outDataType = ACL_DT_UNDEFINED;
|
|
2561
|
+
//!
|
|
2562
|
+
//! \brief 预留参数
|
|
2563
|
+
//!
|
|
2564
|
+
uint8_t rsv[16] = {0};
|
|
1098
2565
|
};
|
|
1099
2566
|
|
|
1100
2567
|
//!
|
|
1101
|
-
//! \struct
|
|
1102
|
-
//!
|
|
1103
|
-
//! \brief 将输入源张量中的内容拷贝到输入目标张量指定位置中.
|
|
1104
|
-
//!
|
|
1105
|
-
//! 该拷贝为原地拷贝,最终结果修改在输入目标张量中.<br>
|
|
1106
|
-
//! 输入目标张量 dst: [a,b,c], 输入源张量src: [d,e,f].
|
|
1107
|
-
//! dst[starts[0]: ends[0], starts[1]: ends[1], starts[2]: ends[2]] = src.<br>
|
|
1108
|
-
//! 其中 ends[0]-starts[0]需为src第0维的维度大小,ends[1]-starts[1]需为为src第1维的维度大小,ends[2]-starts[2]需为src第2维的维度大小。
|
|
2568
|
+
//! \struct GroupedMatmulInplaceAddParam
|
|
1109
2569
|
//!
|
|
1110
|
-
//! \
|
|
1111
|
-
//! 输入src的各维度大小要求小于或等于输入dst对应维度大小.<br>
|
|
1112
|
-
//! 输入src和输入dst的各维度要求有一个或两个维度不相同,且需要满足:
|
|
1113
|
-
//! - 如果有一个维度不相同,则这个维度不能是最高维(第0维)。
|
|
1114
|
-
//! - 如果有两个维度不相同,则其中一个不同的维度必须是最高维(第0维)。
|
|
1115
|
-
//
|
|
1116
|
-
struct SetValueParam {
|
|
1117
|
-
//! \brief 每一维拷贝起始位置
|
|
1118
|
-
SVector<int64_t> starts;
|
|
1119
|
-
//! \brief 每一维拷贝结束位置后一个位置,拷贝到该位置前一个位置为止
|
|
1120
|
-
SVector<int64_t> ends;
|
|
1121
|
-
//! \brief 每一维拷贝步长,当前仅支持strides为全1.
|
|
1122
|
-
SVector<int64_t> strides;
|
|
1123
|
-
};
|
|
1124
|
-
|
|
2570
|
+
//! \brief 将A、B两个矩阵按照规则进行分组矩阵乘运算,并累加在矩阵C上作为输出。
|
|
1125
2571
|
//!
|
|
1126
|
-
//! \
|
|
2572
|
+
//! \note 算子本质上是接收x和weight两个输入tensor作为A矩阵和B矩阵进行分组矩阵乘运算并累加在矩阵C上,可通过参数transposeA与transposeB控制做矩
|
|
2573
|
+
//! 阵乘前是否需要对A矩阵和B矩阵进行行列转置,根据参数转置后的A矩阵和B矩阵需满足矩阵乘维度关系。例如,当transposeA为false,
|
|
2574
|
+
//! transposeB为true时,x和weight的shape可以分别为[m, k]和[n, k]。
|
|
1127
2575
|
//!
|
|
1128
|
-
struct
|
|
2576
|
+
struct GroupedMatmulInplaceAddParam {
|
|
1129
2577
|
//!
|
|
1130
|
-
//! \
|
|
2578
|
+
//! \brief 是否转置A矩阵。
|
|
1131
2579
|
//!
|
|
1132
|
-
//! \
|
|
2580
|
+
//! \note 默认值为false,不转置。
|
|
1133
2581
|
//!
|
|
1134
|
-
|
|
1135
|
-
REDUCE_UNDEFINED = 0, //!< 未定义。
|
|
1136
|
-
REDUCE_MAX, //!< 求最大值。
|
|
1137
|
-
REDUCE_MIN, //!< 求最小值。
|
|
1138
|
-
REDUCE_SUM, //!< 求和。
|
|
1139
|
-
};
|
|
1140
|
-
//! \brief reduceType
|
|
1141
|
-
ReduceType reduceType = REDUCE_UNDEFINED;
|
|
2582
|
+
bool transposeA = false;
|
|
1142
2583
|
//!
|
|
1143
|
-
//! \brief
|
|
2584
|
+
//! \brief 是否转置B矩阵。
|
|
1144
2585
|
//!
|
|
1145
|
-
//! \
|
|
1146
|
-
//! axis可以支持多个轴上进行处理,各元素要求小于x的维度且大于等于0
|
|
2586
|
+
//! \note 默认值为false,不转置,当前仅支持false。
|
|
1147
2587
|
//!
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
//!
|
|
1152
|
-
|
|
1153
|
-
//! 支持btach级别随机种子、top-k取样,支持exponential取样
|
|
1154
|
-
//! \warning probs必须是两维张量。
|
|
1155
|
-
//!
|
|
1156
|
-
struct TopkToppSamplingParam {
|
|
1157
|
-
//! \brief 取样处理类型
|
|
1158
|
-
enum TopkToppSamplingType {
|
|
1159
|
-
SAMPLING_UNDEFINED = -1, //!< 未定义
|
|
1160
|
-
SINGLE_TOPK_SAMPLING, //!< 非batch级别随机种子、Topk的取样
|
|
1161
|
-
BATCH_TOPK_MULTINOMIAL_SAMPLING, //!< batch级别随机种子、Topk的multinomial取样
|
|
1162
|
-
BATCH_TOPK_EXPONENTIAL_SAMPLING, //!< batch级别随机种子、Topk的exponential取样
|
|
1163
|
-
SAMPLING_MAX, //!< 枚举最大值
|
|
1164
|
-
};
|
|
1165
|
-
//! \brief 采样类型,默认为非batch级别随机种子、Topk的取样
|
|
1166
|
-
TopkToppSamplingType topkToppSamplingType = SINGLE_TOPK_SAMPLING;
|
|
1167
|
-
//! \brief 当 topkToppSamplingType为BATCH_TOPK_MULTINOMIAL_SAMPLING时使用
|
|
1168
|
-
//! \brief 每个batch下top-p阶段随机抽样使用的随机数种子。
|
|
1169
|
-
//! \brief 维度与batch大小一致。
|
|
1170
|
-
std::vector<uint32_t> randSeeds;
|
|
1171
|
-
//! \brief 当 topkToppSamplingType为SINGLE_TOPK_SAMPLING时使用
|
|
1172
|
-
//! \brief top-p阶段随机抽样使用的随机数种子。
|
|
1173
|
-
uint32_t randSeed = 0;
|
|
1174
|
-
//! \brief 当 topkToppSamplingType为SINGLE_TOPK_SAMPLING时使用
|
|
1175
|
-
//! \brief top-k阶段保留的词的个数,需要小于词表的词数。
|
|
1176
|
-
//! \brief top-k必须大于0且小于或等于输入probs最后一维的大小。
|
|
1177
|
-
uint32_t topk = 100;
|
|
2588
|
+
bool transposeB = false;
|
|
2589
|
+
//!
|
|
2590
|
+
//! \brief 预留参数
|
|
2591
|
+
//!
|
|
2592
|
+
uint8_t rsv[22] = {0};
|
|
1178
2593
|
};
|
|
1179
2594
|
|
|
1180
|
-
|
|
1181
|
-
//!
|
|
1182
|
-
//! \struct PadParam
|
|
1183
|
-
//!
|
|
1184
|
-
//! \brief 对于输入input_ids,取出每个batch最后一个有效token的embedding向量
|
|
1185
2595
|
//!
|
|
1186
|
-
struct
|
|
1187
|
-
|
|
2596
|
+
//! \struct CohereLayerNormParam
|
|
1188
2597
|
//!
|
|
1189
|
-
//! \
|
|
2598
|
+
//! \brief CohereLayerNorm可以将网络层输入根据最后一维归一化到[0, 1]之间。
|
|
1190
2599
|
//!
|
|
1191
|
-
//! \
|
|
2600
|
+
//! \note 针对Command R Plus模型,对多batch数据用于表示根据最后一维进行归一化操作。
|
|
1192
2601
|
//!
|
|
1193
|
-
struct
|
|
2602
|
+
struct CohereLayerNormParam {
|
|
2603
|
+
//!
|
|
2604
|
+
//! \brief epsilon,放在分母上防止除0。
|
|
2605
|
+
//!
|
|
2606
|
+
//! \note 默认值为1e-5。
|
|
2607
|
+
//!
|
|
2608
|
+
//! \warning epsilon的取值要求大于0。
|
|
2609
|
+
float epsilon = 1e-5;
|
|
2610
|
+
//!
|
|
2611
|
+
//! \brief 预留参数
|
|
2612
|
+
//!
|
|
2613
|
+
uint8_t rsv[32] = {0};
|
|
2614
|
+
};
|
|
1194
2615
|
|
|
1195
2616
|
//!
|
|
1196
|
-
//! \struct
|
|
2617
|
+
//! \struct GatherPreRmsNormParam
|
|
1197
2618
|
//!
|
|
1198
|
-
//! \brief
|
|
2619
|
+
//! \brief 首先对ResIn进行Gather索引操作,然后与X相加,最后进行RmsNorm计算。
|
|
1199
2620
|
//!
|
|
1200
|
-
|
|
2621
|
+
//! \warning 仅Atlas 800I A2推理产品支持该算子
|
|
2622
|
+
//!
|
|
2623
|
+
struct GatherPreRmsNormParam {
|
|
1201
2624
|
//!
|
|
1202
|
-
//! \brief
|
|
2625
|
+
//! \brief epsilon,放在分母上防止除0。
|
|
1203
2626
|
//!
|
|
1204
|
-
//! \
|
|
2627
|
+
//! \note 默认值为1e-5。
|
|
1205
2628
|
//!
|
|
1206
|
-
|
|
2629
|
+
//! \warning epsilon的取值要求大于0。
|
|
2630
|
+
float epsilon = 1e-5;
|
|
2631
|
+
//!
|
|
2632
|
+
//! \brief 预留参数
|
|
2633
|
+
//!
|
|
2634
|
+
uint8_t rsv[28] = {0};
|
|
1207
2635
|
};
|
|
1208
2636
|
|
|
1209
2637
|
//!
|
|
1210
|
-
//! \struct
|
|
2638
|
+
//! \struct NormRopeReshapeParam
|
|
1211
2639
|
//!
|
|
1212
|
-
//! \brief
|
|
2640
|
+
//! \brief 融合rmsnorm、rope、reshapeAndCache。
|
|
1213
2641
|
//!
|
|
1214
|
-
//! \warning
|
|
2642
|
+
//! \warning 仅Atlas 800I A2推理产品支持该算子
|
|
1215
2643
|
//!
|
|
1216
|
-
struct
|
|
2644
|
+
struct NormRopeReshapeParam {
|
|
2645
|
+
//! \brief precisionMode,精度模式。
|
|
2646
|
+
uint32_t precisionMode = 0;
|
|
2647
|
+
//! \brief rotaryCoeff,算子内Rope部分计算的旋转系数。
|
|
2648
|
+
uint32_t rotaryCoeff = 2;
|
|
2649
|
+
//! \brief epsilon,归一化时加在分母上防止除零。
|
|
2650
|
+
float epsilon = 1e-5;
|
|
2651
|
+
//!
|
|
2652
|
+
//! \brief 预留参数
|
|
2653
|
+
//!
|
|
2654
|
+
//! \note 默认值为1e-5。
|
|
2655
|
+
//!
|
|
2656
|
+
uint8_t rsv[16] = {0};
|
|
2657
|
+
};
|
|
1217
2658
|
|
|
1218
2659
|
//!
|
|
1219
|
-
//! \struct
|
|
2660
|
+
//! \struct FusedAddTopkDivParam
|
|
1220
2661
|
//!
|
|
1221
|
-
//! \brief
|
|
2662
|
+
//! \brief Deepseek融合算子:Sigmoid+Add+GroupTopk+Gather+ReduceSum,RealDiv,Muls。
|
|
1222
2663
|
//!
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
2664
|
+
//! \note OP详细描述。
|
|
2665
|
+
//!
|
|
2666
|
+
//! \warning 当前仅支持Atlas 800I A2 推理产品、Atlas A2 训练系列产品和Atlas A3 训练系列产品。
|
|
2667
|
+
//!
|
|
2668
|
+
struct FusedAddTopkDivParam {
|
|
2669
|
+
//!
|
|
2670
|
+
//! \brief 分组数量。
|
|
2671
|
+
//!
|
|
2672
|
+
//! \note 默认值为1。
|
|
2673
|
+
//!
|
|
2674
|
+
//! \warning 取值大于0。
|
|
2675
|
+
//!
|
|
2676
|
+
uint32_t groupNum = 1;
|
|
2677
|
+
//!
|
|
2678
|
+
//! \brief 选择k个组。
|
|
2679
|
+
//!
|
|
2680
|
+
//! \note 默认值为1。
|
|
2681
|
+
//!
|
|
2682
|
+
//! \warning 取值范围为(0, groupNum]。
|
|
2683
|
+
//!
|
|
2684
|
+
uint32_t groupTopk = 1;
|
|
2685
|
+
//!
|
|
2686
|
+
//! \brief 组内选取n个最大值求和。
|
|
2687
|
+
//!
|
|
2688
|
+
//! \note 默认值为1。
|
|
2689
|
+
//!
|
|
2690
|
+
//! \warning 取值大于0。
|
|
2691
|
+
//!
|
|
2692
|
+
uint32_t n = 1;
|
|
2693
|
+
//!
|
|
2694
|
+
//! \brief topk选择前k个值。
|
|
2695
|
+
//!
|
|
2696
|
+
//! \note 默认值为1。
|
|
2697
|
+
//!
|
|
2698
|
+
//! \warning 取值大于0。
|
|
2699
|
+
//!
|
|
2700
|
+
uint32_t k = 1;
|
|
2701
|
+
//!
|
|
2702
|
+
//! \brief 激活类型。
|
|
2703
|
+
//!
|
|
2704
|
+
//! \note 默认值为ACTIVATION_SIGMOID。
|
|
2705
|
+
//!
|
|
2706
|
+
//! \warning 取值范围为ACTIVATION_SIGMOID。
|
|
2707
|
+
//!
|
|
2708
|
+
ActivationType activationType = ACTIVATION_SIGMOID;
|
|
2709
|
+
//!
|
|
2710
|
+
//! \brief 是否归一化。
|
|
2711
|
+
//!
|
|
2712
|
+
//! \note 默认值为true。
|
|
2713
|
+
//!
|
|
2714
|
+
//! \warning 取值范围为true。
|
|
2715
|
+
//!
|
|
2716
|
+
bool isNorm = true;
|
|
2717
|
+
//!
|
|
2718
|
+
//! \brief 归一化后的乘系数。
|
|
2719
|
+
//!
|
|
2720
|
+
//! \note 默认值为1.0。
|
|
2721
|
+
//!
|
|
2722
|
+
//! \warning 取值范围为任意值。
|
|
2723
|
+
//!
|
|
2724
|
+
float scale = 1.0f;
|
|
2725
|
+
//!
|
|
2726
|
+
//! \brief 是否使能物理专家向逻辑专家的映射。
|
|
2727
|
+
//!
|
|
2728
|
+
//! \note 默认值为false。
|
|
2729
|
+
//!
|
|
2730
|
+
//! \warning 取值范围为false/true。
|
|
2731
|
+
//!
|
|
2732
|
+
bool enableExpertMapping = false;
|
|
2733
|
+
//!
|
|
2734
|
+
//! \brief 预留参数。
|
|
2735
|
+
//!
|
|
2736
|
+
//! \note 默认为全0的数组。
|
|
2737
|
+
//!
|
|
2738
|
+
//! \warning 数组元素必须均为0。
|
|
2739
|
+
//!
|
|
2740
|
+
uint8_t rsv[27] = {0};
|
|
1228
2741
|
};
|
|
1229
2742
|
|
|
1230
2743
|
//!
|
|
1231
|
-
//! \struct
|
|
2744
|
+
//! \struct MlaPreprocessParam
|
|
1232
2745
|
//!
|
|
1233
|
-
//! \brief
|
|
2746
|
+
//! \brief 融合rmsNormQuant、matmul、rope、reshapeAndCache,用于MLA预处理。
|
|
1234
2747
|
//!
|
|
1235
|
-
|
|
2748
|
+
//! \warning 所有参数目前均为未使用的预留参数,需支持泛化后启用,仅Atlas 800I A2推理产品支持该算子
|
|
2749
|
+
//!
|
|
2750
|
+
struct MlaPreprocessParam {
|
|
1236
2751
|
//!
|
|
1237
|
-
//! \
|
|
2752
|
+
//! \brief 经过matmul后拆分的dim大小
|
|
1238
2753
|
//!
|
|
1239
|
-
|
|
2754
|
+
uint32_t wdqDim = 0;
|
|
1240
2755
|
//!
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
2756
|
+
//! \brief q传入rope的dim大小
|
|
2757
|
+
//!
|
|
2758
|
+
uint32_t qRopeDim = 0;
|
|
2759
|
+
//!
|
|
2760
|
+
//! \brief k传入rope的dim大小
|
|
2761
|
+
//!
|
|
2762
|
+
uint32_t kRopeDim = 0;
|
|
2763
|
+
//!
|
|
2764
|
+
//! \brief epsilon,放在分母上防止除0。
|
|
2765
|
+
//!
|
|
2766
|
+
float epsilon = 1e-5;
|
|
2767
|
+
//!
|
|
2768
|
+
//! \brief q旋转系数,对半旋转是2,支持配置2、4或headDim。
|
|
2769
|
+
//!
|
|
2770
|
+
int32_t qRotaryCoeff = 2;
|
|
2771
|
+
//!
|
|
2772
|
+
//! \brief k旋转系数,对半旋转是2,支持配置2、4或headDim。
|
|
2773
|
+
//!
|
|
2774
|
+
int32_t kRotaryCoeff = 2;
|
|
2775
|
+
//!
|
|
2776
|
+
//! \brief wdq是否转置
|
|
2777
|
+
//!
|
|
2778
|
+
bool transposeWdq = true;
|
|
2779
|
+
//!
|
|
2780
|
+
//! \brief wuq是否转置
|
|
2781
|
+
//!
|
|
2782
|
+
bool transposeWuq = true;
|
|
2783
|
+
//!
|
|
2784
|
+
//! \brief wuk是否转置
|
|
2785
|
+
//!
|
|
2786
|
+
bool transposeWuk = true;
|
|
2787
|
+
//!
|
|
2788
|
+
//! \enum CacheMode
|
|
2789
|
+
//!
|
|
2790
|
+
//! \brief 指定cache的类型。
|
|
2791
|
+
//!
|
|
2792
|
+
enum CacheMode : uint8_t {
|
|
2793
|
+
KVCACHE = 0,
|
|
2794
|
+
KROPE_CTKV,
|
|
2795
|
+
INT8_NZCACHE,
|
|
2796
|
+
NZCACHE,
|
|
1244
2797
|
};
|
|
1245
|
-
//!
|
|
1246
|
-
|
|
1247
|
-
//!
|
|
1248
|
-
|
|
2798
|
+
//!
|
|
2799
|
+
//! \brief 指定cache的类型。
|
|
2800
|
+
//!
|
|
2801
|
+
CacheMode cacheMode = KVCACHE;
|
|
2802
|
+
//!
|
|
2803
|
+
//! \enum QuantMode
|
|
2804
|
+
//!
|
|
2805
|
+
//! \brief 指定RmsNorm量化的类型。
|
|
2806
|
+
//!
|
|
2807
|
+
enum QuantMode : uint16_t {
|
|
2808
|
+
PER_TENSOR_QUANT_ASYMM = 0,
|
|
2809
|
+
PER_TOKEN_QUANT_SYMM,
|
|
2810
|
+
PER_TOKEN_QUANT_ASYMM,
|
|
2811
|
+
UNQUANT,
|
|
2812
|
+
};
|
|
2813
|
+
//!
|
|
2814
|
+
//! \brief 指定RmsNorm量化的类型。
|
|
2815
|
+
//!
|
|
2816
|
+
QuantMode quantMode = PER_TENSOR_QUANT_ASYMM;
|
|
2817
|
+
//!
|
|
2818
|
+
//! \brief 预留参数
|
|
2819
|
+
//!
|
|
2820
|
+
uint8_t rsv[34] = {0};
|
|
1249
2821
|
};
|
|
1250
2822
|
|
|
1251
2823
|
//!
|
|
1252
|
-
//! \struct
|
|
2824
|
+
//! \struct ReshapeAndCacheOmniParam
|
|
1253
2825
|
//!
|
|
1254
|
-
//! \brief
|
|
2826
|
+
//! \brief omni压缩配套使用的reshapeAndCache
|
|
1255
2827
|
//!
|
|
1256
|
-
//!
|
|
1257
|
-
//! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize, 0 ≤ destRank < rankSize
|
|
2828
|
+
//! \warning 仅Atlas 800I A2推理产品支持该算子
|
|
1258
2829
|
//!
|
|
1259
|
-
|
|
1260
|
-
//!
|
|
2830
|
+
struct ReshapeAndCacheOmniParam {
|
|
2831
|
+
//!
|
|
2832
|
+
//! \brief 预留参数
|
|
2833
|
+
//!
|
|
2834
|
+
uint8_t rsv[8] = {0};
|
|
2835
|
+
};
|
|
2836
|
+
|
|
1261
2837
|
//!
|
|
1262
|
-
//! \
|
|
1263
|
-
//! rm -rf /dev/shm/sem.lccl*
|
|
1264
|
-
//! rm -rf /dev/shm/sem.hccl*
|
|
1265
|
-
//! ipcrm -a
|
|
1266
|
-
//! \endcode
|
|
2838
|
+
//! \brief MultiLatentAttention.
|
|
1267
2839
|
//!
|
|
1268
|
-
struct
|
|
1269
|
-
//! \brief 每张卡所属通信编号
|
|
1270
|
-
int rank = 0;
|
|
1271
|
-
//! \brief 通信的卡的数量
|
|
1272
|
-
int rankSize = 0;
|
|
1273
|
-
//! \brief 主通信编号
|
|
1274
|
-
int rankRoot = 0;
|
|
1275
|
-
//! \brief 通信域内数据接收端的rank编号.
|
|
1276
|
-
uint32_t destRank = 1;
|
|
1277
|
-
//! \brief 通信后端指示,仅支持"hccl".
|
|
1278
|
-
std::string backend = "hccl";
|
|
1279
|
-
//! \brief HCCL通信域指针
|
|
1280
|
-
HcclComm hcclComm = nullptr;
|
|
1281
|
-
//! \brief 通信模式,CommMode类型枚举值。hccl多线程只支持外部传入通信域方式
|
|
1282
|
-
CommMode commMode = COMM_MULTI_PROCESS;
|
|
2840
|
+
struct MultiLatentAttentionParam {
|
|
1283
2841
|
//!
|
|
1284
|
-
//! \brief
|
|
2842
|
+
//! \brief query头大小
|
|
1285
2843
|
//!
|
|
1286
|
-
|
|
1287
|
-
//! https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC1alpha002/devguide/moddevg/tfmigr1/tfmigr1_000029.html
|
|
2844
|
+
int32_t headNum = 0;
|
|
1288
2845
|
//!
|
|
1289
|
-
|
|
1290
|
-
//!
|
|
1291
|
-
|
|
2846
|
+
//! \brief 算子tor值, 在Q*K^T后乘
|
|
2847
|
+
//!
|
|
2848
|
+
float qkScale = 1.0;
|
|
2849
|
+
//!
|
|
2850
|
+
//! \brief kv头数量
|
|
2851
|
+
//!
|
|
2852
|
+
int32_t kvHeadNum = 0;
|
|
2853
|
+
//!
|
|
2854
|
+
//! \enum MaskType
|
|
2855
|
+
//!
|
|
2856
|
+
//! \brief The type values of MaskType.
|
|
2857
|
+
//!
|
|
2858
|
+
enum MaskType : int {
|
|
2859
|
+
UNDEFINED = 0, //!< 默认值,全0的mask
|
|
2860
|
+
MASK_TYPE_SPEC, //!< qseqlen > 1时的mask
|
|
2861
|
+
MASK_TYPE_MASK_FREE, //!< mask free
|
|
2862
|
+
};
|
|
2863
|
+
//!
|
|
2864
|
+
//! \brief mask类型
|
|
2865
|
+
//!
|
|
2866
|
+
MaskType maskType = UNDEFINED;
|
|
2867
|
+
//!
|
|
2868
|
+
//! \enum CalcType
|
|
2869
|
+
//!
|
|
2870
|
+
//! \brief The type values of CalcType.
|
|
2871
|
+
//!
|
|
2872
|
+
enum CalcType : int {
|
|
2873
|
+
CALC_TYPE_UNDEFINED = 0, // 默认值
|
|
2874
|
+
CALC_TYPE_SPEC, // 支持传入大于1的qseqlen
|
|
2875
|
+
CALC_TYPE_RING, // ringAttention
|
|
2876
|
+
};
|
|
2877
|
+
//!
|
|
2878
|
+
//! \brief CalcType类型
|
|
2879
|
+
//!
|
|
2880
|
+
CalcType calcType = CALC_TYPE_UNDEFINED;
|
|
2881
|
+
//!
|
|
2882
|
+
//! \enum CacheMode
|
|
2883
|
+
//!
|
|
2884
|
+
//! \brief 指定cache的类型。
|
|
2885
|
+
//!
|
|
2886
|
+
enum CacheMode : uint8_t {
|
|
2887
|
+
KVCACHE = 0, // 拼接cache
|
|
2888
|
+
KROPE_CTKV, // 分离cache,默认值
|
|
2889
|
+
INT8_NZCACHE, // 高性能分离cache
|
|
2890
|
+
NZCACHE, // 非量化NZcache
|
|
2891
|
+
};
|
|
2892
|
+
//!
|
|
2893
|
+
//! \brief 指定cache的类型。
|
|
2894
|
+
//!
|
|
2895
|
+
CacheMode cacheMode = KVCACHE;
|
|
2896
|
+
//!
|
|
2897
|
+
//! \brief 预留参数
|
|
2898
|
+
//!
|
|
2899
|
+
uint8_t rsv[43] = {0};
|
|
1292
2900
|
};
|
|
1293
2901
|
|
|
1294
2902
|
//!
|
|
1295
|
-
//! \struct
|
|
2903
|
+
//! \struct RazorFusionAttentionParam
|
|
1296
2904
|
//!
|
|
1297
|
-
//! \brief
|
|
2905
|
+
//! \brief 多模态场景
|
|
1298
2906
|
//!
|
|
1299
|
-
|
|
1300
|
-
//!
|
|
2907
|
+
struct RazorFusionAttentionParam {
|
|
2908
|
+
//!
|
|
2909
|
+
//! \brief 算子headSize值, query头大小
|
|
2910
|
+
//!
|
|
2911
|
+
int32_t headNum = 1;
|
|
2912
|
+
//!
|
|
2913
|
+
//! \brief 算子kvHead值, kv头数量
|
|
2914
|
+
//!
|
|
2915
|
+
int32_t kvHeadNum = 1;
|
|
2916
|
+
//!
|
|
2917
|
+
//! \brief 算子tor值, 在Q*K^T后乘
|
|
2918
|
+
//!
|
|
2919
|
+
float qkScale = 1;
|
|
2920
|
+
//!
|
|
2921
|
+
//! \brief 图片的长度
|
|
2922
|
+
//!
|
|
2923
|
+
int32_t razorLen = 0;
|
|
2924
|
+
//!
|
|
2925
|
+
//! \brief 用于稀疏计算,表示attention需要和前几个Token计算关联,128的倍数
|
|
2926
|
+
//!
|
|
2927
|
+
int32_t preTokens = 0;
|
|
2928
|
+
//!
|
|
2929
|
+
//! \brief 用于稀疏计算,表示attention需要和前几个Token计算关联,128的倍数
|
|
2930
|
+
//!
|
|
2931
|
+
int32_t nextTokens = 0;
|
|
2932
|
+
//!
|
|
2933
|
+
//! \brief Q方向上图片的个数
|
|
2934
|
+
//!
|
|
2935
|
+
int32_t tileQ = 0;
|
|
2936
|
+
//!
|
|
2937
|
+
//! \brief Kv方向图片的个数
|
|
2938
|
+
//!
|
|
2939
|
+
int32_t tileKv = 0;
|
|
2940
|
+
//!
|
|
2941
|
+
//! \brief Q方向文本Token数量
|
|
2942
|
+
//!
|
|
2943
|
+
int32_t textQLen = 0;
|
|
2944
|
+
//!
|
|
2945
|
+
//! \brief Kv方向文本Token数量
|
|
2946
|
+
//!
|
|
2947
|
+
int32_t textKvLen = 0;
|
|
2948
|
+
//!
|
|
2949
|
+
//! \brief 预留参数
|
|
2950
|
+
//!
|
|
2951
|
+
uint8_t rsv[64] = {0};
|
|
2952
|
+
};
|
|
2953
|
+
|
|
1301
2954
|
//!
|
|
1302
|
-
//! \
|
|
1303
|
-
//! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
|
|
2955
|
+
//! \struct FaUpdateParam
|
|
1304
2956
|
//!
|
|
1305
|
-
//! \
|
|
1306
|
-
//! rm -rf /dev/shm/sem.lccl*
|
|
1307
|
-
//! rm -rf /dev/shm/sem.hccl*
|
|
1308
|
-
//! ipcrm -a
|
|
1309
|
-
//! \endcode
|
|
2957
|
+
//! \brief 主要功能为将flash attention输出的中间结果rowmax, rowsum, attention out三个局部结果更新成全局结果
|
|
1310
2958
|
//!
|
|
1311
|
-
struct
|
|
1312
|
-
//! \brief 每张卡所属通信编号
|
|
1313
|
-
int rank = 0;
|
|
1314
|
-
//! \brief 通信的卡的数量
|
|
1315
|
-
int rankSize = 0;
|
|
1316
|
-
//! \brief 主通信编号
|
|
1317
|
-
int rankRoot = 0;
|
|
1318
|
-
//! \brief 通信域内数据发送端的rank编号.
|
|
1319
|
-
uint32_t srcRank = 1;
|
|
1320
|
-
//! \brief 通信后端指示,仅支持"hccl".
|
|
1321
|
-
std::string backend = "hccl";
|
|
1322
|
-
//! \brief HCCL通信域指针
|
|
1323
|
-
HcclComm hcclComm = nullptr;
|
|
1324
|
-
//! \brief 通信模式,CommMode类型枚举值。hccl多线程只支持外部传入通信域方式
|
|
1325
|
-
CommMode commMode = COMM_MULTI_PROCESS;
|
|
2959
|
+
struct FaUpdateParam {
|
|
1326
2960
|
//!
|
|
1327
|
-
//! \
|
|
2961
|
+
//! \enum FaUpdateType
|
|
1328
2962
|
//!
|
|
1329
|
-
//!
|
|
1330
|
-
//! https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC1alpha002/devguide/moddevg/tfmigr1/tfmigr1_000029.html
|
|
2963
|
+
//! \brief 指定下标需要执行的操作类型。
|
|
1331
2964
|
//!
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
2965
|
+
enum FaUpdateType {
|
|
2966
|
+
DECODE_UPDATE = 0, //!< 默认值。decode_update。
|
|
2967
|
+
};
|
|
2968
|
+
//!
|
|
2969
|
+
//! \brief 指定下标需要执行的操作类型。
|
|
2970
|
+
//!
|
|
2971
|
+
//! \warning 目前支持取值为DECODE_UPDATE。
|
|
2972
|
+
//!
|
|
2973
|
+
FaUpdateType faUpdateType = DECODE_UPDATE;
|
|
2974
|
+
//!
|
|
2975
|
+
//! \brief 序列并行的并行度SP。
|
|
2976
|
+
//!
|
|
2977
|
+
//! \note 默认值为1。
|
|
2978
|
+
//!
|
|
2979
|
+
uint32_t sp = 1;
|
|
2980
|
+
//!
|
|
2981
|
+
//! \brief 预留参数
|
|
2982
|
+
//!
|
|
2983
|
+
uint8_t rsv[64] = {0};
|
|
1335
2984
|
};
|
|
1336
2985
|
|
|
1337
|
-
|
|
1338
|
-
|
|
2986
|
+
//!
|
|
2987
|
+
//! \struct PagedCacheLoadParam
|
|
2988
|
+
//!
|
|
2989
|
+
//! \brief reshapeandcache反向
|
|
2990
|
+
//!
|
|
2991
|
+
struct PagedCacheLoadParam {
|
|
2992
|
+
//!
|
|
2993
|
+
//! \brief 预留参数
|
|
2994
|
+
//!
|
|
2995
|
+
uint8_t rsv[64] = {0};
|
|
2996
|
+
};
|
|
2997
|
+
} // namespace infer
|
|
2998
|
+
} // namespace atb
|
|
1339
2999
|
#endif
|