mindspore 2.6.0rc1__cp39-none-any.whl → 2.7.0__cp39-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (4997):
  1. mindspore/.commit_id +1 -1
  2. mindspore/Third_Party_Open_Source_Software_Notice +1290 -0
  3. mindspore/__init__.py +2 -2
  4. mindspore/_c_dataengine.cpython-39-aarch64-linux-gnu.so +0 -0
  5. mindspore/_c_expression.cpython-39-aarch64-linux-gnu.so +0 -0
  6. mindspore/_c_mindrecord.cpython-39-aarch64-linux-gnu.so +0 -0
  7. mindspore/_checkparam.py +42 -11
  8. mindspore/_extends/builtin_operations.py +3 -3
  9. mindspore/_extends/optimize/__init__.py +23 -0
  10. mindspore/_extends/optimize/cell_utils.py +96 -0
  11. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  12. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  13. mindspore/_extends/parse/__init__.py +3 -3
  14. mindspore/_extends/parse/compile_config.py +44 -22
  15. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
  16. mindspore/_extends/parse/parser.py +65 -84
  17. mindspore/_extends/parse/resources.py +39 -0
  18. mindspore/_extends/parse/standard_method.py +58 -14
  19. mindspore/_extends/parse/trope.py +8 -1
  20. mindspore/_extends/pijit/__init__.py +1 -2
  21. mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
  22. mindspore/amp.py +4 -22
  23. mindspore/boost/adasum.py +1 -1
  24. mindspore/boost/boost_cell_wrapper.py +4 -4
  25. mindspore/common/__init__.py +43 -12
  26. mindspore/common/_grad_function.py +2 -1
  27. mindspore/common/_pijit_context.py +28 -7
  28. mindspore/common/_stub_tensor.py +1 -209
  29. mindspore/common/_tensor_cpp_method.py +1 -1
  30. mindspore/common/_tensor_docs.py +3227 -3102
  31. mindspore/common/_utils.py +9 -1
  32. mindspore/common/api.py +377 -203
  33. mindspore/common/dtype.py +108 -57
  34. mindspore/common/dump.py +11 -16
  35. mindspore/common/dynamic_shape/__init__.py +0 -0
  36. mindspore/common/dynamic_shape/auto_dynamic_shape.py +498 -0
  37. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  38. mindspore/common/file_system.py +59 -9
  39. mindspore/common/generator.py +5 -3
  40. mindspore/common/hook_handle.py +33 -5
  41. mindspore/common/jit_config.py +1 -1
  42. mindspore/common/jit_trace.py +84 -105
  43. mindspore/common/np_dtype.py +3 -3
  44. mindspore/common/parameter.py +27 -29
  45. mindspore/common/recompute.py +5 -7
  46. mindspore/common/sparse_tensor.py +0 -3
  47. mindspore/common/symbol.py +0 -1
  48. mindspore/common/tensor.py +117 -131
  49. mindspore/communication/_comm_helper.py +46 -4
  50. mindspore/communication/management.py +79 -7
  51. mindspore/context.py +67 -55
  52. mindspore/dataset/__init__.py +1 -1
  53. mindspore/dataset/audio/transforms.py +1 -1
  54. mindspore/dataset/core/config.py +38 -4
  55. mindspore/dataset/engine/datasets.py +350 -322
  56. mindspore/dataset/engine/datasets_user_defined.py +70 -24
  57. mindspore/dataset/engine/iterators.py +2 -2
  58. mindspore/dataset/engine/obs/config_loader.py +2 -2
  59. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  60. mindspore/dataset/transforms/c_transforms.py +2 -2
  61. mindspore/dataset/transforms/py_transforms.py +7 -3
  62. mindspore/dataset/transforms/transforms.py +10 -6
  63. mindspore/dataset/vision/__init__.py +1 -1
  64. mindspore/dataset/vision/py_transforms.py +8 -8
  65. mindspore/dataset/vision/transforms.py +17 -5
  66. mindspore/dataset/vision/utils.py +632 -21
  67. mindspore/dataset/vision/validators.py +1 -0
  68. mindspore/device_context/ascend/device.py +1 -1
  69. mindspore/device_context/ascend/op_tuning.py +35 -1
  70. mindspore/device_context/gpu/__init__.py +2 -2
  71. mindspore/device_context/gpu/device.py +1 -1
  72. mindspore/device_context/gpu/op_precision.py +4 -2
  73. mindspore/device_context/gpu/op_tuning.py +6 -3
  74. mindspore/device_manager.py +16 -9
  75. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -4
  76. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  77. mindspore/experimental/optim/adadelta.py +13 -20
  78. mindspore/experimental/optim/adagrad.py +15 -22
  79. mindspore/experimental/optim/adam.py +17 -24
  80. mindspore/experimental/optim/adamax.py +14 -22
  81. mindspore/experimental/optim/adamw.py +28 -34
  82. mindspore/experimental/optim/asgd.py +15 -25
  83. mindspore/experimental/optim/lr_scheduler.py +27 -45
  84. mindspore/experimental/optim/nadam.py +14 -24
  85. mindspore/experimental/optim/optimizer.py +13 -23
  86. mindspore/experimental/optim/radam.py +18 -24
  87. mindspore/experimental/optim/rmsprop.py +14 -25
  88. mindspore/experimental/optim/rprop.py +15 -26
  89. mindspore/experimental/optim/sgd.py +9 -19
  90. mindspore/hal/__init__.py +4 -4
  91. mindspore/hal/contiguous_tensors_handle.py +2 -2
  92. mindspore/hal/memory.py +27 -7
  93. mindspore/include/api/cell.h +65 -5
  94. mindspore/include/api/cfg.h +24 -7
  95. mindspore/include/api/context.h +1 -0
  96. mindspore/include/api/delegate.h +10 -2
  97. mindspore/include/api/dual_abi_helper.h +100 -19
  98. mindspore/include/api/graph.h +14 -1
  99. mindspore/include/api/kernel.h +16 -3
  100. mindspore/include/api/kernel_api.h +9 -1
  101. mindspore/include/api/metrics/accuracy.h +9 -0
  102. mindspore/include/api/model.h +8 -1
  103. mindspore/include/api/model_group.h +4 -0
  104. mindspore/include/api/model_parallel_runner.h +2 -0
  105. mindspore/include/api/status.h +48 -10
  106. mindspore/include/api/types.h +8 -3
  107. mindspore/include/c_api/model_c.h +0 -58
  108. mindspore/include/c_api/tensor_c.h +0 -26
  109. mindspore/include/dataset/constants.h +9 -0
  110. mindspore/include/dataset/vision_ascend.h +1 -1
  111. mindspore/include/mindapi/base/type_id.h +3 -0
  112. mindspore/include/mindapi/base/types.h +7 -0
  113. mindspore/include/mindspore/ccsrc/availability/silent_check/silent_check.h +3 -4
  114. mindspore/include/mindspore/ccsrc/backend/backend_manager/backend_jit_config.h +47 -4
  115. mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/adapter/graph_kernel_cluster_cloud.h +1 -0
  116. mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/adapter/graph_kernel_comm_info_manager.h +1 -1
  117. mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/core/eliminate_redundant_output.h +1 -0
  118. mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/core/graph_kernel_expander.h +1 -1
  119. mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/core/graph_kernel_utils.h +25 -1
  120. mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/core/update_state_formatter.h +2 -1
  121. mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/depend_edge_elimination.h +61 -0
  122. mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/graph_kernel_flags.h +10 -1
  123. mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/model/lite_graph.h +1 -1
  124. mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/model/node.h +5 -6
  125. mindspore/include/mindspore/ccsrc/backend/common/mem_reuse/mem_reuse.h +1 -2
  126. mindspore/include/mindspore/ccsrc/backend/common/mem_reuse/mem_reuse_checker.h +0 -1
  127. mindspore/include/mindspore/ccsrc/backend/common/mem_reuse/mem_swap_manager.h +0 -1
  128. mindspore/include/mindspore/ccsrc/backend/common/optimizer/cache_manager.h +1 -1
  129. mindspore/include/mindspore/ccsrc/backend/common/optimizer/dynamic_shape/convert_custom_op.h +2 -2
  130. mindspore/include/mindspore/ccsrc/backend/common/optimizer/dynamic_shape/link_custom_op.h +1 -1
  131. mindspore/include/mindspore/ccsrc/backend/common/pass/add_attr_to_node/add_attr_to_node_register.h +1 -3
  132. mindspore/include/mindspore/ccsrc/backend/common/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.h +1 -1
  133. mindspore/include/mindspore/ccsrc/backend/common/pass/convert_list_to_tuple.h +2 -1
  134. mindspore/include/mindspore/ccsrc/backend/common/pass/custom_defined_depend.h +1 -3
  135. mindspore/include/mindspore/ccsrc/backend/common/pass/gradients_allreduce_depend_last_send.h +1 -2
  136. mindspore/include/mindspore/ccsrc/backend/common/pass/graph_view_replace_pass.h +1 -1
  137. mindspore/include/mindspore/ccsrc/backend/common/pass/insert_tensor_move_for_communication.h +2 -1
  138. mindspore/include/mindspore/ccsrc/backend/common/pass/ir_fusion/flash_attention_fusion.h +72 -0
  139. mindspore/include/mindspore/ccsrc/backend/common/pass/label_1f1b_overlap_node.h +1 -1
  140. mindspore/include/mindspore/ccsrc/backend/common/pass/mindir/add_depend_for_adamw.h +1 -3
  141. mindspore/include/mindspore/ccsrc/backend/common/pass/mindir/all_to_all_unify_mindir.h +8 -0
  142. mindspore/include/mindspore/ccsrc/backend/common/pass/optimize_gradients_allreduce_overlap.h +1 -1
  143. mindspore/include/mindspore/ccsrc/backend/common/pass/replace_node_by_proxy.h +1 -0
  144. mindspore/include/mindspore/ccsrc/backend/common/session/exec_order_builder.h +0 -2
  145. mindspore/include/mindspore/ccsrc/backend/common/session/executor.h +0 -41
  146. mindspore/include/mindspore/ccsrc/backend/common/session/kernel_graph_mgr.h +14 -10
  147. mindspore/include/mindspore/ccsrc/backend/common/session/session_basic.h +15 -31
  148. mindspore/include/mindspore/ccsrc/backend/common/somas/somas.h +1 -1
  149. mindspore/include/mindspore/ccsrc/backend/common/somas/somas_node.h +8 -2
  150. mindspore/include/mindspore/ccsrc/backend/ge_backend/dump/hook_dynamic_loader.h +5 -0
  151. mindspore/include/mindspore/ccsrc/backend/ge_backend/executor/ge_device_res_manager.h +1 -2
  152. mindspore/include/mindspore/ccsrc/backend/ge_backend/executor/ge_graph_executor.h +13 -14
  153. mindspore/include/mindspore/ccsrc/backend/ge_backend/executor/ge_utils.h +0 -2
  154. mindspore/include/mindspore/ccsrc/backend/ge_backend/ge_backend.h +10 -9
  155. mindspore/include/mindspore/ccsrc/backend/ge_backend/graph_ir/convert.h +2 -1
  156. mindspore/include/mindspore/ccsrc/backend/ge_backend/graph_ir/graph_runner.h +2 -1
  157. mindspore/include/mindspore/ccsrc/backend/ge_backend/graph_ir/utils.h +12 -14
  158. mindspore/include/mindspore/ccsrc/backend/ge_backend/pass/matmul_allreduce_fusion.h +51 -0
  159. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/abstract_actor.h +26 -19
  160. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/actor_common.h +14 -5
  161. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/actor_dump.h +2 -1
  162. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/actor_set.h +1 -1
  163. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/control_flow/control_actor.h +38 -38
  164. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/control_flow/entrance_actor.h +9 -9
  165. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/control_flow/exit_actor.h +8 -8
  166. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/control_flow/gather_actor.h +6 -6
  167. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/control_flow/stack_actor.h +11 -11
  168. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/control_flow/switch_actor.h +2 -2
  169. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/data_prepare_actor.h +16 -16
  170. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/data_source_actor.h +9 -9
  171. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/debug_actor.h +8 -8
  172. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/debug_aware_actor.h +2 -2
  173. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/loop_count_actor.h +6 -6
  174. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/memory_aware_actor.h +6 -6
  175. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/memory_manager_actor.h +8 -11
  176. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/output_actor.h +8 -5
  177. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/profiler_actor.h +2 -2
  178. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/recorder_actor.h +2 -2
  179. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/actor/super_kernel_actor.h +17 -17
  180. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/device_tensor_store.h +24 -24
  181. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/graph_compiler.h +8 -12
  182. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/graph_partition.h +49 -0
  183. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/scheduler_helper.h +1 -1
  184. mindspore/include/mindspore/ccsrc/backend/ge_backend/runtime/segment_runner.h +50 -0
  185. mindspore/include/mindspore/ccsrc/backend/ge_backend/utils/device_address_utils.h +4 -7
  186. mindspore/include/mindspore/ccsrc/backend/graph_compiler/op_backend.h +13 -24
  187. mindspore/include/mindspore/ccsrc/backend/graph_compiler/transform.h +2 -8
  188. mindspore/include/mindspore/ccsrc/backend/graph_compiler/vm.h +1 -5
  189. mindspore/include/mindspore/ccsrc/backend/ms_backend/ms_backend.h +0 -39
  190. mindspore/include/mindspore/ccsrc/backend/ms_backend/ms_backend_base.h +10 -5
  191. mindspore/include/mindspore/ccsrc/debug/checksum/checksum.h +35 -0
  192. mindspore/include/mindspore/ccsrc/debug/checksum/checksum_kernel.h +64 -0
  193. mindspore/include/mindspore/ccsrc/debug/checksum/checksum_mgr.h +50 -0
  194. mindspore/include/mindspore/ccsrc/debug/data_dump/device_statistic/check_overflow.h +1 -11
  195. mindspore/include/mindspore/ccsrc/debug/data_dump/device_statistic/common.h +0 -13
  196. mindspore/include/mindspore/ccsrc/debug/data_dump/device_statistic/kernel_launcher.h +3 -3
  197. mindspore/include/mindspore/ccsrc/debug/data_dump/device_statistic/mean.h +1 -1
  198. mindspore/include/mindspore/ccsrc/debug/data_dump/device_statistic/mem_manager.h +65 -0
  199. mindspore/include/mindspore/ccsrc/debug/data_dump/device_statistic/statistic_kernel.h +7 -8
  200. mindspore/include/mindspore/ccsrc/debug/data_dump/overflow_counter.h +6 -1
  201. mindspore/include/mindspore/ccsrc/debug/data_dump/tensor_info_collect.h +21 -6
  202. mindspore/include/mindspore/ccsrc/debug/data_dump/tensor_statistic.h +2 -2
  203. mindspore/include/mindspore/ccsrc/debug/debug_services.h +1 -4
  204. mindspore/include/mindspore/ccsrc/debug/debugger/debugger_utils.h +7 -8
  205. mindspore/include/mindspore/ccsrc/debug/debugger/tensor_summary.h +0 -53
  206. mindspore/include/mindspore/ccsrc/debug/dump/tensordump_control.h +6 -2
  207. mindspore/include/mindspore/ccsrc/debug/dump/utils.h +30 -0
  208. mindspore/include/mindspore/ccsrc/debug/profiler/mstx/mstx_impl.h +37 -24
  209. mindspore/include/mindspore/ccsrc/debug/profiler/mstx/mstx_symbol.h +63 -4
  210. mindspore/include/mindspore/ccsrc/debug/profiler/profiler.h +37 -15
  211. mindspore/include/mindspore/ccsrc/debug/profiler/profiling.h +9 -6
  212. mindspore/include/mindspore/ccsrc/debug/profiler/profiling_framework_data.h +2 -0
  213. mindspore/include/mindspore/ccsrc/debug/profiler/python_obj_pointer.h +7 -7
  214. mindspore/include/mindspore/ccsrc/debug/profiler/report_data.h +23 -0
  215. mindspore/include/mindspore/ccsrc/debug/profiler/thread.h +2 -2
  216. mindspore/include/mindspore/ccsrc/debug/summary/summary.h +1 -1
  217. mindspore/include/mindspore/ccsrc/debug/utils.h +0 -5
  218. mindspore/include/mindspore/ccsrc/distributed/cluster/actor_route_table_proxy.h +1 -1
  219. mindspore/include/mindspore/ccsrc/distributed/cluster/actor_route_table_service.h +0 -2
  220. mindspore/include/mindspore/ccsrc/distributed/cluster/topology/meta_server_node.h +5 -5
  221. mindspore/include/mindspore/ccsrc/distributed/persistent/storage/local_file.h +0 -1
  222. mindspore/include/mindspore/ccsrc/distributed/rpc/tcp/connection.h +0 -1
  223. mindspore/include/mindspore/ccsrc/distributed/rpc/tcp/socket_operation.h +0 -1
  224. mindspore/include/mindspore/ccsrc/distributed/rpc/tcp/tcp_comm.h +0 -1
  225. mindspore/include/mindspore/ccsrc/distributed/rpc/tcp/tcp_socket_operation.h +0 -1
  226. mindspore/include/mindspore/ccsrc/frontend/ir/primitive_py.h +1 -1
  227. mindspore/include/mindspore/ccsrc/frontend/ir/py_execute_py.h +11 -28
  228. mindspore/include/mindspore/ccsrc/frontend/ir/storage.h +44 -0
  229. mindspore/include/mindspore/ccsrc/frontend/ir/storage_base.h +45 -0
  230. mindspore/include/mindspore/ccsrc/frontend/ir/tensor_py.h +13 -22
  231. mindspore/include/mindspore/ccsrc/frontend/np_dtypes/np_dtypes.h +29 -0
  232. mindspore/include/mindspore/ccsrc/frontend/operator/composite/composite.h +76 -5
  233. mindspore/include/mindspore/ccsrc/frontend/operator/composite/do_signature.h +2 -1
  234. mindspore/include/mindspore/ccsrc/frontend/operator/composite/functional_overload.h +46 -0
  235. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/common/meta_impl.h +161 -22
  236. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/common/utils.h +38 -61
  237. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/any.h +27 -0
  238. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/any_ext.h +27 -0
  239. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/conv3d_padding.h +41 -0
  240. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/einsum_ext.h +47 -0
  241. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/func_dropout_ext.h +28 -0
  242. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/func_max_pool2d.h +28 -0
  243. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/gmm.h +28 -0
  244. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/gmm_backward.h +28 -0
  245. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/gmm_backward_fusion.h +28 -0
  246. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/gmm_common_utils.h +30 -0
  247. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/gmm_v2.h +28 -0
  248. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/gmm_v2_backward.h +28 -0
  249. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/gmm_v2_backward_fusion.h +28 -0
  250. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/inplace_exponential.h +31 -0
  251. mindspore/include/mindspore/ccsrc/frontend/operator/meta_dsl/func_op/moe_token_unpermute.h +28 -0
  252. mindspore/include/mindspore/ccsrc/frontend/optimizer/ad/adjoint.h +22 -3
  253. mindspore/include/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.h +21 -10
  254. mindspore/include/mindspore/ccsrc/frontend/optimizer/ad/grad.h +5 -2
  255. mindspore/include/mindspore/ccsrc/frontend/optimizer/ad/pynative_jit_grad.h +11 -2
  256. mindspore/include/mindspore/ccsrc/frontend/optimizer/auto_monad_eliminate.h +3 -13
  257. mindspore/include/mindspore/ccsrc/frontend/optimizer/cse_pass.h +3 -7
  258. mindspore/include/mindspore/ccsrc/frontend/optimizer/graph_transform.h +3 -2
  259. mindspore/include/mindspore/ccsrc/frontend/optimizer/inplace_input_replace.h +30 -0
  260. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/add_forward_monad_depend.h +51 -37
  261. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/branch_culling.h +7 -104
  262. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/call_graph_tuple_transform.h +2 -1
  263. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/check_invalid_view_inplace_dout.h +46 -0
  264. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/const_output_eliminate.h +4 -0
  265. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/expand_dump_flag.h +1 -0
  266. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/get_grad_eliminate.h +5 -1
  267. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/incorporate_call.h +3 -3
  268. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/inline.h +35 -9
  269. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/item_dict_eliminate.h +3 -1
  270. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/item_tuple_or_list_eliminate.h +1 -17
  271. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/j_node_and_user_rematch.h +1 -1
  272. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/loop_unroll.h +1 -0
  273. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/parameter_eliminate.h +3 -3
  274. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/partial_eliminate.h +5 -3
  275. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/recompute_prepare.h +1 -0
  276. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/special_op_eliminate.h +9 -3
  277. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/stack_unstack_eliminate.h +13 -6
  278. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/stopgrad_eliminate.h +3 -1
  279. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass/switch_or_switch_layer_defer_inline.h +13 -5
  280. mindspore/include/mindspore/ccsrc/frontend/optimizer/irpass.h +7 -7
  281. mindspore/include/mindspore/ccsrc/frontend/optimizer/opt.h +10 -5
  282. mindspore/include/mindspore/ccsrc/frontend/optimizer/optimizer.h +55 -280
  283. mindspore/include/mindspore/ccsrc/frontend/optimizer/pattern_matcher.h +8 -2
  284. mindspore/include/mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.h +5 -0
  285. mindspore/include/mindspore/ccsrc/frontend/parallel/auto_parallel/stage_compute.h +1 -1
  286. mindspore/include/mindspore/ccsrc/frontend/parallel/came_parallel_handler.h +1 -1
  287. mindspore/include/mindspore/ccsrc/frontend/parallel/costmodel_context.h +1 -1
  288. mindspore/include/mindspore/ccsrc/frontend/parallel/dynamic_creator.h +1 -1
  289. mindspore/include/mindspore/ccsrc/frontend/parallel/dynamic_shape/dynamic_shape.h +1 -1
  290. mindspore/include/mindspore/ccsrc/frontend/parallel/graph_util/flops_collection.h +1 -1
  291. mindspore/include/mindspore/ccsrc/frontend/parallel/graph_util/fold_pipeline_split_utils.h +1 -1
  292. mindspore/include/mindspore/ccsrc/frontend/parallel/graph_util/grad_accumulation_utils.h +2 -1
  293. mindspore/include/mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.h +2 -0
  294. mindspore/include/mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.h +3 -2
  295. mindspore/include/mindspore/ccsrc/frontend/parallel/graph_util/node_info.h +2 -0
  296. mindspore/include/mindspore/ccsrc/frontend/parallel/graph_util/parallel_tensordump.h +50 -14
  297. mindspore/include/mindspore/ccsrc/frontend/parallel/interleaved_parallel/interleaved_parallel.h +1 -1
  298. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h +0 -11
  299. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/addn_info.h +1 -1
  300. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/apply_rotary_pos_emb_info.h +1 -1
  301. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/arithmetic_info.h +44 -0
  302. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/avgpool_info.h +1 -1
  303. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/cdist_info.h +1 -1
  304. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/conv3d_info.h +1 -1
  305. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/f_f_n_info.h +1 -1
  306. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/fft_info.h +1 -1
  307. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/fillv2_info.h +1 -1
  308. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/flash_attention_score_info.h +20 -20
  309. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/fused_infer_attention_score_info.h +15 -15
  310. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/gamma_info.h +1 -1
  311. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/gather_info.h +16 -0
  312. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/grid_sampler2d.h +1 -1
  313. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/group_norm_info.h +1 -1
  314. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/incre_flash_attention_info.h +1 -1
  315. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/index_add_info.h +55 -0
  316. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/inplace_op_info.h +1 -1
  317. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/iou_info.h +1 -1
  318. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/kldiv_loss_info.h +1 -1
  319. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/kv_cache_mgr_info.h +1 -1
  320. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/kv_cache_scatter_update_info.h +1 -1
  321. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/lin_space_info.h +1 -1
  322. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/loss_info.h +34 -0
  323. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/matmul_info.h +14 -1
  324. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/max_avg_pool_3d_info.h +1 -1
  325. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/moe_compute_expert_tokens_info.h +1 -1
  326. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/moe_finalize_routing_info.h +1 -1
  327. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/moe_gating_top_k_softmax_info.h +1 -1
  328. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/moe_init_routing_info.h +1 -1
  329. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.h +7 -2
  330. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h +18 -1
  331. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/pad_info.h +1 -1
  332. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/paged_attention_info.h +1 -1
  333. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/paged_attention_mask_info.h +1 -1
  334. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/prompt_flash_attention_info.h +1 -1
  335. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/quant_batch_matmul_info.h +1 -1
  336. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/quant_info.h +1 -1
  337. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/quant_linear_sparse_info.h +1 -1
  338. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/reduce_base_method_info.h +3 -1
  339. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/reshape_and_cache_info.h +1 -1
  340. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.h +9 -4
  341. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/scatter_math_ops_info.h +1 -1
  342. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/scatter_nd_ops_info.h +1 -1
  343. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/scatter_ops_info.h +1 -1
  344. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/self_define_shard_info.h +1 -1
  345. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/stand_alone_info.h +1 -1
  346. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/topkrouter_info.h +1 -1
  347. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/topprouter_info.h +55 -0
  348. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/tracev2_info.h +1 -1
  349. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/tril_info.h +1 -1
  350. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/weight_quant_batch_matmul_info.h +1 -1
  351. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/wkv_info.h +1 -1
  352. mindspore/include/mindspore/ccsrc/frontend/parallel/parallel_postprocessor.h +1 -1
  353. mindspore/include/mindspore/ccsrc/frontend/parallel/parallel_preprocessor.h +2 -2
  354. mindspore/include/mindspore/ccsrc/frontend/parallel/parallel_processor.h +5 -4
  355. mindspore/include/mindspore/ccsrc/frontend/parallel/parallel_processor_context.h +1 -1
  356. mindspore/include/mindspore/ccsrc/frontend/parallel/parallel_whole_graph_processor.h +1 -1
  357. mindspore/include/mindspore/ccsrc/frontend/parallel/parameter_manager.h +0 -1
  358. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/allreduce_slice_to_reducescatter.h +1 -1
  359. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/assign_add_opt.h +1 -1
  360. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/begin_end_overlap_inline.h +1 -1
  361. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/bias_add_comm_swap.h +1 -1
  362. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/dataset_reader_optimizer.h +1 -1
  363. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/fias_sp.h +1 -1
  364. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/flash_sp.h +1 -1
  365. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/float32_redistribution.h +1 -1
  366. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/full_micro_interleaved_order_control.h +1 -1
  367. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/handle_group_info.h +1 -1
  368. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/interleave_branches_utils.h +1 -1
  369. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/interleave_parallel_branches.h +1 -1
  370. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/interleave_split_concat_branches.h +1 -1
  371. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/label_fine_grained_interleaved_index.h +1 -1
  372. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/label_micro_interleaved_index.h +1 -1
  373. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/matmul_add_comm_reduction.h +1 -1
  374. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/merge_cast_opt.h +1 -1
  375. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/merge_comm.h +1 -1
  376. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/merge_recompute_call_nodes.h +28 -0
  377. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/micro_interleaved_order_control.h +1 -1
  378. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/offloading_packed_expert.h +1 -1
  379. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/optimize_parallel_allgather_comm.h +1 -1
  380. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_grad_comm.h +1 -1
  381. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_grad_flash_sp.h +1 -1
  382. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_grad_ring_attention.h +1 -1
  383. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_gradmatmul_and_gradallreduce.h +1 -1
  384. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_opt_shard_in_pipeline.h +1 -1
  385. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_param_gather.h +1 -1
  386. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_allgather_and_flashattention_grad.h +1 -1
  387. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_and_grad_model_parallel.h +1 -1
  388. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/overlap_recompute_comm.h +28 -0
  389. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/pass_utils.h +1 -1
  390. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/remove_cast_before_assign_add.h +1 -1
  391. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/reorder_send_recv_between_fp_bp.h +1 -1
  392. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/slice_activation_in_cell_share_recompute.h +1 -1
  393. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/split_layernorm_comm_fp.h +1 -1
  394. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/split_matmul_comm_elementwise_fp.h +1 -1
  395. mindspore/include/mindspore/ccsrc/frontend/parallel/pass/swap_dp_allreduce_reducescatter.h +1 -1
  396. mindspore/include/mindspore/ccsrc/frontend/parallel/pipeline_transformer/detach_backward.h +70 -0
  397. mindspore/include/mindspore/ccsrc/frontend/parallel/pipeline_transformer/fold_pipeline_transformer.h +1 -1
  398. mindspore/include/mindspore/ccsrc/frontend/parallel/pipeline_transformer/gpipe_interleave_scheduler.h +1 -1
  399. mindspore/include/mindspore/ccsrc/frontend/parallel/pipeline_transformer/pipeline_interleave.h +14 -3
  400. mindspore/include/mindspore/ccsrc/frontend/parallel/pipeline_transformer/pipeline_scheduler.h +1 -1
  401. mindspore/include/mindspore/ccsrc/frontend/parallel/pipeline_transformer/seqpipe_scheduler.h +1 -1
  402. mindspore/include/mindspore/ccsrc/frontend/parallel/pipeline_transformer/zero_bubble_v.h +124 -0
  403. mindspore/include/mindspore/ccsrc/frontend/parallel/shard/shard.h +1 -1
  404. mindspore/include/mindspore/ccsrc/frontend/parallel/step_assigned_parallel.h +1 -1
  405. mindspore/include/mindspore/ccsrc/frontend/parallel/step_parallel_utils.h +10 -3
  406. mindspore/include/mindspore/ccsrc/frontend/parallel/strategy_checkpoint/strategy_checkpoint_info.h +1 -1
  407. mindspore/include/mindspore/ccsrc/frontend/parallel/strategy_loader.h +1 -1
  408. mindspore/include/mindspore/ccsrc/frontend/parallel/tensor_layout/layout_utils.h +1 -1
  409. mindspore/include/mindspore/ccsrc/frontend/parallel/tensor_layout/shared_parameter.h +1 -1
  410. mindspore/include/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_layout.h +3 -0
  411. mindspore/include/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_transform.h +2 -2
  412. mindspore/include/mindspore/ccsrc/include/backend/anf_runtime_algorithm.h +29 -13
  413. mindspore/include/mindspore/ccsrc/include/backend/data_queue/data_queue_mgr.h +1 -5
  414. mindspore/include/mindspore/ccsrc/include/backend/debug/data_dump/dump_control.h +1 -0
  415. mindspore/include/mindspore/ccsrc/include/backend/debug/data_dump/dump_json_parser.h +4 -5
  416. mindspore/include/mindspore/ccsrc/include/backend/debug/data_dump/dump_utils.h +1 -0
  417. mindspore/include/mindspore/ccsrc/include/backend/debug/data_dump/tensor_stat_dump.h +0 -3
  418. mindspore/include/mindspore/ccsrc/include/backend/debug/debugger/debugger.h +0 -2
  419. mindspore/include/mindspore/ccsrc/include/backend/debug/execute_order_tracker/execute_order_tracker.h +14 -4
  420. mindspore/include/mindspore/ccsrc/include/backend/debug/tensor_data.h +0 -19
  421. mindspore/include/mindspore/ccsrc/include/backend/distributed/cluster/tcp_store.h +53 -0
  422. mindspore/include/mindspore/ccsrc/include/backend/distributed/collective/collective_manager.h +17 -10
  423. mindspore/include/mindspore/ccsrc/include/backend/distributed/constants.h +1 -10
  424. mindspore/include/mindspore/ccsrc/include/backend/distributed/embedding_cache/embedding_hash_map.h +0 -2
  425. mindspore/include/mindspore/ccsrc/include/backend/distributed/ps/ps_context.h +0 -6
  426. mindspore/include/mindspore/ccsrc/include/backend/distributed/rpc/tcp/constants.h +2 -1
  427. mindspore/include/mindspore/ccsrc/include/backend/kernel_graph.h +0 -1
  428. mindspore/include/mindspore/ccsrc/include/backend/kernel_info.h +8 -10
  429. mindspore/include/mindspore/ccsrc/include/backend/mbuf_device_address.h +5 -5
  430. mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/abstract_dynamic_mem_pool.h +10 -5
  431. mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/address_discretizer.h +63 -0
  432. mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/dynamic_mem_pool.h +12 -0
  433. mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/max_segment_tree.h +181 -0
  434. mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/mem_dynamic_allocator.h +0 -1
  435. mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/mem_pool_util.h +98 -0
  436. mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/mem_tracker.h +11 -107
  437. mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/race_checker.h +64 -0
  438. mindspore/include/mindspore/ccsrc/include/backend/mem_reuse/tracker_graph.h +91 -0
  439. mindspore/include/mindspore/ccsrc/include/backend/optimizer/helper.h +0 -2
  440. mindspore/include/mindspore/ccsrc/include/backend/optimizer/inplace_node_pass.h +1 -9
  441. mindspore/include/mindspore/ccsrc/include/backend/optimizer/optimizer.h +3 -6
  442. mindspore/include/mindspore/ccsrc/include/backend/optimizer/pattern_engine.h +0 -2
  443. mindspore/include/mindspore/ccsrc/include/backend/optimizer/visitor.h +2 -0
  444. mindspore/include/mindspore/ccsrc/include/backend/py_execute_utils.h +2 -1
  445. mindspore/include/mindspore/ccsrc/include/common/debug/common.h +2 -1
  446. mindspore/include/mindspore/ccsrc/include/common/debug/draw.h +3 -1
  447. mindspore/include/mindspore/ccsrc/include/common/debug/dump_proto.h +2 -10
  448. mindspore/include/mindspore/ccsrc/include/common/expander/core/emitter.h +3 -2
  449. mindspore/include/mindspore/ccsrc/include/common/expander/core/node.h +1 -1
  450. mindspore/include/mindspore/ccsrc/include/common/fallback.h +9 -0
  451. mindspore/include/mindspore/ccsrc/include/common/pybind_api/api_register.h +7 -1
  452. mindspore/include/mindspore/ccsrc/include/common/pynative/abstract_converter.h +0 -4
  453. mindspore/include/mindspore/ccsrc/include/common/pynative/adapter.h +2 -5
  454. mindspore/include/mindspore/ccsrc/include/common/pynative/common_utils.h +5 -1
  455. mindspore/include/mindspore/ccsrc/include/common/pynative/grad_state.h +12 -0
  456. mindspore/include/mindspore/ccsrc/include/common/pynative/variable.h +326 -0
  457. mindspore/include/mindspore/ccsrc/include/common/random.h +1 -2
  458. mindspore/include/mindspore/ccsrc/include/common/runtime_conf/runtime_conf.h +11 -7
  459. mindspore/include/mindspore/ccsrc/include/common/runtime_conf/thread_bind_core.h +12 -10
  460. mindspore/include/mindspore/ccsrc/include/common/symbol_engine/symbol_engine_impl.h +16 -1
  461. mindspore/include/mindspore/ccsrc/include/common/utils/anfalgo.h +13 -2
  462. mindspore/include/mindspore/ccsrc/include/common/utils/comm_manager.h +0 -1
  463. mindspore/include/mindspore/ccsrc/include/common/utils/compile_cache_context.h +4 -2
  464. mindspore/include/mindspore/ccsrc/include/common/utils/convert_utils.h +18 -4
  465. mindspore/include/mindspore/ccsrc/include/common/utils/convert_utils_py.h +2 -10
  466. mindspore/include/mindspore/ccsrc/include/common/utils/cse.h +0 -1
  467. mindspore/include/mindspore/ccsrc/include/common/utils/json_operation_utils.h +1 -1
  468. mindspore/include/mindspore/ccsrc/include/common/utils/ms_device_shape_transfer.h +0 -1
  469. mindspore/include/mindspore/ccsrc/include/common/utils/parallel_context.h +10 -2
  470. mindspore/include/mindspore/ccsrc/include/common/utils/python_adapter.h +3 -3
  471. mindspore/include/mindspore/ccsrc/include/common/utils/stub_tensor.h +1 -3
  472. mindspore/include/mindspore/ccsrc/include/common/utils/summary/event_writer.h +1 -1
  473. mindspore/include/mindspore/ccsrc/include/common/utils/tensor_py.h +24 -117
  474. mindspore/include/mindspore/ccsrc/include/common/utils/tensor_py_wrapper.h +1 -7
  475. mindspore/include/mindspore/ccsrc/include/common/utils/tensor_utils.h +3 -3
  476. mindspore/include/mindspore/ccsrc/include/common/utils/utils.h +8 -8
  477. mindspore/include/mindspore/ccsrc/include/common/visible.h +0 -10
  478. mindspore/include/mindspore/ccsrc/kernel/environ_manager.h +2 -0
  479. mindspore/include/mindspore/ccsrc/kernel/framework_utils.h +3 -0
  480. mindspore/include/mindspore/ccsrc/kernel/graph_kernel_info.h +1 -1
  481. mindspore/include/mindspore/ccsrc/kernel/philox_random.h +47 -87
  482. mindspore/include/mindspore/ccsrc/minddata/dataset/api/python/python_mp.h +26 -6
  483. mindspore/include/mindspore/ccsrc/minddata/dataset/core/config_manager.h +9 -0
  484. mindspore/include/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h +1 -1
  485. mindspore/include/mindspore/ccsrc/minddata/dataset/core/data_type.h +1 -56
  486. mindspore/include/mindspore/ccsrc/minddata/dataset/core/device_buffer.h +74 -0
  487. mindspore/include/mindspore/ccsrc/minddata/dataset/core/message_queue.h +29 -20
  488. mindspore/include/mindspore/ccsrc/minddata/dataset/core/shared_memory_queue.h +47 -2
  489. mindspore/include/mindspore/ccsrc/minddata/dataset/core/tensor.h +26 -2
  490. mindspore/include/mindspore/ccsrc/minddata/dataset/engine/connector.h +7 -0
  491. mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_info.h +61 -0
  492. mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.h +28 -39
  493. mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/cpu_map_job.h +3 -3
  494. mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/gpu_map_job.h +3 -3
  495. mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/map_job.h +2 -2
  496. mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/map_op.h +12 -2
  497. mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/npu_map_job.h +3 -3
  498. mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/receive_bridge_op.h +4 -0
  499. mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/send_bridge_op.h +1 -1
  500. mindspore/include/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.h +0 -1
  501. mindspore/include/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h +9 -0
  502. mindspore/include/mindspore/ccsrc/minddata/dataset/include/dataset/vision_ascend.h +1 -1
  503. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/data/parse_example_op.h +19 -15
  504. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/acl_adapter.h +28 -1
  505. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/AclLiteType.h +6 -1
  506. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/AclLiteUtils.h +6 -1
  507. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/DvppCommon.h +1 -1
  508. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/ErrorCode.h +2 -2
  509. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/ThreadSafeQueue.h +3 -1
  510. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/VdecHelper.h +9 -12
  511. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/acl_plugin.h +19 -0
  512. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/dvpp_video.h +29 -10
  513. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/dvpp_video_utils.h +80 -0
  514. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.h +1 -1
  515. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/pyav/container.h +100 -0
  516. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/pyav/context.h +102 -0
  517. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/pyav/format.h +45 -0
  518. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/pyav/frame.h +74 -0
  519. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/pyav/packet.h +59 -0
  520. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/pyav/stream.h +93 -0
  521. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/py_func_op.h +37 -0
  522. mindspore/include/mindspore/ccsrc/minddata/dataset/util/btree.h +1 -1
  523. mindspore/include/mindspore/ccsrc/minddata/dataset/util/command.h +29 -0
  524. mindspore/include/mindspore/ccsrc/minddata/dataset/util/cond_var.h +0 -1
  525. mindspore/include/mindspore/ccsrc/minddata/dataset/util/ftok_key.h +2 -2
  526. mindspore/include/mindspore/ccsrc/minddata/dataset/util/json_helper.h +1 -1
  527. mindspore/include/mindspore/ccsrc/minddata/dataset/util/log_adapter.h +0 -5
  528. mindspore/include/mindspore/ccsrc/minddata/dataset/util/queue.h +1 -1
  529. mindspore/include/mindspore/ccsrc/minddata/dataset/util/sig_handler.h +11 -3
  530. mindspore/include/mindspore/ccsrc/minddata/dataset/util/system_pool.h +1 -1
  531. mindspore/include/mindspore/ccsrc/minddata/dataset/util/task_manager.h +0 -1
  532. mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/common/log_adapter.h +0 -5
  533. mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_distributed_sample.h +0 -1
  534. mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_header.h +0 -1
  535. mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_index.h +0 -1
  536. mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_pk_sample.h +0 -1
  537. mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_reader.h +0 -1
  538. mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_schema.h +0 -1
  539. mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_statistics.h +0 -1
  540. mindspore/include/mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h +0 -1
  541. mindspore/include/mindspore/ccsrc/minddata/utils.h +30 -2
  542. mindspore/include/mindspore/ccsrc/ms_extension/all.h +46 -0
  543. mindspore/include/mindspore/ccsrc/ms_extension/api.h +33 -0
  544. mindspore/include/mindspore/ccsrc/ms_extension/ascend/atb/atb_common.h +98 -0
  545. mindspore/include/mindspore/ccsrc/ms_extension/ascend/atb/operation_cache.h +229 -0
  546. mindspore/include/mindspore/ccsrc/ms_extension/common/tensor.h +319 -0
  547. mindspore/include/mindspore/ccsrc/ms_extension/common/tensor_utils.h +83 -0
  548. mindspore/include/mindspore/ccsrc/ms_extension/common/visible.h +28 -0
  549. mindspore/include/mindspore/ccsrc/ms_extension/pynative/pyboost_extension.h +312 -0
  550. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/capture_context.h +1 -1
  551. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/eval_frame_hook.h +2 -2
  552. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_build/build_graph_utils.h +1 -7
  553. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_build/func_graph_builder.h +25 -6
  554. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/abstract_object.h +92 -19
  555. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/cfg.h +19 -5
  556. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/code_generator.h +53 -49
  557. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph.h +40 -10
  558. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_analyzer.h +3 -19
  559. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_arguments_optimizer.h +145 -0
  560. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_build.h +13 -26
  561. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/node.h +33 -3
  562. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_capture/side_effect.h +116 -27
  563. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_compiler/abstract_type.h +2 -2
  564. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_compiler/compiler.h +4 -0
  565. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_compiler/utils.h +2 -0
  566. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_guard/cache.h +32 -8
  567. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_guard/guard.h +11 -15
  568. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_guard/guard_utils.h +17 -8
  569. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_guard/infer.h +0 -2
  570. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_guard/shape_ctx.h +3 -3
  571. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/graph_guard/trace.h +50 -93
  572. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/jit_compile_results.h +1 -16
  573. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/pi_jit_config.h +11 -22
  574. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/python_adapter/py_code.h +56 -5
  575. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/python_adapter/py_frame.h +6 -7
  576. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/python_adapter/pydef.h +0 -6
  577. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/runtime.h +1 -1
  578. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/utils/opcode_util.h +5 -0
  579. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/utils/stop_trace_reason.h +26 -17
  580. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/utils/utils.h +15 -11
  581. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/action.h +3 -4
  582. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/executor/executor_py.h +116 -0
  583. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/executor/graph_executor_py.h +118 -0
  584. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/executor/jit_executor_py.h +68 -0
  585. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/graph_circle_handler.h +35 -0
  586. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/parse/data_converter.h +6 -0
  587. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/parse/function_block.h +1 -0
  588. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/parse/parse.h +10 -0
  589. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/parse/parse_base.h +17 -3
  590. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/parse/resolve.h +15 -1
  591. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/pass.h +4 -1
  592. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/pass_config.h +4 -0
  593. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/pipeline.h +17 -171
  594. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/resource.h +8 -16
  595. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/async_eval_result.h +2 -2
  596. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/auto_monad.h +1 -1
  597. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/builtin_prim.h +3 -3
  598. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/evaluator.h +14 -12
  599. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/order_enforce.h +1 -1
  600. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/prim.h +13 -76
  601. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/prim_utils.h +78 -0
  602. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/program_specialize.h +1 -1
  603. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/static_analysis.h +4 -5
  604. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/validator.h +3 -0
  605. mindspore/include/mindspore/ccsrc/pipeline/jit/trace/trace_recorder.h +19 -4
  606. mindspore/include/mindspore/ccsrc/pipeline/llm_boost/llm_boost_binder.h +1 -1
  607. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_dma_handle.h +0 -2
  608. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.h +4 -5
  609. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.h +2 -2
  610. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_context.h +69 -0
  611. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_res_manager.h +16 -13
  612. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_optimization.h +0 -1
  613. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.h +12 -8
  614. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/stress_detect.h +40 -0
  615. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.h +32 -7
  616. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/profiler/feature_mgr.h +1 -1
  617. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/profiler/mstx/mstx_dispatcher.h +10 -8
  618. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/atb/add_atb_kernel.h +2 -1
  619. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/atb/atb_adapter.h +24 -0
  620. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/atb/atb_kernel_mod.h +3 -1
  621. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/atb/inplace_grouped_matmul_add_atb_kernel.h +2 -1
  622. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/atb/inplace_matmul_add_atb_kernel.h +39 -0
  623. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/dvm/dvm_comm_info.h +1 -0
  624. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/dvm/dvm_kernel_mod.h +1 -1
  625. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/dvm/lazy_fusion_kernel.h +15 -16
  626. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/dvm/lazy_fusion_op.h +119 -93
  627. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.h +1 -1
  628. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_all_gather_matmul.h +63 -0
  629. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_all_gather_v.h +51 -0
  630. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_matmul_all_reduce.h +2 -1
  631. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_matmul_reduce_scatter.h +61 -0
  632. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_receive.h +3 -1
  633. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_reduce_scatter_v.h +51 -0
  634. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_send.h +1 -0
  635. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.h +26 -11
  636. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_all_gather.h +1 -1
  637. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_all_gather_into_tensor.h +1 -1
  638. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_all_gather_into_tensor_uneven.h +40 -0
  639. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_all_reduce.h +1 -1
  640. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_all_to_all_v.h +1 -1
  641. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_all_to_all_v_single.h +1 -1
  642. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_barrier.h +1 -1
  643. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_batch_isend_irecv.h +1 -1
  644. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_broadcast.h +1 -1
  645. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_gather.h +1 -1
  646. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_gather_into_tensor.h +1 -1
  647. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_irecv.h +1 -1
  648. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_isend.h +1 -1
  649. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_reduce.h +1 -1
  650. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_reduce_scatter.h +1 -1
  651. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_reduce_scatter_tensor.h +1 -1
  652. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_reduce_scatter_tensor_uneven.h +40 -0
  653. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_scatter.h +1 -1
  654. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/dist_comm_scatter_tensor.h +1 -1
  655. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/inner_comm_all_gather.h +1 -1
  656. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/inner_comm_all_reduce.h +1 -1
  657. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/inner_comm_all_to_all_v.h +1 -1
  658. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/inner_comm_irecv.h +1 -1
  659. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/inner_comm_isend.h +1 -1
  660. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/inner_comm_reduce_scatter.h +1 -1
  661. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/comm_common.h +4 -4
  662. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_all_gather.h +1 -1
  663. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_all_gather_into_tensor.h +2 -2
  664. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_all_gather_into_tensor_uneven.h +38 -0
  665. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_all_reduce.h +1 -1
  666. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_all_to_all_v.h +1 -1
  667. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_all_to_all_v_single.h +2 -2
  668. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_broadcast.h +1 -1
  669. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_gather.h +1 -1
  670. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_gather_into_tensor.h +2 -2
  671. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_irecv.h +1 -1
  672. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_isend.h +1 -1
  673. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_reduce.h +1 -1
  674. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_reduce_scatter.h +1 -1
  675. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_reduce_scatter_tensor.h +2 -2
  676. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_reduce_scatter_tensor_uneven.h +39 -0
  677. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_scatter.h +1 -1
  678. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/dist_comm_scatter_tensor.h +2 -2
  679. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/inner_comm_all_gather.h +1 -1
  680. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/inner_comm_all_reduce.h +1 -1
  681. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/inner_comm_all_to_all_v.h +1 -1
  682. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/inner_comm_isend.h +1 -1
  683. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/inner_comm_reduce_scatter.h +1 -1
  684. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/add_rms_norm_quant.h +15 -1
  685. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/dynamic_ntk.h +31 -0
  686. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/fused_add_topk_div.h +31 -0
  687. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/internal_kernel_in_out_map.h +16 -0
  688. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/internal_tiling_cache.h +3 -3
  689. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/kv_scale_cache.h +30 -0
  690. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/mla.h +48 -0
  691. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/mla_preprocess.h +32 -0
  692. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/multi_weight_matmul.h +2 -0
  693. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/apply_rotary_pos_emb.h +46 -0
  694. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/auto_gen/internal_kernel_info_adapter.h +95 -0
  695. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/auto_gen/kernel_info_adapter.h +78 -0
  696. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/flash_attention_score.h +54 -0
  697. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/internal_kernel_info.h +162 -0
  698. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/internal_pyboost_utils.h +121 -0
  699. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/mla.h +53 -0
  700. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/paged_attention.h +91 -0
  701. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/pyboost/reshape_and_cache.h +43 -0
  702. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/internal/swiglu_dynamic_quant.h +32 -0
  703. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/rts/reshape_ext.h +5 -0
  704. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/boost_model_atb.h +1 -1
  705. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/atb_infer.h +7 -9
  706. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/comm.h +82 -0
  707. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/common_op_params.h +77 -0
  708. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/context.h +50 -10
  709. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/graph_op_builder.h +24 -18
  710. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/infer_op_params.h +2331 -671
  711. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/operation.h +29 -11
  712. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/operation_infra.h +78 -0
  713. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/svector.h +19 -22
  714. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/train_op_params.h +215 -24
  715. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/types.h +39 -24
  716. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb/utils.h +7 -9
  717. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/base/context_factory.h +1 -0
  718. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/base/event_manager.h +156 -0
  719. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/base/external_comm_manager.h +68 -0
  720. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/base/hosttensor_binder.h +0 -1
  721. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/base/model.h +33 -29
  722. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/error.h +49 -0
  723. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/file_utils.h +86 -0
  724. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_config.h +84 -0
  725. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_error.h +20 -0
  726. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_utils.h +86 -0
  727. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log.h +128 -52
  728. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/ModelTaskExecutor.h +64 -0
  729. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/TaskQueue.h +40 -0
  730. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/check_util.h +80 -0
  731. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/config.h +2 -12
  732. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/file_system.h +35 -0
  733. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/hccl_runner.h +48 -0
  734. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/model_factory.h +1 -3
  735. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/operation_factory.h +3 -3
  736. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/operation_util.h +6 -5
  737. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/share_memory.h +46 -0
  738. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/singleton.h +7 -0
  739. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/statistic.h +1 -0
  740. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/str_split.h +0 -2
  741. mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/expander_fallback.h +33 -0
  742. mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.h +1 -0
  743. mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/matmul_assignadd_fusion.h +61 -0
  744. mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/inference_matmul_split_fusion.h +31 -8
  745. mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/inference_qbmm_elemwise_fusion.h +46 -0
  746. mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/inference_swiglu_fusion_v2.h +52 -0
  747. mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/inference_weight_preprocess_utils.h +1 -1
  748. mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/moe_init_routing_dyn_quantv2_fusion.h +66 -0
  749. mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/rms_norm_quant_fusion.h +24 -1
  750. mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/swiglu_dynamic_quant_fusion.h +47 -0
  751. mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion_infer/swiglu_reshape_dynamic_quant_fusion.h +47 -0
  752. mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_common.h +1 -1
  753. mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_device_context.h +20 -11
  754. mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_somas.h +1 -1
  755. mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/hardware/mpi_collective_comm_lib.h +3 -1
  756. mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/hardware/ms_collective_comm_lib.h +9 -2
  757. mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/contiguous_cpu_kernel.h +8 -6
  758. mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel.h +2 -2
  759. mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/custom/custom_kernel_input_info.h +99 -0
  760. mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/custom/custom_op_plugin_kernel.h +62 -0
  761. mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/pyexecute/joinedstr_cpu_kernel.h +46 -0
  762. mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/pyexecute/py_execute_cpu_kernel.h +1 -6
  763. mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_kernel_task.h +1 -0
  764. mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_device_context.h +18 -18
  765. mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_somas.h +1 -1
  766. mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/hardware/nvidia_collective_comm_lib.h +3 -1
  767. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/ascend_device_address/ascend_device_address.h +27 -16
  768. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/ascend_res_manager.h +27 -11
  769. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/capture_graph/ascend_capture_graph.h +45 -0
  770. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/ascend_collective_comm_lib.h +6 -5
  771. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/ascend_communication_group.h +27 -12
  772. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/ccool_collective_comm_lib.h +4 -5
  773. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/ccool_communication_group.h +4 -4
  774. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/dummy_ascend_collective_comm_lib.h +3 -5
  775. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/dvm_collective_comm_lib.h +5 -5
  776. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/dvm_communication_group.h +1 -1
  777. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/hccl_watch_dog_thread.h +5 -3
  778. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/leaper_trans.h +2 -5
  779. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/lowlatency_collective_comm_lib.h +20 -5
  780. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/multi_ascend_collective_comm_lib.h +6 -6
  781. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/multi_ascend_communication_group.h +4 -2
  782. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/collective/utils.h +83 -0
  783. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/device_context_conf/op_tuning_conf.h +14 -0
  784. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/dvm/dvm.h +246 -0
  785. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/hal_manager/ascend_hal_manager.h +1 -0
  786. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/hccl_adapter/hccl_adapter.h +31 -3
  787. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/hccl_adapter/plugin/hccl_plugin.h +8 -0
  788. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mbuf_manager/mbuf_receive_manager.h +6 -1
  789. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mbuf_manager/tdt_manager.h +44 -0
  790. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/abstract_ascend_memory_pool_support.h +2 -0
  791. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_dynamic_mem_adapter.h +3 -2
  792. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_memory_adapter.h +1 -0
  793. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_memory_manager.h +1 -0
  794. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_memory_pool.h +14 -0
  795. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_two_pointer_mem_adapter.h +1 -0
  796. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_vmm_adapter.h +11 -9
  797. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/custom_op_proto/cust_array_ops.h +11 -0
  798. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/custom_op_proto/cust_other_ops.h +0 -22
  799. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/op_adapter_base.h +38 -33
  800. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/op_adapter_map.h +5 -2
  801. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/op_adapter_util.h +3 -0
  802. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/op_declare/array_ops_declare.h +3 -0
  803. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/op_declare/hcom_ops_declare.h +3 -0
  804. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/op_declare/transform_fusion_ops_declare.h +0 -6
  805. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/op_adapter/transform_util.h +1 -1
  806. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/stream_manager/ascend_stream_manager.h +6 -4
  807. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/symbol_interface/acl_mdl_symbol.h +14 -0
  808. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/symbol_interface/acl_rt_symbol.h +6 -0
  809. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/symbol_interface/symbol_utils.h +11 -8
  810. mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/cpu_device_address/cpu_device_address.h +17 -12
  811. mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/cpu_mem_manager/cpu_hash_table.h +128 -0
  812. mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/cpu_mem_manager/cpu_hash_table_util.h +114 -0
  813. mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/cpu_mem_manager/cpu_memory_manager.h +4 -10
  814. mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/cpu_mem_manager/cpu_memory_pool.h +3 -1
  815. mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/cpu_res_manager.h +8 -15
  816. mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/visible.h +32 -0
  817. mindspore/include/mindspore/ccsrc/plugin/res_manager/gpu/device/gpu_device_address.h +19 -11
  818. mindspore/include/mindspore/ccsrc/plugin/res_manager/gpu/gpu_res_manager.h +9 -9
  819. mindspore/include/mindspore/ccsrc/ps/core/collective_ops_impl.h +31 -6
  820. mindspore/include/mindspore/ccsrc/ps/core/communicator/http_request_handler.h +0 -1
  821. mindspore/include/mindspore/ccsrc/ps/core/file_configuration.h +2 -2
  822. mindspore/include/mindspore/ccsrc/ps/core/node.h +1 -1
  823. mindspore/include/mindspore/ccsrc/pybind_api/hal/event_py.h +2 -2
  824. mindspore/include/mindspore/ccsrc/pybind_api/hal/memory_py.h +2 -0
  825. mindspore/include/mindspore/ccsrc/pybind_api/hal/stream_py.h +3 -4
  826. mindspore/include/mindspore/ccsrc/pybind_api/ir/tensor_api/auto_generate/tensor_api.h +135 -130
  827. mindspore/include/mindspore/ccsrc/pybind_api/ir/tensor_index_py.h +10 -107
  828. mindspore/include/mindspore/ccsrc/pybind_api/ir/tensor_register/auto_generate/tensor_py_gen.h +135 -131
  829. mindspore/include/mindspore/ccsrc/pybind_api/ir/tensor_register/tensor_func_reg.h +0 -1
  830. mindspore/include/mindspore/ccsrc/pybind_api/resource/manager.h +2 -2
  831. mindspore/include/mindspore/ccsrc/pybind_api/storage_py.h +36 -0
  832. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/abs.h +1 -1
  833. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/acos_ext.h +1 -1
  834. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/acosh_ext.h +1 -1
  835. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adamw.h +1 -1
  836. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adaptive_avg_pool1d.h +1 -1
  837. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adaptive_avg_pool2d_ext.h +1 -1
  838. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adaptive_avg_pool2d_grad_ext.h +1 -1
  839. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adaptive_avg_pool3d_ext.h +1 -1
  840. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adaptive_avg_pool3d_grad_ext.h +1 -1
  841. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adaptive_max_pool1d.h +1 -1
  842. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/adaptive_max_pool2d.h +1 -1
  843. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/add.h +1 -1
  844. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/add_ext.h +1 -1
  845. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/add_layer_norm_grad.h +1 -1
  846. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/add_layernorm_v2.h +1 -1
  847. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/add_rms_norm.h +1 -1
  848. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/add_rmsnorm_quant_v2.h +1 -1
  849. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/add_scalar.h +1 -1
  850. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/addbmm.h +1 -1
  851. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/addcdiv_ext.h +1 -1
  852. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/addcmul_ext.h +1 -1
  853. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/addmm.h +1 -1
  854. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/addmv.h +1 -1
  855. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/all_finite.h +1 -1
  856. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/all_gather_matmul.h +1 -1
  857. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/any.h +44 -0
  858. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/any_ext.h +44 -0
  859. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/apply_rotary_pos_emb.h +44 -0
  860. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/arange.h +1 -1
  861. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/argmax_ext.h +1 -1
  862. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/argmax_with_value.h +1 -1
  863. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/argmin_ext.h +1 -1
  864. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/argmin_with_value.h +1 -1
  865. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/argsort.h +1 -1
  866. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/as_strided.h +1 -1
  867. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/asin_ext.h +1 -1
  868. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/asinh_ext.h +1 -1
  869. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/atan2_ext.h +1 -1
  870. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/atan_ext.h +1 -1
  871. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/atanh.h +1 -1
  872. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/avg_pool1d.h +1 -1
  873. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/avg_pool2d.h +1 -1
  874. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/avg_pool2d_grad.h +1 -1
  875. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/avg_pool3d_ext.h +1 -1
  876. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/avg_pool3d_grad_ext.h +1 -1
  877. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/baddbmm.h +1 -1
  878. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_mat_mul.h +1 -1
  879. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_norm_elemt.h +1 -1
  880. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_norm_elemt_grad.h +1 -1
  881. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_norm_ext.h +1 -1
  882. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_norm_gather_stats_with_counts.h +1 -1
  883. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_norm_grad_ext.h +1 -1
  884. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_norm_reduce_grad.h +1 -1
  885. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/batch_norm_stats.h +1 -1
  886. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bernoulli_ext.h +1 -1
  887. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/binary_cross_entropy.h +1 -1
  888. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/binary_cross_entropy_grad.h +1 -1
  889. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/binary_cross_entropy_with_logits.h +1 -1
  890. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/binary_cross_entropy_with_logits_backward.h +1 -1
  891. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bincount_ext.h +1 -1
  892. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bitwise_and_scalar.h +1 -1
  893. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bitwise_and_tensor.h +1 -1
  894. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bitwise_not.h +1 -1
  895. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bitwise_or_scalar.h +1 -1
  896. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bitwise_or_tensor.h +1 -1
  897. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bitwise_xor_scalar.h +1 -1
  898. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bitwise_xor_tensor.h +1 -1
  899. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/bmm_ext.h +1 -1
  900. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/broadcast_to.h +1 -1
  901. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/broadcast_to_view.h +44 -0
  902. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cast.h +1 -1
  903. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/ceil.h +1 -1
  904. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cell_backward_hook.h +44 -0
  905. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/chunk.h +1 -1
  906. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/chunk_view.h +44 -0
  907. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/clamp_scalar.h +1 -1
  908. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/clamp_tensor.h +1 -1
  909. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/clone.h +1 -1
  910. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/col2im_ext.h +1 -1
  911. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/col2im_grad.h +1 -1
  912. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/concat.h +1 -1
  913. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/constant_pad_nd.h +1 -1
  914. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/contiguous.h +1 -1
  915. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/conv1d_ext.h +1 -1
  916. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/conv1d_padding.h +1 -1
  917. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/conv2d_ext.h +1 -1
  918. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/conv2d_padding.h +1 -1
  919. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/conv3d_ext.h +1 -1
  920. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/conv3d_padding.h +1 -1
  921. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/conv_transpose2d.h +1 -1
  922. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/convolution.h +1 -1
  923. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/convolution_grad.h +1 -1
  924. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/convolution_str.h +1 -1
  925. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/convolution_str_grad.h +1 -1
  926. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/copy.h +1 -1
  927. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cos.h +1 -1
  928. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cosh.h +1 -1
  929. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/count_nonzero.h +1 -1
  930. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cross.h +1 -1
  931. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cross_entropy_loss.h +44 -0
  932. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cross_entropy_loss_grad.h +44 -0
  933. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cummax.h +1 -1
  934. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cummin_ext.h +1 -1
  935. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/cumsum_ext.h +1 -1
  936. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/custom_ext.h +1 -1
  937. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dense.h +1 -1
  938. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/diag_ext.h +1 -1
  939. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/diagonal_view.h +44 -0
  940. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_all_gather.h +1 -1
  941. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_all_gather_into_tensor.h +1 -1
  942. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_all_gather_into_tensor_uneven.h +44 -0
  943. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_all_reduce.h +1 -1
  944. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_all_to_all_v.h +1 -1
  945. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_all_to_all_v_single.h +1 -1
  946. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_barrier.h +1 -1
  947. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_batch_isend_irecv.h +1 -1
  948. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_broadcast.h +1 -1
  949. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_gather.h +1 -1
  950. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_gather_into_tensor.h +1 -1
  951. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_irecv.h +1 -1
  952. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_isend.h +1 -1
  953. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_reduce.h +1 -1
  954. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_reduce_scatter.h +1 -1
  955. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_reduce_scatter_tensor.h +1 -1
  956. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_reduce_scatter_tensor_uneven.h +44 -0
  957. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_scatter.h +1 -1
  958. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dist_comm_scatter_tensor.h +1 -1
  959. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/div.h +1 -1
  960. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/divmod.h +1 -1
  961. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/divmods.h +1 -1
  962. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/divs.h +1 -1
  963. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dot.h +1 -1
  964. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dropout_do_mask_ext.h +1 -1
  965. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dropout_ext.h +1 -1
  966. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dropout_gen_mask_ext.h +1 -1
  967. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dropout_grad_ext.h +1 -1
  968. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/dynamic_quant_ext.h +1 -1
  969. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/einsum_ext.h +44 -0
  970. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/elu.h +1 -1
  971. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/elu_ext.h +1 -1
  972. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/elu_grad_ext.h +1 -1
  973. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/embedding.h +1 -1
  974. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/embedding_dense_backward.h +1 -1
  975. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/empty.h +44 -0
  976. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/empty_like.h +44 -0
  977. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/equal.h +1 -1
  978. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/equal_ext.h +1 -1
  979. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/erf.h +1 -1
  980. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/erfc.h +1 -1
  981. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/erfinv.h +1 -1
  982. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/exp.h +1 -1
  983. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/exp2.h +1 -1
  984. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/expand_as.h +1 -1
  985. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/expand_dims.h +1 -1
  986. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/expand_dims_view.h +44 -0
  987. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/expm1.h +1 -1
  988. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/eye.h +1 -1
  989. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/ffn_ext.h +1 -1
  990. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/fill_scalar.h +1 -1
  991. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/fill_tensor.h +1 -1
  992. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/flash_attention_score.h +1 -1
  993. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/flash_attention_score_grad.h +1 -1
  994. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/flatten_ext.h +1 -1
  995. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/floor.h +1 -1
  996. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/floor_div.h +1 -1
  997. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/floor_div_scalar.h +1 -1
  998. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/fmod_scalar.h +1 -1
  999. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/fmod_tensor.h +1 -1
  1000. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/frac.h +1 -1
  1001. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/full_like.h +1 -1
  1002. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/func_dropout_ext.h +44 -0
  1003. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/func_max_pool2d.h +44 -0
  1004. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/fused_infer_attention_score.h +1 -1
  1005. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gather_d.h +1 -1
  1006. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gather_d_grad_v2.h +1 -1
  1007. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gcd.h +1 -1
  1008. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gelu.h +1 -1
  1009. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gelu_ext.h +1 -1
  1010. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gelu_grad.h +1 -1
  1011. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gelu_grad_ext.h +1 -1
  1012. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/generator.h +1 -1
  1013. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/glu.h +1 -1
  1014. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/glu_grad.h +1 -1
  1015. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gmm.h +44 -0
  1016. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gmm_backward.h +1 -1
  1017. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gmm_backward_fusion.h +44 -0
  1018. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gmm_v2.h +44 -0
  1019. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gmm_v2_backward.h +1 -1
  1020. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/gmm_v2_backward_fusion.h +44 -0
  1021. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/greater.h +1 -1
  1022. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/greater_equal.h +1 -1
  1023. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/greater_equal_scalar.h +1 -1
  1024. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/grid_sampler_2d.h +1 -1
  1025. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/grid_sampler_2d_grad.h +1 -1
  1026. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/grid_sampler_3d.h +1 -1
  1027. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/grid_sampler_3d_grad.h +1 -1
  1028. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/group_norm.h +1 -1
  1029. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/group_norm_grad.h +1 -1
  1030. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/grouped_matmul.h +1 -1
  1031. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/grouped_matmul_v2.h +1 -1
  1032. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/grouped_matmul_v4.h +1 -1
  1033. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hardtanh.h +1 -1
  1034. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hardtanh_grad.h +1 -1
  1035. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/histc_ext.h +1 -1
  1036. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hshrink.h +1 -1
  1037. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hshrink_grad.h +1 -1
  1038. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hsigmoid.h +1 -1
  1039. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hsigmoid_grad.h +1 -1
  1040. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hswish.h +1 -1
  1041. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/hswish_grad.h +1 -1
  1042. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/identity.h +1 -1
  1043. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/im2col_ext.h +1 -1
  1044. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/incre_flash_attention.h +1 -1
  1045. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/index.h +1 -1
  1046. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/index_add_ext.h +1 -1
  1047. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/index_fill_scalar.h +1 -1
  1048. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/index_fill_tensor.h +1 -1
  1049. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/index_select.h +1 -1
  1050. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_comm_all_gather.h +1 -1
  1051. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_comm_all_reduce.h +1 -1
  1052. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_comm_all_to_all_v.h +1 -1
  1053. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_comm_irecv.h +1 -1
  1054. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_comm_isend.h +1 -1
  1055. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_comm_reduce_scatter.h +1 -1
  1056. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_index.h +1 -1
  1057. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_inplace_index_put.h +1 -1
  1058. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_moe_token_unpermute.h +44 -0
  1059. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inner_non_zero.h +1 -1
  1060. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_add_ext.h +1 -1
  1061. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_addmm.h +1 -1
  1062. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_adds_ext.h +1 -1
  1063. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_bernoulli_scalar.h +44 -0
  1064. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_bernoulli_tensor.h +44 -0
  1065. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_clamp_scalar.h +1 -1
  1066. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_clamp_tensor.h +1 -1
  1067. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_copy.h +1 -1
  1068. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_div.h +1 -1
  1069. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_divmod.h +1 -1
  1070. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_divmods.h +1 -1
  1071. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_divs.h +1 -1
  1072. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_elu.h +1 -1
  1073. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_erfinv.h +1 -1
  1074. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_exp.h +1 -1
  1075. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_exponential.h +44 -0
  1076. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_fill_diagonal.h +1 -1
  1077. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_fill_scalar.h +1 -1
  1078. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_fill_tensor.h +1 -1
  1079. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_floor.h +1 -1
  1080. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_floor_divide.h +1 -1
  1081. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_floor_divides.h +1 -1
  1082. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_grouped_matmul_add.h +1 -1
  1083. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_hardtanh.h +1 -1
  1084. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_index_add.h +1 -1
  1085. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_index_put.h +1 -1
  1086. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_log.h +1 -1
  1087. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_masked_fill_scalar.h +1 -1
  1088. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_masked_fill_tensor.h +1 -1
  1089. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_matmul_add.h +44 -0
  1090. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_mul.h +1 -1
  1091. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_muls.h +1 -1
  1092. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_normal.h +1 -1
  1093. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_put.h +1 -1
  1094. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_random.h +1 -1
  1095. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_relu.h +1 -1
  1096. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_remainder_tensor_scalar.h +44 -0
  1097. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_remainder_tensor_tensor.h +44 -0
  1098. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_scatter_add.h +1 -1
  1099. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_scatter_src.h +1 -1
  1100. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_scatter_src_reduce.h +1 -1
  1101. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_scatter_value.h +1 -1
  1102. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_scatter_value_reduce.h +1 -1
  1103. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_silu.h +44 -0
  1104. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_stop_gradient.h +1 -1
  1105. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_sub_ext.h +1 -1
  1106. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_sub_scalar.h +1 -1
  1107. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_tanh.h +1 -1
  1108. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_threshold.h +1 -1
  1109. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_uniform.h +1 -1
  1110. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/inplace_zero.h +1 -1
  1111. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/isclose.h +1 -1
  1112. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/isfinite.h +1 -1
  1113. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/isinf.h +1 -1
  1114. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/isneginf.h +1 -1
  1115. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/kl_div.h +1 -1
  1116. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/kl_div_grad.h +1 -1
  1117. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/kthvalue.h +1 -1
  1118. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/kv_cache_scatter_update.h +1 -1
  1119. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/l1_loss_backward_ext.h +1 -1
  1120. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/l1_loss_ext.h +1 -1
  1121. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/layer_norm_ext.h +1 -1
  1122. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/layer_norm_grad_ext.h +1 -1
  1123. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/leaky_relu_ext.h +1 -1
  1124. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/leaky_relu_grad_ext.h +1 -1
  1125. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/lerp.h +1 -1
  1126. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/lerp_scalar.h +1 -1
  1127. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/less.h +1 -1
  1128. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/less_equal.h +1 -1
  1129. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/lin_space_ext.h +1 -1
  1130. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/linalg_qr.h +1 -1
  1131. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/linalg_vector_norm.h +1 -1
  1132. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/log.h +1 -1
  1133. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/log10.h +1 -1
  1134. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/log1p.h +1 -1
  1135. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/log2.h +1 -1
  1136. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/log_softmax.h +1 -1
  1137. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/log_softmax_ext.h +1 -1
  1138. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/log_softmax_grad.h +1 -1
  1139. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logaddexp.h +1 -1
  1140. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logaddexp2.h +1 -1
  1141. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logical_and.h +1 -1
  1142. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logical_not.h +1 -1
  1143. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logical_or.h +1 -1
  1144. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logical_xor.h +1 -1
  1145. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logsigmoid.h +1 -1
  1146. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logsigmoid_grad.h +1 -1
  1147. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/logsumexp.h +1 -1
  1148. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/masked_fill.h +1 -1
  1149. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/masked_scatter.h +44 -0
  1150. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/masked_select.h +1 -1
  1151. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/masked_select_grad.h +1 -1
  1152. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/matmul.h +1 -1
  1153. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/matmul_allreduce_add_rmsnorm.h +1 -1
  1154. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/matmul_ext.h +1 -1
  1155. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/matmul_reduce_scatter.h +1 -1
  1156. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/matrix_inverse_ext.h +1 -1
  1157. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/max.h +1 -1
  1158. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/max_dim.h +1 -1
  1159. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/max_pool_grad_with_indices.h +1 -1
  1160. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/max_pool_grad_with_mask.h +1 -1
  1161. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/max_pool_with_indices.h +1 -1
  1162. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/max_pool_with_mask.h +1 -1
  1163. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/max_unpool2d_ext.h +1 -1
  1164. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/maximum.h +1 -1
  1165. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mean_ext.h +1 -1
  1166. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/median_dim.h +1 -1
  1167. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/median_ext.h +1 -1
  1168. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/meshgrid.h +1 -1
  1169. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/min.h +1 -1
  1170. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/min_dim.h +1 -1
  1171. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/minimum.h +1 -1
  1172. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mish_ext.h +1 -1
  1173. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mish_grad_ext.h +1 -1
  1174. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mla.h +44 -0
  1175. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mm_ext.h +1 -1
  1176. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_compute_expert_tokens.h +1 -1
  1177. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_distribute_combine.h +44 -0
  1178. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_distribute_dispatch.h +44 -0
  1179. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_finalize_routing.h +1 -1
  1180. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_gating_top_k_softmax.h +1 -1
  1181. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_init_routing.h +1 -1
  1182. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_init_routing_quant_v2.h +44 -0
  1183. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_init_routing_v2.h +1 -1
  1184. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_token_permute.h +1 -1
  1185. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_token_permute_grad.h +1 -1
  1186. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_token_unpermute.h +1 -1
  1187. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/moe_token_unpermute_grad.h +1 -1
  1188. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mse_loss_ext.h +1 -1
  1189. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mse_loss_grad_ext.h +1 -1
  1190. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mul.h +1 -1
  1191. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/muls.h +1 -1
  1192. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/multi_scale_deformable_attn.h +1 -1
  1193. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/multi_scale_deformable_attn_grad.h +1 -1
  1194. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/multinomial_ext.h +1 -1
  1195. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/mv.h +1 -1
  1196. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/nan_to_num.h +1 -1
  1197. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/nansum.h +1 -1
  1198. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/narrow.h +1 -1
  1199. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/narrow_view.h +44 -0
  1200. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/ne_scalar.h +1 -1
  1201. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/neg.h +1 -1
  1202. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/new_empty.h +44 -0
  1203. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/new_full.h +44 -0
  1204. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/new_ones.h +1 -1
  1205. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/new_zeros.h +1 -1
  1206. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/nllloss.h +1 -1
  1207. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/nllloss_2d.h +1 -1
  1208. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/nllloss_2d_grad.h +1 -1
  1209. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/nllloss_grad.h +1 -1
  1210. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/non_zero.h +1 -1
  1211. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/non_zero_ext.h +1 -1
  1212. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/norm.h +1 -1
  1213. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/normal_float_float.h +1 -1
  1214. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/normal_float_tensor.h +1 -1
  1215. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/normal_tensor_float.h +1 -1
  1216. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/normal_tensor_tensor.h +1 -1
  1217. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/not_equal.h +1 -1
  1218. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/one_hot_ext.h +1 -1
  1219. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/ones.h +1 -1
  1220. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/ones_like_ext.h +1 -1
  1221. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/outer.h +1 -1
  1222. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/paged_attention.h +44 -0
  1223. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/pixel_shuffle.h +1 -1
  1224. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/polar.h +1 -1
  1225. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/pow.h +1 -1
  1226. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/pow_scalar_tensor.h +1 -1
  1227. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/pow_tensor_scalar.h +1 -1
  1228. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/prelu.h +1 -1
  1229. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/prelu_grad.h +1 -1
  1230. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/prod_ext.h +1 -1
  1231. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/prompt_flash_attention.h +1 -1
  1232. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/quant_batch_matmul.h +1 -1
  1233. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/quant_matmul.h +44 -0
  1234. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/quant_v2.h +1 -1
  1235. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/rand_ext.h +1 -1
  1236. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/rand_like_ext.h +1 -1
  1237. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/randint.h +1 -1
  1238. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/randint_like.h +1 -1
  1239. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/randn.h +1 -1
  1240. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/randn_like.h +1 -1
  1241. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/randperm_ext.h +1 -1
  1242. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reciprocal.h +1 -1
  1243. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reduce_all.h +1 -1
  1244. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reduce_any.h +1 -1
  1245. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reduce_max.h +1 -1
  1246. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reduce_min.h +1 -1
  1247. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reflection_pad_1d.h +1 -1
  1248. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reflection_pad_1d_grad.h +1 -1
  1249. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reflection_pad_2d.h +1 -1
  1250. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reflection_pad_2d_grad.h +1 -1
  1251. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reflection_pad_3d.h +1 -1
  1252. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reflection_pad_3d_grad.h +1 -1
  1253. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/relu.h +1 -1
  1254. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/relu_grad.h +1 -1
  1255. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/remainder_scalar_tensor.h +1 -1
  1256. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/remainder_tensor_scalar.h +1 -1
  1257. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/remainder_tensor_tensor.h +1 -1
  1258. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/repeat.h +1 -1
  1259. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/repeat_interleave_grad.h +1 -1
  1260. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/repeat_interleave_int.h +1 -1
  1261. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/repeat_interleave_tensor.h +1 -1
  1262. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/replication_pad_1d.h +1 -1
  1263. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/replication_pad_1d_grad.h +1 -1
  1264. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/replication_pad_2d.h +1 -1
  1265. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/replication_pad_2d_grad.h +1 -1
  1266. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/replication_pad_3d.h +1 -1
  1267. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/replication_pad_3d_grad.h +1 -1
  1268. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reshape.h +1 -1
  1269. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reshape_and_cache.h +44 -0
  1270. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/reverse_v2.h +1 -1
  1271. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/ring_attention_update.h +44 -0
  1272. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/rms_norm.h +1 -1
  1273. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/rms_norm_grad.h +1 -1
  1274. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/roll.h +1 -1
  1275. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/rotary_position_embedding.h +1 -1
  1276. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/rotary_position_embedding_grad.h +1 -1
  1277. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/round.h +1 -1
  1278. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/rsqrt.h +1 -1
  1279. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/scatter.h +1 -1
  1280. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/scatter_add_ext.h +1 -1
  1281. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/scatter_value.h +1 -1
  1282. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/searchsorted.h +1 -1
  1283. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/select.h +1 -1
  1284. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/select_ext_view.h +44 -0
  1285. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/select_v2.h +1 -1
  1286. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/selu_ext.h +1 -1
  1287. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/selu_grad.h +1 -1
  1288. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sigmoid.h +1 -1
  1289. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sigmoid_grad.h +1 -1
  1290. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sign.h +1 -1
  1291. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/silent_check_v2.h +1 -1
  1292. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/silent_check_v3.h +1 -1
  1293. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/silu.h +1 -1
  1294. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/silu_grad.h +1 -1
  1295. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sin.h +1 -1
  1296. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sinc.h +1 -1
  1297. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sinh.h +1 -1
  1298. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/slice.h +1 -1
  1299. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/slice_ext.h +1 -1
  1300. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/slice_ext_view.h +44 -0
  1301. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/smooth_l1_loss.h +1 -1
  1302. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/smooth_l1_loss_grad.h +1 -1
  1303. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/soft_margin_loss.h +1 -1
  1304. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/soft_margin_loss_grad.h +1 -1
  1305. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/softmax.h +1 -1
  1306. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/softmax_backward.h +1 -1
  1307. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/softplus_ext.h +1 -1
  1308. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/softplus_grad_ext.h +1 -1
  1309. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/softshrink.h +1 -1
  1310. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/softshrink_grad.h +1 -1
  1311. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sort_ext.h +1 -1
  1312. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/speed_fusion_attention.h +1 -1
  1313. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/speed_fusion_attention_grad.h +1 -1
  1314. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/split.h +1 -1
  1315. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/split_tensor.h +1 -1
  1316. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/split_tensor_view.h +44 -0
  1317. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/split_with_size.h +1 -1
  1318. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/split_with_size_view.h +44 -0
  1319. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sqrt.h +1 -1
  1320. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/square.h +1 -1
  1321. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/squeeze.h +1 -1
  1322. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/stack_ext.h +1 -1
  1323. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/std.h +1 -1
  1324. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/std_mean.h +1 -1
  1325. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sub.h +1 -1
  1326. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sub_ext.h +1 -1
  1327. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sub_scalar.h +1 -1
  1328. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/sum_ext.h +1 -1
  1329. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/swiglu.h +1 -1
  1330. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/swiglu_grad.h +1 -1
  1331. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/t_ext.h +1 -1
  1332. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/take.h +1 -1
  1333. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/tan.h +1 -1
  1334. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/tanh.h +1 -1
  1335. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/tanh_grad.h +1 -1
  1336. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/tensor_scatter_elements.h +1 -1
  1337. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/threshold.h +1 -1
  1338. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/threshold_grad.h +1 -1
  1339. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/tile.h +1 -1
  1340. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/topk_ext.h +1 -1
  1341. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/trace_ext.h +1 -1
  1342. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/transpose.h +1 -1
  1343. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/transpose_ext_view.h +44 -0
  1344. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/transpose_view.h +44 -0
  1345. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/triangular_solve.h +1 -1
  1346. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/tril_ext.h +1 -1
  1347. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/triu.h +1 -1
  1348. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/trunc.h +1 -1
  1349. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/type_as.h +1 -1
  1350. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/uniform_ext.h +1 -1
  1351. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/unique2.h +1 -1
  1352. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/unique_consecutive.h +1 -1
  1353. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/unique_dim.h +1 -1
  1354. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/unstack_ext_view.h +44 -0
  1355. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_bicubic2d.h +1 -1
  1356. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_bicubic2d_grad.h +1 -1
  1357. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_bilinear2d.h +1 -1
  1358. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_bilinear2d_grad.h +1 -1
  1359. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_linear1d.h +1 -1
  1360. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_linear1d_grad.h +1 -1
  1361. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_nearest1d.h +1 -1
  1362. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_nearest1d_grad.h +1 -1
  1363. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_nearest2d.h +1 -1
  1364. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_nearest2d_grad.h +1 -1
  1365. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_nearest3d.h +1 -1
  1366. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_nearest3d_grad.h +1 -1
  1367. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_trilinear3d.h +1 -1
  1368. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/upsample_trilinear3d_grad.h +1 -1
  1369. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/var.h +1 -1
  1370. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/var_mean.h +1 -1
  1371. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/view.h +1 -1
  1372. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/view_as.h +1 -1
  1373. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/weight_quant_batch_matmul.h +1 -1
  1374. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/xlogy.h +1 -1
  1375. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/xlogy_scalar_other.h +1 -1
  1376. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/xlogy_scalar_self.h +1 -1
  1377. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/zeros.h +1 -1
  1378. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/zeros_like_ext.h +1 -1
  1379. mindspore/include/mindspore/ccsrc/pyboost/comm_handle.h +6 -2
  1380. mindspore/include/mindspore/ccsrc/pyboost/customize/any.h +39 -0
  1381. mindspore/include/mindspore/ccsrc/pyboost/customize/cell_backward_hook.h +27 -0
  1382. mindspore/include/mindspore/ccsrc/pyboost/customize/divmod.h +3 -3
  1383. mindspore/include/mindspore/ccsrc/pyboost/customize/einsum_ext.h +38 -0
  1384. mindspore/include/mindspore/ccsrc/pyboost/customize/identity.h +2 -2
  1385. mindspore/include/mindspore/ccsrc/pyboost/customize/meshgrid.h +7 -4
  1386. mindspore/include/mindspore/ccsrc/pyboost/customize/op_common.h +12 -15
  1387. mindspore/include/mindspore/ccsrc/pyboost/customize/pixel_shuffle.h +2 -3
  1388. mindspore/include/mindspore/ccsrc/pyboost/customize/reshape.h +4 -3
  1389. mindspore/include/mindspore/ccsrc/pyboost/customize/searchsorted.h +4 -5
  1390. mindspore/include/mindspore/ccsrc/pyboost/functions/auto_generate/auto_grad_op_reg.h +1588 -1384
  1391. mindspore/include/mindspore/ccsrc/pyboost/functions/auto_generate/functions.h +578 -481
  1392. mindspore/include/mindspore/ccsrc/pyboost/grad_functions/pyboost_grad_functions.h +3 -0
  1393. mindspore/include/mindspore/ccsrc/pyboost/grad_functions/value_converter.h +30 -4
  1394. mindspore/include/mindspore/ccsrc/pyboost/op_register.h +52 -0
  1395. mindspore/include/mindspore/ccsrc/pyboost/op_runner.h +45 -19
  1396. mindspore/include/mindspore/ccsrc/pyboost/pyboost_utils.h +106 -62
  1397. mindspore/include/mindspore/ccsrc/pynative/base.h +22 -24
  1398. mindspore/include/mindspore/ccsrc/pynative/forward/do_pyboost_cast.h +71 -67
  1399. mindspore/include/mindspore/ccsrc/pynative/forward/forward.h +14 -12
  1400. mindspore/include/mindspore/ccsrc/pynative/forward/forward_task.h +34 -2
  1401. mindspore/include/mindspore/ccsrc/pynative/grad/custom_function.h +14 -7
  1402. mindspore/include/mindspore/ccsrc/pynative/grad/function/auto_generate/pyboost_native_grad_functions.h +501 -457
  1403. mindspore/include/mindspore/ccsrc/pynative/grad/function/func_builder.h +3 -3
  1404. mindspore/include/mindspore/ccsrc/pynative/grad/function/func_grad.h +280 -96
  1405. mindspore/include/mindspore/ccsrc/pynative/grad/function/func_pass.h +0 -1
  1406. mindspore/include/mindspore/ccsrc/pynative/grad/function.h +28 -23
  1407. mindspore/include/mindspore/ccsrc/pynative/grad/function_py.h +19 -11
  1408. mindspore/include/mindspore/ccsrc/pynative/grad/grad.h +30 -97
  1409. mindspore/include/mindspore/ccsrc/pynative/grad/grad_utils.h +39 -23
  1410. mindspore/include/mindspore/ccsrc/pynative/grad/hook_py.h +21 -22
  1411. mindspore/include/mindspore/ccsrc/pynative/grad/jit/jit_grad.h +2 -26
  1412. mindspore/include/mindspore/ccsrc/pynative/grad/top_cell.h +8 -150
  1413. mindspore/include/mindspore/ccsrc/pynative/op_function/auto_generate/pyboost_api.h +564 -0
  1414. mindspore/include/mindspore/ccsrc/pynative/op_function/auto_generate/pyboost_core.h +564 -0
  1415. mindspore/include/mindspore/ccsrc/pynative/op_function/auto_generate/tensor_func_utils.h +498 -483
  1416. mindspore/include/mindspore/ccsrc/pynative/op_function/comm_handle_py.h +2 -0
  1417. mindspore/include/mindspore/ccsrc/pynative/op_function/converter.h +11 -0
  1418. mindspore/include/mindspore/ccsrc/pynative/op_function/customize/direct_ops.h +2 -12
  1419. mindspore/include/mindspore/ccsrc/pynative/predict_out_type_map.h +3 -0
  1420. mindspore/include/mindspore/ccsrc/pynative/pynative_execute.h +6 -2
  1421. mindspore/include/mindspore/ccsrc/pynative/pynative_utils.h +39 -43
  1422. mindspore/include/mindspore/ccsrc/runtime/collective/collective_communication_lib.h +17 -1
  1423. mindspore/include/mindspore/ccsrc/runtime/collective/communication_group.h +5 -0
  1424. mindspore/include/mindspore/ccsrc/runtime/collective/dummy_collective_communication_lib.h +2 -1
  1425. mindspore/include/mindspore/ccsrc/runtime/device/device_address_utils.h +55 -50
  1426. mindspore/include/mindspore/ccsrc/runtime/device/memory_scheduler.h +2 -1
  1427. mindspore/include/mindspore/ccsrc/runtime/device/move_to.h +3 -0
  1428. mindspore/include/mindspore/ccsrc/runtime/device/res_manager/auto_mem_offload.h +0 -1
  1429. mindspore/include/mindspore/ccsrc/runtime/device/res_manager/capture_graph.h +35 -0
  1430. mindspore/include/mindspore/ccsrc/runtime/device/res_manager/hal_res_base.h +20 -17
  1431. mindspore/include/mindspore/ccsrc/runtime/device/res_manager/hal_res_manager.h +7 -3
  1432. mindspore/include/mindspore/ccsrc/runtime/device/res_manager/loadable_device_address.h +1 -1
  1433. mindspore/include/mindspore/ccsrc/runtime/device/res_manager/memory_manager.h +1 -1
  1434. mindspore/include/mindspore/ccsrc/runtime/device/res_manager/swap_manager.h +2 -2
  1435. mindspore/include/mindspore/ccsrc/runtime/device/res_manager/tensor_array.h +1 -1
  1436. mindspore/include/mindspore/ccsrc/runtime/device/res_manager/utils/utils.h +0 -1
  1437. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.h +46 -33
  1438. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/actor_common.h +30 -19
  1439. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/actor_dump.h +4 -3
  1440. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/actor_set.h +0 -4
  1441. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/any_type_kernel_actor.h +7 -61
  1442. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_gather_runner.h +74 -0
  1443. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_switch_runner.h +89 -0
  1444. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/control_actor.h +37 -41
  1445. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/entrance_actor.h +11 -11
  1446. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.h +8 -9
  1447. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/gather_actor.h +6 -7
  1448. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/stack_actor.h +11 -11
  1449. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/switch_actor.h +2 -2
  1450. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/copy_actor.h +12 -14
  1451. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.h +28 -20
  1452. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/data_source_actor.h +10 -58
  1453. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/debug_actor.h +11 -10
  1454. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/debug_aware_actor.h +2 -2
  1455. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/fusion/fusion_actor.h +2 -2
  1456. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.h +90 -83
  1457. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_infer_actor.h +3 -1
  1458. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_launch_actor.h +11 -1
  1459. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_async_resize_actor.h +3 -1
  1460. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_infer_actor.h +4 -4
  1461. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_resize_actor.h +4 -4
  1462. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_runner.h +405 -0
  1463. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.h +11 -11
  1464. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/memory/memory_alloc_actor.h +3 -3
  1465. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/memory/memory_free_actor.h +2 -3
  1466. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/memory/memory_swap_actor.h +4 -4
  1467. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/memory_aware_actor.h +7 -7
  1468. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/memory_manager_actor.h +18 -17
  1469. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.h +13 -7
  1470. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/profiler_actor.h +2 -2
  1471. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/recorder_actor.h +2 -2
  1472. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/rpc/mux_send_actor.h +1 -1
  1473. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/rpc/recv_actor.h +6 -10
  1474. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/rpc/rpc_actor.h +2 -14
  1475. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/rpc/send_actor.h +4 -4
  1476. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/super_kernel_actor.h +75 -57
  1477. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/any_type_graph_scheduler.h +0 -33
  1478. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.h +13 -1
  1479. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/control_node_scheduler.h +1 -2
  1480. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/device_tensor_copy_store.h +14 -14
  1481. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/device_tensor_store.h +28 -27
  1482. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/execution_order_check/comm_execution_order_check.h +17 -7
  1483. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/execution_order_check/kernel_cache.h +24 -4
  1484. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/graph_capture/graph_capture_manager.h +117 -0
  1485. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.h +4 -71
  1486. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/graph_parameter_store.h +88 -142
  1487. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.h +9 -22
  1488. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/parameter_store.h +4 -0
  1489. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/pipeline/async_lf_queue.h +97 -0
  1490. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/pipeline/lf_ring_queue.h +205 -0
  1491. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/pipeline/runtime_pipeline.h +71 -0
  1492. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/pre_launch_comm.h +10 -2
  1493. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/rpc_node_scheduler.h +4 -13
  1494. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.h +12 -1
  1495. mindspore/include/mindspore/ccsrc/runtime/hardware/device_context.h +44 -173
  1496. mindspore/include/mindspore/ccsrc/runtime/hardware/device_context_manager.h +1 -1
  1497. mindspore/include/mindspore/ccsrc/runtime/pipeline/async_rqueue.h +2 -2
  1498. mindspore/include/mindspore/ccsrc/runtime/pipeline/ring_queue.h +1 -1
  1499. mindspore/include/mindspore/ccsrc/runtime/pipeline/task/task.h +1 -1
  1500. mindspore/include/mindspore/ccsrc/runtime/pynative/graph_adapter.h +0 -1
  1501. mindspore/include/mindspore/ccsrc/runtime/pynative/ir_converter.h +8 -7
  1502. mindspore/include/mindspore/ccsrc/runtime/pynative/op_runner.h +6 -6
  1503. mindspore/include/mindspore/ccsrc/runtime/pynative/op_runtime_info.h +4 -4
  1504. mindspore/include/mindspore/ccsrc/utils/dlopen_macro.h +2 -2
  1505. mindspore/include/mindspore/core/include/abstract/abstract_function.h +54 -13
  1506. mindspore/include/mindspore/core/include/abstract/abstract_value.h +66 -3
  1507. mindspore/include/mindspore/core/include/abstract/ops/primitive_infer_map.h +1 -1
  1508. mindspore/include/mindspore/core/include/abstract/param_validator.h +3 -2
  1509. mindspore/include/mindspore/core/include/base/bfloat16.h +1 -1
  1510. mindspore/include/mindspore/core/include/base/float16.h +4 -3
  1511. mindspore/include/mindspore/core/include/base/float8_e4m3fn.h +264 -0
  1512. mindspore/include/mindspore/core/include/base/float8_e5m2.h +260 -0
  1513. mindspore/include/mindspore/core/include/base/hifloat8.h +54 -58
  1514. mindspore/include/mindspore/core/include/ir/anf.h +37 -8
  1515. mindspore/include/mindspore/core/include/ir/device_sync.h +17 -1
  1516. mindspore/include/mindspore/core/include/ir/dtype/number.h +123 -9
  1517. mindspore/include/mindspore/core/include/ir/dtype/op_dtype.h +48 -0
  1518. mindspore/include/mindspore/core/include/ir/dtype.h +4 -0
  1519. mindspore/include/mindspore/core/include/ir/func_graph.h +2 -0
  1520. mindspore/include/mindspore/core/include/ir/func_graph_cloner.h +2 -0
  1521. mindspore/include/mindspore/core/include/ir/meta_grad_data.h +4 -13
  1522. mindspore/include/mindspore/core/include/ir/primitive.h +34 -2
  1523. mindspore/include/mindspore/core/include/ir/scalar.h +2 -2
  1524. mindspore/include/mindspore/core/include/ir/scope.h +16 -3
  1525. mindspore/include/mindspore/core/include/ir/tensor.h +922 -41
  1526. mindspore/include/mindspore/core/include/ir/tensor_py_wrapperbase.h +11 -11
  1527. mindspore/include/mindspore/core/include/ir/tensor_storage_info.h +1 -0
  1528. mindspore/include/mindspore/core/include/load_mindir/infer_mindir.h +3 -2
  1529. mindspore/include/mindspore/core/include/mindapi/base/macros.h +3 -3
  1530. mindspore/include/mindspore/core/include/mindapi/base/type_id.h +3 -0
  1531. mindspore/include/mindspore/core/include/mindapi/base/types.h +7 -0
  1532. mindspore/include/mindspore/core/include/ops/op_def.h +2 -31
  1533. mindspore/include/mindspore/core/include/symbolic_shape/operation_builder.h +1 -1
  1534. mindspore/include/mindspore/core/include/utils/anf_utils.h +2 -0
  1535. mindspore/include/mindspore/core/include/utils/callback_handler.h +1 -1
  1536. mindspore/include/mindspore/core/include/utils/compact_set.h +4 -0
  1537. mindspore/include/mindspore/core/include/utils/core_op_utils.h +1 -1
  1538. mindspore/include/mindspore/core/include/utils/device_manager_conf.h +4 -0
  1539. mindspore/include/mindspore/core/include/utils/flags.h +0 -2
  1540. mindspore/include/mindspore/core/include/utils/info.h +7 -0
  1541. mindspore/include/mindspore/core/include/utils/llm_manager.h +2 -0
  1542. mindspore/include/mindspore/core/include/utils/log_adapter.h +11 -2
  1543. mindspore/include/mindspore/core/include/utils/ms_context.h +13 -11
  1544. mindspore/include/mindspore/core/include/utils/ms_exception.h +42 -5
  1545. mindspore/include/mindspore/core/include/utils/ms_utils.h +4 -8
  1546. mindspore/include/mindspore/core/include/utils/ms_utils_secure.h +1 -1
  1547. mindspore/include/mindspore/core/include/utils/phase.h +17 -2
  1548. mindspore/include/mindspore/core/include/utils/system/base.h +1 -1
  1549. mindspore/include/mindspore/core/include/utils/tensor_hook_map.h +30 -0
  1550. mindspore/include/mindspore/core/mindrt/include/actor/op_actor.h +68 -0
  1551. mindspore/include/mindspore/core/mindrt/include/async/async.h +2 -2
  1552. mindspore/include/mindspore/core/mindrt/include/thread/actor_threadpool.h +4 -0
  1553. mindspore/include/mindspore/core/mindrt/include/thread/core_affinity.h +1 -1
  1554. mindspore/include/mindspore/core/mindrt/include/thread/hqueue.h +6 -6
  1555. mindspore/include/mindspore/core/mindrt/include/thread/threadpool.h +6 -2
  1556. mindspore/include/mindspore/ops/grad/grad_utils.h +25 -3
  1557. mindspore/include/mindspore/ops/infer/all_gather_v.h +39 -0
  1558. mindspore/include/mindspore/ops/infer/all_to_all.h +38 -0
  1559. mindspore/include/mindspore/ops/infer/dtype.h +12 -0
  1560. mindspore/include/mindspore/ops/infer/ops_func_impl/acosh.h +2 -9
  1561. mindspore/include/mindspore/ops/infer/ops_func_impl/asinh.h +2 -9
  1562. mindspore/include/mindspore/ops/infer/ops_func_impl/atanh.h +4 -9
  1563. mindspore/include/mindspore/ops/infer/ops_func_impl/batch_norm_ext.h +6 -11
  1564. mindspore/include/mindspore/ops/infer/ops_func_impl/batch_norm_grad_ext.h +5 -4
  1565. mindspore/include/mindspore/ops/infer/ops_func_impl/bitwise_and_scalar.h +4 -5
  1566. mindspore/include/mindspore/ops/infer/ops_func_impl/bitwise_and_tensor.h +3 -5
  1567. mindspore/include/mindspore/ops/infer/ops_func_impl/bitwise_or_scalar.h +3 -10
  1568. mindspore/include/mindspore/ops/infer/ops_func_impl/bitwise_or_tensor.h +2 -10
  1569. mindspore/include/mindspore/ops/infer/ops_func_impl/bitwise_xor_scalar.h +3 -10
  1570. mindspore/include/mindspore/ops/infer/ops_func_impl/bitwise_xor_tensor.h +2 -10
  1571. mindspore/include/mindspore/ops/infer/ops_func_impl/broadcast_to.h +0 -1
  1572. mindspore/include/mindspore/ops/infer/ops_func_impl/broadcast_to_view.h +32 -0
  1573. mindspore/include/mindspore/ops/infer/ops_func_impl/cell_backward_hook.h +32 -0
  1574. mindspore/include/mindspore/ops/infer/ops_func_impl/chunk.h +0 -2
  1575. mindspore/include/mindspore/ops/infer/ops_func_impl/chunk_view.h +32 -0
  1576. mindspore/include/mindspore/ops/infer/ops_func_impl/cross_entropy_loss.h +36 -0
  1577. mindspore/include/mindspore/ops/infer/ops_func_impl/cross_entropy_loss_grad.h +36 -0
  1578. mindspore/include/mindspore/ops/infer/ops_func_impl/diagonal_view.h +32 -0
  1579. mindspore/include/mindspore/ops/infer/ops_func_impl/dist_comm_all_gather_into_tensor_uneven.h +33 -0
  1580. mindspore/include/mindspore/ops/infer/ops_func_impl/dist_comm_reduce_scatter_tensor_uneven.h +33 -0
  1581. mindspore/include/mindspore/ops/infer/ops_func_impl/dump_gradient.h +33 -0
  1582. mindspore/include/mindspore/ops/infer/ops_func_impl/dynamic_ntk.h +32 -0
  1583. mindspore/include/mindspore/ops/infer/ops_func_impl/eltwise_op.h +4 -10
  1584. mindspore/include/mindspore/ops/infer/ops_func_impl/empty.h +7 -3
  1585. mindspore/include/mindspore/ops/infer/ops_func_impl/empty_like.h +7 -3
  1586. mindspore/include/mindspore/ops/infer/ops_func_impl/exp.h +3 -4
  1587. mindspore/include/mindspore/ops/infer/ops_func_impl/expand_dims.h +1 -2
  1588. mindspore/include/mindspore/ops/infer/ops_func_impl/expand_dims_view.h +31 -0
  1589. mindspore/include/mindspore/ops/infer/ops_func_impl/fused_add_topk_div.h +56 -0
  1590. mindspore/include/mindspore/ops/infer/ops_func_impl/grouped_matmul.h +13 -4
  1591. mindspore/include/mindspore/ops/infer/ops_func_impl/grouped_matmul_base.h +8 -9
  1592. mindspore/include/mindspore/ops/infer/ops_func_impl/grouped_matmul_v2.h +7 -1
  1593. mindspore/include/mindspore/ops/infer/ops_func_impl/grouped_matmul_v4.h +3 -1
  1594. mindspore/include/mindspore/ops/infer/ops_func_impl/inner_moe_token_unpermute.h +36 -0
  1595. mindspore/include/mindspore/ops/infer/ops_func_impl/inplace_bernoulli_scalar.h +25 -0
  1596. mindspore/include/mindspore/ops/infer/ops_func_impl/inplace_bernoulli_tensor.h +40 -0
  1597. mindspore/include/mindspore/ops/infer/ops_func_impl/inplace_matmul_add.h +34 -0
  1598. mindspore/include/mindspore/ops/infer/ops_func_impl/inplace_remainder_tensor_scalar.h +35 -0
  1599. mindspore/include/mindspore/ops/infer/ops_func_impl/inplace_remainder_tensor_tensor.h +35 -0
  1600. mindspore/include/mindspore/ops/infer/ops_func_impl/inplace_silu.h +35 -0
  1601. mindspore/include/mindspore/ops/infer/ops_func_impl/kv_scale_cache.h +48 -0
  1602. mindspore/include/mindspore/ops/infer/ops_func_impl/masked_fill.h +4 -3
  1603. mindspore/include/mindspore/ops/infer/ops_func_impl/masked_scatter.h +37 -0
  1604. mindspore/include/mindspore/ops/infer/ops_func_impl/matmul_fusion_utils.h +6 -0
  1605. mindspore/include/mindspore/ops/infer/ops_func_impl/matmul_split_silu_fastgelu_add_mul_out1.h +34 -0
  1606. mindspore/include/mindspore/ops/infer/ops_func_impl/matmul_split_silu_mul_out1.h +34 -0
  1607. mindspore/include/mindspore/ops/infer/ops_func_impl/matmul_split_silu_out2.h +1 -1
  1608. mindspore/include/mindspore/ops/infer/ops_func_impl/max_pool_grad_with_indices.h +2 -8
  1609. mindspore/include/mindspore/ops/infer/ops_func_impl/max_pool_grad_with_mask.h +4 -2
  1610. mindspore/include/mindspore/ops/infer/ops_func_impl/max_pool_with_indices.h +6 -4
  1611. mindspore/include/mindspore/ops/infer/ops_func_impl/max_pool_with_mask.h +6 -4
  1612. mindspore/include/mindspore/ops/infer/ops_func_impl/mla.h +54 -0
  1613. mindspore/include/mindspore/ops/infer/ops_func_impl/mla_preprocess.h +75 -0
  1614. mindspore/include/mindspore/ops/infer/ops_func_impl/moe_distribute_combine.h +34 -0
  1615. mindspore/include/mindspore/ops/infer/ops_func_impl/moe_distribute_dispatch.h +37 -0
  1616. mindspore/include/mindspore/ops/infer/ops_func_impl/moe_init_routing_quant_v2.h +39 -0
  1617. mindspore/include/mindspore/ops/infer/ops_func_impl/narrow.h +0 -1
  1618. mindspore/include/mindspore/ops/infer/ops_func_impl/narrow_view.h +29 -0
  1619. mindspore/include/mindspore/ops/infer/ops_func_impl/neg.h +1 -6
  1620. mindspore/include/mindspore/ops/infer/ops_func_impl/new_empty.h +7 -3
  1621. mindspore/include/mindspore/ops/infer/ops_func_impl/new_full.h +37 -0
  1622. mindspore/include/mindspore/ops/infer/ops_func_impl/normal_float_float.h +1 -0
  1623. mindspore/include/mindspore/ops/infer/ops_func_impl/ones_like.h +2 -6
  1624. mindspore/include/mindspore/ops/infer/ops_func_impl/ones_like_ext.h +1 -2
  1625. mindspore/include/mindspore/ops/infer/ops_func_impl/q_matmul_split_silu_fastgelu_add_mul_out1.h +34 -0
  1626. mindspore/include/mindspore/ops/infer/ops_func_impl/q_matmul_split_silu_mul_out1.h +34 -0
  1627. mindspore/include/mindspore/ops/infer/ops_func_impl/quant_matmul.h +32 -0
  1628. mindspore/include/mindspore/ops/infer/ops_func_impl/reciprocal.h +4 -9
  1629. mindspore/include/mindspore/ops/infer/ops_func_impl/reduce_any.h +4 -5
  1630. mindspore/include/mindspore/ops/infer/ops_func_impl/reduce_arithmetic.h +2 -1
  1631. mindspore/include/mindspore/ops/infer/ops_func_impl/remainder_tensor_scalar.h +4 -4
  1632. mindspore/include/mindspore/ops/infer/ops_func_impl/ring_attention_update.h +36 -0
  1633. mindspore/include/mindspore/ops/infer/ops_func_impl/select_ext_view.h +39 -0
  1634. mindspore/include/mindspore/ops/infer/ops_func_impl/sigmoid.h +4 -5
  1635. mindspore/include/mindspore/ops/infer/ops_func_impl/slice_ext.h +0 -1
  1636. mindspore/include/mindspore/ops/infer/ops_func_impl/slice_ext_view.h +29 -0
  1637. mindspore/include/mindspore/ops/infer/ops_func_impl/split_tensor.h +0 -1
  1638. mindspore/include/mindspore/ops/infer/ops_func_impl/split_tensor_view.h +32 -0
  1639. mindspore/include/mindspore/ops/infer/ops_func_impl/split_with_size.h +0 -2
  1640. mindspore/include/mindspore/ops/infer/ops_func_impl/split_with_size_view.h +32 -0
  1641. mindspore/include/mindspore/ops/infer/ops_func_impl/square.h +2 -10
  1642. mindspore/include/mindspore/ops/infer/ops_func_impl/swiglu_dynamic_quant.h +32 -0
  1643. mindspore/include/mindspore/ops/infer/ops_func_impl/topprouter.h +36 -0
  1644. mindspore/include/mindspore/ops/infer/ops_func_impl/transpose.h +0 -2
  1645. mindspore/include/mindspore/ops/infer/ops_func_impl/transpose_ext_view.h +34 -0
  1646. mindspore/include/mindspore/ops/infer/ops_func_impl/transpose_view.h +29 -0
  1647. mindspore/include/mindspore/ops/infer/ops_func_impl/unstack_ext_view.h +37 -0
  1648. mindspore/include/mindspore/ops/infer/ops_func_impl/zeros_like_ext.h +1 -2
  1649. mindspore/include/mindspore/ops/infer/reduce_scatter.h +3 -1
  1650. mindspore/include/mindspore/ops/infer/reduce_scatter_v.h +38 -0
  1651. mindspore/include/mindspore/ops/kernel/ascend/acl/acl_kernel_mod.h +3 -0
  1652. mindspore/include/mindspore/ops/kernel/ascend/acl_ir/acl_adapter_info.h +1 -1
  1653. mindspore/include/mindspore/ops/kernel/ascend/acl_ir/custom/custom_aclnn_utils.h +95 -0
  1654. mindspore/include/mindspore/ops/kernel/ascend/acl_ir/custom/custom_op_api_cache.h +40 -0
  1655. mindspore/include/mindspore/ops/kernel/ascend/acl_ir/custom/custom_op_api_exec.h +84 -0
  1656. mindspore/include/mindspore/ops/kernel/ascend/acl_ir/op_api_cache.h +18 -8
  1657. mindspore/include/mindspore/ops/kernel/ascend/acl_ir/op_api_convert.h +40 -114
  1658. mindspore/include/mindspore/ops/kernel/ascend/acl_ir/op_api_exec.h +41 -32
  1659. mindspore/include/mindspore/ops/kernel/ascend/acl_ir/op_api_util.h +6 -0
  1660. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/common/kernel_base.h +1 -1
  1661. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/common/kernel_log.h +11 -11
  1662. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/format_transfer/formats_definitions.h +5 -1
  1663. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/format_transfer/register_format_transfer.h +5 -1
  1664. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/inc/ms_cpu_kernel.h +1 -1
  1665. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/concat.h +1 -1
  1666. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/dct.h +1 -1
  1667. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/dctn.h +1 -1
  1668. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/fft_ortho.h +1 -1
  1669. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/fft_shapecopy.h +1 -1
  1670. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/fftbase.h +1 -1
  1671. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/fftfreq.h +1 -1
  1672. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/fftnbase.h +1 -1
  1673. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/irfft_double.h +1 -1
  1674. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/nms_with_mask.h +0 -1
  1675. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/random/philox_random_dist.h +1 -1
  1676. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/random/random_distributions.h +27 -25
  1677. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/topprouter.h +64 -0
  1678. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/eigen_tensor.h +18 -15
  1679. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/fused_sparse_utils.h +1 -1
  1680. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/kernel_util.h +2 -2
  1681. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/philox_random.h +75 -138
  1682. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/range_sampler.h +7 -3
  1683. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/sampling_kernels.h +18 -15
  1684. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/sparse_group.h +18 -15
  1685. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/utils/sparse_tensor.h +18 -15
  1686. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/adaptive_avg_pool_3d_grad_op.h +0 -11
  1687. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/adaptive_avg_pool_3d_op.h +0 -11
  1688. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/adaptive_max_pool3_d_grad_op.h +0 -14
  1689. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/adaptive_max_pool3d_op.h +0 -18
  1690. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/adaptive_max_pool_2d_grad_op.h +0 -14
  1691. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/adjust_contrastv2_op.h +0 -21
  1692. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/arg_max_op.h +0 -22
  1693. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/bartlett_window_op.h +0 -19
  1694. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/cauchy_op.h +0 -11
  1695. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/cholesky_solve_op.h +0 -23
  1696. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/coalesce_op.h +0 -24
  1697. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/csr_sparse_matrix_to_dense_op.h +0 -15
  1698. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/dense_to_csr_sparse_matrix_op.h +0 -16
  1699. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/eig_op.h +0 -17
  1700. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/exp.h +0 -18
  1701. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/fractional_max_pool_grad_with_fixed_ksize_op.h +0 -22
  1702. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/fractional_max_pool_with_fixed_ksize_op.h +0 -19
  1703. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/geqrf_op.h +0 -14
  1704. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/glu_grad_op.h +0 -17
  1705. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/glu_op.h +0 -20
  1706. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/hamming_window_op.h +0 -20
  1707. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/index_fill.h +0 -18
  1708. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/instance_norm_v2_grad.h +0 -28
  1709. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/layer_norm_grad_grad_op.h +0 -17
  1710. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/log_normal_reverse.h +0 -15
  1711. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/logspace.h +0 -23
  1712. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/lstsq_op.h +0 -15
  1713. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/matrix_logarithm.h +0 -13
  1714. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/matrix_power_op.h +0 -16
  1715. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/max_pool_3d_grad_with_argmax_op.h +0 -26
  1716. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/maximum_grad_grad.h +0 -19
  1717. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/median_grad_op.h +0 -19
  1718. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/median_op.h +0 -17
  1719. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/minimum_grad_grad.h +0 -19
  1720. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/multi_margin_loss_grad_op.h +0 -24
  1721. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/multi_margin_loss_op.h +0 -19
  1722. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/mvlgamma_grad_op.h +0 -17
  1723. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/mvlgamma_op.h +0 -15
  1724. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/pdist_grad_op.h +0 -21
  1725. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/segment_mean_op.h +0 -18
  1726. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/segment_min_op.h +0 -19
  1727. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_addmm.h +0 -16
  1728. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_apply_adagrad_da.h +0 -38
  1729. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_apply_centered_rms_prop.h +0 -47
  1730. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_apply_momentum.h +0 -36
  1731. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_apply_proximal_gradient_descent.h +0 -29
  1732. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_matrix_transpose_op.h +0 -29
  1733. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_segment_mean_with_num_segments_op.h +0 -19
  1734. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_segment_sqrt_n_grad_op.h +0 -21
  1735. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_segment_sqrt_n_op.h +0 -18
  1736. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_segment_sqrt_n_with_num_segments_op.h +0 -20
  1737. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sparse_tensor_to_csr_sparse_matrix_op.h +0 -18
  1738. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/sspaddmm_op.h +0 -22
  1739. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/inc/triplet_margin_loss_op.h +0 -22
  1740. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/utils/axis_util.h +5 -1
  1741. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/utils/reduce_infer_util.h +1 -2
  1742. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/customize/op_proto/utils/transfer_shape_according_to_format.h +5 -1
  1743. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/drop_out_gen_mask_kernels.h +2 -2
  1744. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/gather_grad_kernels.h +1 -1
  1745. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/replay_buffer/replay_buffer_factory.h +2 -1
  1746. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_util.h +2 -0
  1747. mindspore/include/mindspore/ops/kernel/ascend/availability/silent_check/ascend_silent_check.h +13 -14
  1748. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/addbmm_aclnn_kernel.h +1 -1
  1749. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/addmm_aclnn_kernel.h +1 -1
  1750. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/addmv_aclnn_kernel.h +1 -1
  1751. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/baddbmm_aclnn_kernel.h +1 -1
  1752. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/batch_norm_ext_aclnn_kernel.h +1 -0
  1753. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/batch_norm_grad_ext_aclnn_kernel.h +2 -1
  1754. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/bincount_ext_aclnn_kernel.h +2 -2
  1755. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/chunk_aclnn_kernel.h +2 -2
  1756. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/contiguous_aclnn_kernel.h +40 -0
  1757. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/conv1d_ext_aclnn_kernel.h +2 -0
  1758. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/conv1d_padding_aclnn_kernel.h +2 -1
  1759. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/conv2d_ext_aclnn_kernel.h +1 -0
  1760. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/conv2d_padding_aclnn_kernel.h +3 -1
  1761. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/conv3d_ext_aclnn_kernel.h +4 -0
  1762. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/convolution_str_aclnn_kernel.h +1 -1
  1763. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/cross_entropy_loss_aclnn_kernel.h +48 -0
  1764. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/cross_entropy_loss_grad_aclnn_kernel.h +47 -0
  1765. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/custom_aclnn_kernel.h +5 -1
  1766. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/custom_aclnn_utils.h +2 -1
  1767. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/custom_v2_aclnn_kernel.h +83 -0
  1768. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/dense_aclnn_kernel.h +13 -6
  1769. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/dropout_ext_aclnn_kernel.h +3 -3
  1770. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/empty_aclnn_kernel.h +39 -0
  1771. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/empty_like_aclnn_kernel.h +39 -0
  1772. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/flash_attention_score_aclnn_kernel.h +1 -0
  1773. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/flash_attention_score_grad_aclnn_kernel.h +1 -0
  1774. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/gather_d_grad_v2_aclnn_kernel.h +1 -1
  1775. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/grid_sampler_2d_grad_aclnn_kernel.h +3 -3
  1776. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/grid_sampler_3d_grad_aclnn_kernel.h +3 -3
  1777. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/grouped_matmul_v2_aclnn_kernel.h +49 -0
  1778. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/grouped_matmul_v4_aclnn_kernel.h +6 -3
  1779. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/index_add_ext_aclnn_kernel.h +1 -1
  1780. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/index_fill_scalar_aclnn_kernel.h +1 -1
  1781. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/index_fill_tensor_aclnn_kernel.h +1 -1
  1782. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inner_inplace_index_put_aclnn_kernel.h +1 -0
  1783. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inner_moe_token_unpermute_aclnn_kernel.h +45 -0
  1784. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_bernoulli_scalar_aclnn_kernel.h +47 -0
  1785. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_bernoulli_tensor_aclnn_kernel.h +46 -0
  1786. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_clamp_scalar_aclnn_kernel.h +2 -0
  1787. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_divs_aclnn_kernel.h +41 -0
  1788. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_index_add_aclnn_kernel.h +1 -1
  1789. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_normal_aclnn_kernel.h +2 -2
  1790. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_scatter_add_aclnn_kernel.h +45 -0
  1791. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_silu_aclnn_kernel.h +42 -0
  1792. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_sub_scalar_aclnn_kernel.h +41 -0
  1793. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/inplace_uniform_aclnn_kernel.h +2 -2
  1794. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/isinf_aclnn_kernel.h +1 -1
  1795. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/linalg_vector_norm_aclnn_kernel.h +1 -1
  1796. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/masked_scatter_aclnn_kernel.h +45 -0
  1797. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/matmul_all_reduce_aclnn_kernel.h +2 -2
  1798. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/matmul_reduce_scatter_aclnn_kernel.h +4 -4
  1799. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/moe_distribute_combine_aclnn_kernel.h +56 -0
  1800. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/moe_distribute_dispatch_aclnn_kernel.h +55 -0
  1801. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/moe_init_routing_quant_v2_aclnn_kernel.h +50 -0
  1802. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/mse_loss_ext_aclnn_kernel.h +1 -1
  1803. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/mse_loss_grad_ext_aclnn_kernel.h +1 -1
  1804. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/multinomial_ext_aclnn_kernel.h +1 -1
  1805. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/narrow_aclnn_kernel.h +3 -3
  1806. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/new_empty_aclnn_kernel.h +39 -0
  1807. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/new_full_aclnn_kernel.h +41 -0
  1808. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/norm_aclnn_kernel.h +1 -1
  1809. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/prod_ext_aclnn_kernel.h +1 -1
  1810. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/quant_batch_matmul_all_reduce_aclnn_kernel.h +2 -2
  1811. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/rand_ext_aclnn_kernel.h +2 -2
  1812. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/randint_aclnn_kernel.h +4 -4
  1813. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/randn_aclnn_kernel.h +2 -2
  1814. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/randperm_ext_aclnn_kernel.h +2 -2
  1815. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/repeat_interleave_grad_aclnn_kernel.h +1 -1
  1816. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/ring_attention_update_aclnn_kernel.h +41 -0
  1817. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/split_with_size_aclnn_kernel.h +1 -1
  1818. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/unique2_aclnn_kernel.h +3 -0
  1819. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/unique_consecutive_aclnn_kernel.h +2 -0
  1820. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/unique_dim_aclnn_kernel.h +3 -0
  1821. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/broadcast_to_view.h +42 -0
  1822. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/chunk_view.h +42 -0
  1823. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/concat_view.h +1 -1
  1824. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/diagonal_view.h +42 -0
  1825. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/expand_dims_view.h +42 -0
  1826. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/flatten_view.h +42 -0
  1827. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/narrow_view.h +43 -0
  1828. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/reshape_view.h +2 -1
  1829. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/select_ext_view.h +42 -0
  1830. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/slice_ext_view.h +42 -0
  1831. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/split_tensor_view.h +42 -0
  1832. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/split_view.h +1 -1
  1833. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/split_with_size_view.h +42 -0
  1834. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/squeeze_view.h +42 -0
  1835. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/strided_slice_view.h +1 -1
  1836. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/transpose_view.h +1 -1
  1837. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/unstack_ext_view.h +42 -0
  1838. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/view.h +42 -0
  1839. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/view_utils.h +0 -1
  1840. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn_auto_gen/apply_rotary_pos_emb_aclnn_kernel.h +41 -0
  1841. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn_auto_gen/mla_aclnn_kernel.h +41 -0
  1842. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn_auto_gen/paged_attention_aclnn_kernel.h +41 -0
  1843. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn_kernel_mod.h +139 -23
  1844. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn_kernel_utils.h +3 -3
  1845. mindspore/include/mindspore/ops/kernel/ascend/pyboost/aclnn_utils.h +42 -17
  1846. mindspore/include/mindspore/ops/kernel/ascend/pyboost/atb_runner.h +124 -0
  1847. mindspore/include/mindspore/ops/kernel/ascend/pyboost/atb_runner_base.h +48 -0
  1848. mindspore/include/mindspore/ops/kernel/ascend/pyboost/atb_utils.h +63 -0
  1849. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/abs.h +1 -1
  1850. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/acos_ext.h +1 -1
  1851. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/acosh_ext.h +1 -1
  1852. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adamw.h +1 -1
  1853. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adaptive_avg_pool1d.h +1 -1
  1854. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adaptive_avg_pool2d_ext.h +1 -1
  1855. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adaptive_avg_pool2d_grad_ext.h +1 -1
  1856. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adaptive_avg_pool3d_ext.h +1 -1
  1857. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adaptive_avg_pool3d_grad_ext.h +1 -1
  1858. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adaptive_max_pool1d.h +1 -1
  1859. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/adaptive_max_pool2d.h +1 -1
  1860. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/add.h +1 -1
  1861. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/add_ext.h +1 -1
  1862. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/add_layer_norm_grad.h +1 -1
  1863. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/add_layernorm_v2.h +1 -1
  1864. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/add_rms_norm.h +1 -1
  1865. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/add_rmsnorm_quant_v2.h +1 -1
  1866. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/add_scalar.h +1 -1
  1867. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/addbmm.h +1 -1
  1868. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/addcdiv_ext.h +1 -1
  1869. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/addcmul_ext.h +1 -1
  1870. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/addmm.h +1 -1
  1871. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/addmv.h +1 -1
  1872. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/all_finite.h +1 -1
  1873. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/all_gather_matmul.h +1 -1
  1874. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/any.h +40 -0
  1875. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/any_ext.h +40 -0
  1876. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/apply_rotary_pos_emb.h +40 -0
  1877. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/arange.h +1 -1
  1878. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/argmax_ext.h +1 -1
  1879. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/argmax_with_value.h +1 -1
  1880. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/argmin_ext.h +1 -1
  1881. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/argmin_with_value.h +1 -1
  1882. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/argsort.h +1 -1
  1883. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/as_strided.h +1 -1
  1884. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/asin_ext.h +1 -1
  1885. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/asinh_ext.h +1 -1
  1886. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/atan2_ext.h +1 -1
  1887. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/atan_ext.h +1 -1
  1888. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/atanh.h +1 -1
  1889. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/avg_pool1d.h +1 -1
  1890. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/avg_pool2d.h +1 -1
  1891. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/avg_pool2d_grad.h +1 -1
  1892. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/avg_pool3d_ext.h +1 -1
  1893. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/avg_pool3d_grad_ext.h +1 -1
  1894. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/baddbmm.h +1 -1
  1895. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_mat_mul.h +1 -1
  1896. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_norm_elemt.h +1 -1
  1897. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_norm_elemt_grad.h +1 -1
  1898. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_norm_ext.h +1 -1
  1899. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_norm_gather_stats_with_counts.h +1 -1
  1900. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_norm_grad_ext.h +1 -1
  1901. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_norm_reduce_grad.h +1 -1
  1902. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/batch_norm_stats.h +1 -1
  1903. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bernoulli_ext.h +1 -1
  1904. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/binary_cross_entropy.h +1 -1
  1905. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/binary_cross_entropy_grad.h +1 -1
  1906. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/binary_cross_entropy_with_logits.h +1 -1
  1907. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/binary_cross_entropy_with_logits_backward.h +1 -1
  1908. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bincount_ext.h +1 -1
  1909. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bitwise_and_scalar.h +1 -1
  1910. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bitwise_and_tensor.h +1 -1
  1911. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bitwise_not.h +1 -1
  1912. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bitwise_or_scalar.h +1 -1
  1913. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bitwise_or_tensor.h +1 -1
  1914. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bitwise_xor_scalar.h +1 -1
  1915. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bitwise_xor_tensor.h +1 -1
  1916. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/bmm_ext.h +1 -1
  1917. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/broadcast_to.h +1 -1
  1918. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/broadcast_to_view.h +40 -0
  1919. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cast.h +1 -1
  1920. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/ceil.h +1 -1
  1921. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cell_backward_hook.h +40 -0
  1922. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/chunk.h +1 -1
  1923. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/chunk_view.h +40 -0
  1924. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/clamp_scalar.h +1 -1
  1925. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/clamp_tensor.h +1 -1
  1926. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/clone.h +1 -1
  1927. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/col2im_ext.h +1 -1
  1928. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/col2im_grad.h +1 -1
  1929. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/concat.h +1 -1
  1930. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/constant_pad_nd.h +1 -1
  1931. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/contiguous.h +1 -1
  1932. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/conv1d_ext.h +1 -1
  1933. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/conv1d_padding.h +1 -1
  1934. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/conv2d_ext.h +1 -1
  1935. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/conv2d_padding.h +1 -1
  1936. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/conv3d_ext.h +1 -1
  1937. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/conv3d_padding.h +1 -1
  1938. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/conv_transpose2d.h +1 -1
  1939. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/convolution.h +1 -1
  1940. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/convolution_grad.h +1 -1
  1941. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/convolution_str.h +1 -1
  1942. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/convolution_str_grad.h +1 -1
  1943. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/copy.h +1 -1
  1944. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cos.h +1 -1
  1945. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cosh.h +1 -1
  1946. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/count_nonzero.h +1 -1
  1947. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cross.h +1 -1
  1948. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cross_entropy_loss.h +40 -0
  1949. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cross_entropy_loss_grad.h +40 -0
  1950. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cummax.h +1 -1
  1951. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cummin_ext.h +1 -1
  1952. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/cumsum_ext.h +1 -1
  1953. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/custom_ext.h +1 -1
  1954. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/dense.h +1 -1
  1955. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/diag_ext.h +1 -1
  1956. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/diagonal_view.h +40 -0
  1957. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/div.h +1 -1
  1958. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/divmod.h +1 -1
  1959. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/divmods.h +1 -1
  1960. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/divs.h +1 -1
  1961. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/dot.h +1 -1
  1962. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/dropout_do_mask_ext.h +1 -1
  1963. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/dropout_ext.h +1 -1
  1964. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/dropout_gen_mask_ext.h +1 -1
  1965. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/dropout_grad_ext.h +1 -1
  1966. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/dynamic_quant_ext.h +1 -1
  1967. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/einsum_ext.h +40 -0
  1968. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/elu.h +1 -1
  1969. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/elu_ext.h +1 -1
  1970. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/elu_grad_ext.h +1 -1
  1971. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/embedding.h +1 -1
  1972. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/embedding_dense_backward.h +1 -1
  1973. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/empty.h +40 -0
  1974. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/empty_like.h +40 -0
  1975. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/equal.h +1 -1
  1976. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/equal_ext.h +1 -1
  1977. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/erf.h +1 -1
  1978. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/erfc.h +1 -1
  1979. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/erfinv.h +1 -1
  1980. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/exp.h +1 -1
  1981. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/exp2.h +1 -1
  1982. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/expand_as.h +1 -1
  1983. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/expand_dims.h +1 -1
  1984. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/expand_dims_view.h +40 -0
  1985. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/expm1.h +1 -1
  1986. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/eye.h +1 -1
  1987. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/ffn_ext.h +1 -1
  1988. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/fill_scalar.h +1 -1
  1989. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/fill_tensor.h +1 -1
  1990. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/flash_attention_score.h +1 -1
  1991. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/flash_attention_score_grad.h +1 -1
  1992. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/flatten_ext.h +1 -1
  1993. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/floor.h +1 -1
  1994. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/floor_div.h +1 -1
  1995. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/floor_div_scalar.h +1 -1
  1996. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/fmod_scalar.h +1 -1
  1997. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/fmod_tensor.h +1 -1
  1998. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/frac.h +1 -1
  1999. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/full_like.h +1 -1
  2000. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/func_dropout_ext.h +40 -0
  2001. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/func_max_pool2d.h +40 -0
  2002. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/fused_infer_attention_score.h +1 -1
  2003. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gather_d.h +1 -1
  2004. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gather_d_grad_v2.h +1 -1
  2005. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gcd.h +1 -1
  2006. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gelu.h +1 -1
  2007. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gelu_ext.h +1 -1
  2008. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gelu_grad.h +1 -1
  2009. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gelu_grad_ext.h +1 -1
  2010. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/generator.h +1 -1
  2011. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/glu.h +1 -1
  2012. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/glu_grad.h +1 -1
  2013. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gmm.h +40 -0
  2014. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gmm_backward.h +1 -1
  2015. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gmm_backward_fusion.h +40 -0
  2016. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gmm_v2.h +40 -0
  2017. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gmm_v2_backward.h +1 -1
  2018. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/gmm_v2_backward_fusion.h +40 -0
  2019. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/greater.h +1 -1
  2020. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/greater_equal.h +1 -1
  2021. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/greater_equal_scalar.h +1 -1
  2022. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/grid_sampler_2d.h +1 -1
  2023. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/grid_sampler_2d_grad.h +1 -1
  2024. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/grid_sampler_3d.h +1 -1
  2025. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/grid_sampler_3d_grad.h +1 -1
  2026. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/group_norm.h +1 -1
  2027. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/group_norm_grad.h +1 -1
  2028. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/grouped_matmul.h +1 -1
  2029. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/grouped_matmul_v2.h +1 -1
  2030. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/grouped_matmul_v4.h +1 -1
  2031. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hardtanh.h +1 -1
  2032. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hardtanh_grad.h +1 -1
  2033. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/histc_ext.h +1 -1
  2034. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hshrink.h +1 -1
  2035. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hshrink_grad.h +1 -1
  2036. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hsigmoid.h +1 -1
  2037. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hsigmoid_grad.h +1 -1
  2038. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hswish.h +1 -1
  2039. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/hswish_grad.h +1 -1
  2040. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/identity.h +1 -1
  2041. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/im2col_ext.h +1 -1
  2042. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/incre_flash_attention.h +1 -1
  2043. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/index.h +1 -1
  2044. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/index_add_ext.h +1 -1
  2045. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/index_fill_scalar.h +1 -1
  2046. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/index_fill_tensor.h +1 -1
  2047. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/index_select.h +1 -1
  2048. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inner_index.h +1 -1
  2049. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inner_inplace_index_put.h +1 -1
  2050. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inner_moe_token_unpermute.h +40 -0
  2051. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inner_non_zero.h +1 -1
  2052. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_add_ext.h +1 -1
  2053. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_addmm.h +1 -1
  2054. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_adds_ext.h +1 -1
  2055. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_bernoulli_scalar.h +40 -0
  2056. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_bernoulli_tensor.h +40 -0
  2057. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_clamp_scalar.h +1 -1
  2058. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_clamp_tensor.h +1 -1
  2059. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_copy.h +1 -1
  2060. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_div.h +1 -1
  2061. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_divmod.h +1 -1
  2062. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_divmods.h +1 -1
  2063. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_divs.h +1 -1
  2064. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_elu.h +1 -1
  2065. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_erfinv.h +1 -1
  2066. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_exp.h +1 -1
  2067. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_exponential.h +40 -0
  2068. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_fill_diagonal.h +1 -1
  2069. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_fill_scalar.h +1 -1
  2070. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_fill_tensor.h +1 -1
  2071. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_floor.h +1 -1
  2072. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_floor_divide.h +1 -1
  2073. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_floor_divides.h +1 -1
  2074. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_grouped_matmul_add.h +1 -1
  2075. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_hardtanh.h +1 -1
  2076. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_index_add.h +1 -1
  2077. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_index_put.h +1 -1
  2078. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_log.h +1 -1
  2079. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_masked_fill_scalar.h +1 -1
  2080. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_masked_fill_tensor.h +1 -1
  2081. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_matmul_add.h +40 -0
  2082. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_mul.h +1 -1
  2083. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_muls.h +1 -1
  2084. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_normal.h +1 -1
  2085. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_put.h +1 -1
  2086. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_random.h +1 -1
  2087. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_relu.h +1 -1
  2088. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_remainder_tensor_scalar.h +40 -0
  2089. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_remainder_tensor_tensor.h +40 -0
  2090. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_scatter_add.h +1 -1
  2091. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_scatter_src.h +1 -1
  2092. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_scatter_src_reduce.h +1 -1
  2093. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_scatter_value.h +1 -1
  2094. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_scatter_value_reduce.h +1 -1
  2095. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_silu.h +40 -0
  2096. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_stop_gradient.h +1 -1
  2097. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_sub_ext.h +1 -1
  2098. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_sub_scalar.h +1 -1
  2099. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_tanh.h +1 -1
  2100. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_threshold.h +1 -1
  2101. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_uniform.h +1 -1
  2102. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/inplace_zero.h +1 -1
  2103. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/isclose.h +1 -1
  2104. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/isfinite.h +1 -1
  2105. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/isinf.h +1 -1
  2106. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/isneginf.h +1 -1
  2107. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/kl_div.h +1 -1
  2108. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/kl_div_grad.h +1 -1
  2109. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/kthvalue.h +1 -1
  2110. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/kv_cache_scatter_update.h +1 -1
  2111. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/l1_loss_backward_ext.h +1 -1
  2112. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/l1_loss_ext.h +1 -1
  2113. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/layer_norm_ext.h +1 -1
  2114. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/layer_norm_grad_ext.h +1 -1
  2115. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/leaky_relu_ext.h +1 -1
  2116. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/leaky_relu_grad_ext.h +1 -1
  2117. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/lerp.h +1 -1
  2118. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/lerp_scalar.h +1 -1
  2119. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/less.h +1 -1
  2120. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/less_equal.h +1 -1
  2121. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/lin_space_ext.h +1 -1
  2122. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/linalg_qr.h +1 -1
  2123. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/linalg_vector_norm.h +1 -1
  2124. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/log.h +1 -1
  2125. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/log10.h +1 -1
  2126. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/log1p.h +1 -1
  2127. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/log2.h +1 -1
  2128. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/log_softmax.h +1 -1
  2129. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/log_softmax_ext.h +1 -1
  2130. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/log_softmax_grad.h +1 -1
  2131. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logaddexp.h +1 -1
  2132. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logaddexp2.h +1 -1
  2133. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logical_and.h +1 -1
  2134. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logical_not.h +1 -1
  2135. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logical_or.h +1 -1
  2136. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logical_xor.h +1 -1
  2137. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logsigmoid.h +1 -1
  2138. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logsigmoid_grad.h +1 -1
  2139. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/logsumexp.h +1 -1
  2140. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/masked_fill.h +1 -1
  2141. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/masked_scatter.h +40 -0
  2142. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/masked_select.h +1 -1
  2143. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/masked_select_grad.h +1 -1
  2144. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/matmul.h +1 -1
  2145. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/matmul_allreduce_add_rmsnorm.h +1 -1
  2146. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/matmul_ext.h +1 -1
  2147. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/matmul_reduce_scatter.h +1 -1
  2148. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/matrix_inverse_ext.h +1 -1
  2149. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/max.h +1 -1
  2150. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/max_dim.h +1 -1
  2151. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/max_pool_grad_with_indices.h +1 -1
  2152. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/max_pool_grad_with_mask.h +1 -1
  2153. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/max_pool_with_indices.h +1 -1
  2154. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/max_pool_with_mask.h +1 -1
  2155. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/max_unpool2d_ext.h +1 -1
  2156. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/maximum.h +1 -1
  2157. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mean_ext.h +1 -1
  2158. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/median_dim.h +1 -1
  2159. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/median_ext.h +1 -1
  2160. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/meshgrid.h +1 -1
  2161. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/min.h +1 -1
  2162. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/min_dim.h +1 -1
  2163. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/minimum.h +1 -1
  2164. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mish_ext.h +1 -1
  2165. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mish_grad_ext.h +1 -1
  2166. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mla.h +40 -0
  2167. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mm_ext.h +1 -1
  2168. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_compute_expert_tokens.h +1 -1
  2169. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_distribute_combine.h +40 -0
  2170. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_distribute_dispatch.h +40 -0
  2171. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_finalize_routing.h +1 -1
  2172. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_gating_top_k_softmax.h +1 -1
  2173. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_init_routing.h +1 -1
  2174. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_init_routing_quant_v2.h +40 -0
  2175. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_init_routing_v2.h +1 -1
  2176. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_token_permute.h +1 -1
  2177. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_token_permute_grad.h +1 -1
  2178. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_token_unpermute.h +1 -1
  2179. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/moe_token_unpermute_grad.h +1 -1
  2180. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mse_loss_ext.h +1 -1
  2181. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mse_loss_grad_ext.h +1 -1
  2182. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mul.h +1 -1
  2183. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/muls.h +1 -1
  2184. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/multi_scale_deformable_attn.h +1 -1
  2185. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/multi_scale_deformable_attn_grad.h +1 -1
  2186. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/multinomial_ext.h +1 -1
  2187. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/mv.h +1 -1
  2188. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/nan_to_num.h +1 -1
  2189. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/nansum.h +1 -1
  2190. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/narrow.h +1 -1
  2191. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/narrow_view.h +40 -0
  2192. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/ne_scalar.h +1 -1
  2193. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/neg.h +1 -1
  2194. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/new_empty.h +40 -0
  2195. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/new_full.h +40 -0
  2196. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/new_ones.h +1 -1
  2197. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/new_zeros.h +1 -1
  2198. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/nllloss.h +1 -1
  2199. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/nllloss_2d.h +1 -1
  2200. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/nllloss_2d_grad.h +1 -1
  2201. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/nllloss_grad.h +1 -1
  2202. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/non_zero.h +1 -1
  2203. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/non_zero_ext.h +1 -1
  2204. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/norm.h +1 -1
  2205. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/normal_float_float.h +1 -1
  2206. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/normal_float_tensor.h +1 -1
  2207. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/normal_tensor_float.h +1 -1
  2208. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/normal_tensor_tensor.h +1 -1
  2209. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/not_equal.h +1 -1
  2210. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/one_hot_ext.h +1 -1
  2211. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/ones.h +1 -1
  2212. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/ones_like_ext.h +1 -1
  2213. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/outer.h +1 -1
  2214. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/paged_attention.h +40 -0
  2215. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/pixel_shuffle.h +1 -1
  2216. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/polar.h +1 -1
  2217. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/pow.h +1 -1
  2218. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/pow_scalar_tensor.h +1 -1
  2219. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/pow_tensor_scalar.h +1 -1
  2220. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/prelu.h +1 -1
  2221. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/prelu_grad.h +1 -1
  2222. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/prod_ext.h +1 -1
  2223. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/prompt_flash_attention.h +1 -1
  2224. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/quant_batch_matmul.h +1 -1
  2225. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/quant_matmul.h +40 -0
  2226. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/quant_v2.h +1 -1
  2227. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/rand_ext.h +1 -1
  2228. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/rand_like_ext.h +1 -1
  2229. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/randint.h +1 -1
  2230. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/randint_like.h +1 -1
  2231. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/randn.h +1 -1
  2232. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/randn_like.h +1 -1
  2233. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/randperm_ext.h +1 -1
  2234. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reciprocal.h +1 -1
  2235. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reduce_all.h +1 -1
  2236. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reduce_any.h +1 -1
  2237. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reduce_max.h +1 -1
  2238. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reduce_min.h +1 -1
  2239. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reflection_pad_1d.h +1 -1
  2240. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reflection_pad_1d_grad.h +1 -1
  2241. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reflection_pad_2d.h +1 -1
  2242. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reflection_pad_2d_grad.h +1 -1
  2243. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reflection_pad_3d.h +1 -1
  2244. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reflection_pad_3d_grad.h +1 -1
  2245. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/relu.h +1 -1
  2246. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/relu_grad.h +1 -1
  2247. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/remainder_scalar_tensor.h +1 -1
  2248. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/remainder_tensor_scalar.h +1 -1
  2249. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/remainder_tensor_tensor.h +1 -1
  2250. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/repeat.h +1 -1
  2251. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/repeat_interleave_grad.h +1 -1
  2252. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/repeat_interleave_int.h +1 -1
  2253. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/repeat_interleave_tensor.h +1 -1
  2254. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/replication_pad_1d.h +1 -1
  2255. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/replication_pad_1d_grad.h +1 -1
  2256. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/replication_pad_2d.h +1 -1
  2257. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/replication_pad_2d_grad.h +1 -1
  2258. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/replication_pad_3d.h +1 -1
  2259. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/replication_pad_3d_grad.h +1 -1
  2260. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reshape.h +1 -1
  2261. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reshape_and_cache.h +40 -0
  2262. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/reverse_v2.h +1 -1
  2263. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/ring_attention_update.h +40 -0
  2264. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/rms_norm.h +1 -1
  2265. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/rms_norm_grad.h +1 -1
  2266. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/roll.h +1 -1
  2267. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/rotary_position_embedding.h +1 -1
  2268. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/rotary_position_embedding_grad.h +1 -1
  2269. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/round.h +1 -1
  2270. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/rsqrt.h +1 -1
  2271. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/scatter.h +1 -1
  2272. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/scatter_add_ext.h +1 -1
  2273. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/scatter_value.h +1 -1
  2274. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/searchsorted.h +1 -1
  2275. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/select.h +1 -1
  2276. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/select_ext_view.h +40 -0
  2277. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/select_v2.h +1 -1
  2278. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/selu_ext.h +1 -1
  2279. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/selu_grad.h +1 -1
  2280. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sigmoid.h +1 -1
  2281. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sigmoid_grad.h +1 -1
  2282. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sign.h +1 -1
  2283. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/silent_check_v2.h +1 -1
  2284. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/silent_check_v3.h +1 -1
  2285. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/silu.h +1 -1
  2286. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/silu_grad.h +1 -1
  2287. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sin.h +1 -1
  2288. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sinc.h +1 -1
  2289. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sinh.h +1 -1
  2290. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/slice.h +1 -1
  2291. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/slice_ext.h +1 -1
  2292. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/slice_ext_view.h +40 -0
  2293. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/smooth_l1_loss.h +1 -1
  2294. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/smooth_l1_loss_grad.h +1 -1
  2295. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/soft_margin_loss.h +1 -1
  2296. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/soft_margin_loss_grad.h +1 -1
  2297. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/softmax.h +1 -1
  2298. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/softmax_backward.h +1 -1
  2299. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/softplus_ext.h +1 -1
  2300. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/softplus_grad_ext.h +1 -1
  2301. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/softshrink.h +1 -1
  2302. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/softshrink_grad.h +1 -1
  2303. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sort_ext.h +1 -1
  2304. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/speed_fusion_attention.h +1 -1
  2305. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/speed_fusion_attention_grad.h +1 -1
  2306. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/split.h +1 -1
  2307. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/split_tensor.h +1 -1
  2308. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/split_tensor_view.h +40 -0
  2309. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/split_with_size.h +1 -1
  2310. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/split_with_size_view.h +40 -0
  2311. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sqrt.h +1 -1
  2312. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/square.h +1 -1
  2313. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/squeeze.h +1 -1
  2314. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/stack_ext.h +1 -1
  2315. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/std.h +1 -1
  2316. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/std_mean.h +1 -1
  2317. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sub.h +1 -1
  2318. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sub_ext.h +1 -1
  2319. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sub_scalar.h +1 -1
  2320. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/sum_ext.h +1 -1
  2321. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/swiglu.h +1 -1
  2322. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/swiglu_grad.h +1 -1
  2323. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/t_ext.h +1 -1
  2324. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/take.h +1 -1
  2325. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/tan.h +1 -1
  2326. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/tanh.h +1 -1
  2327. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/tanh_grad.h +1 -1
  2328. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/tensor_scatter_elements.h +1 -1
  2329. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/threshold.h +1 -1
  2330. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/threshold_grad.h +1 -1
  2331. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/tile.h +1 -1
  2332. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/topk_ext.h +1 -1
  2333. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/trace_ext.h +1 -1
  2334. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/transpose.h +1 -1
  2335. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/transpose_ext_view.h +40 -0
  2336. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/transpose_view.h +40 -0
  2337. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/triangular_solve.h +1 -1
  2338. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/tril_ext.h +1 -1
  2339. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/triu.h +1 -1
  2340. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/trunc.h +1 -1
  2341. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/type_as.h +1 -1
  2342. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/uniform_ext.h +1 -1
  2343. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/unique2.h +1 -1
  2344. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/unique_consecutive.h +1 -1
  2345. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/unique_dim.h +1 -1
  2346. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/unstack_ext_view.h +40 -0
  2347. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_bicubic2d.h +1 -1
  2348. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_bicubic2d_grad.h +1 -1
  2349. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_bilinear2d.h +1 -1
  2350. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_bilinear2d_grad.h +1 -1
  2351. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_linear1d.h +1 -1
  2352. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_linear1d_grad.h +1 -1
  2353. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_nearest1d.h +1 -1
  2354. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_nearest1d_grad.h +1 -1
  2355. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_nearest2d.h +1 -1
  2356. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_nearest2d_grad.h +1 -1
  2357. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_nearest3d.h +1 -1
  2358. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_nearest3d_grad.h +1 -1
  2359. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_trilinear3d.h +1 -1
  2360. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/upsample_trilinear3d_grad.h +1 -1
  2361. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/var.h +1 -1
  2362. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/var_mean.h +1 -1
  2363. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/view.h +1 -1
  2364. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/view_as.h +1 -1
  2365. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/weight_quant_batch_matmul.h +1 -1
  2366. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/xlogy.h +1 -1
  2367. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/xlogy_scalar_other.h +1 -1
  2368. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/xlogy_scalar_self.h +1 -1
  2369. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/zeros.h +1 -1
  2370. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/zeros_like_ext.h +1 -1
  2371. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/adamw.h +5 -5
  2372. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/adaptive_avg_pool1d.h +2 -3
  2373. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/adaptive_avg_pool3d_ext.h +2 -3
  2374. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/adaptive_max_pool1d.h +3 -2
  2375. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/adaptive_max_pool2d.h +3 -2
  2376. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/add.h +2 -2
  2377. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/add_layernorm_v2.h +3 -3
  2378. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/add_rms_norm.h +3 -3
  2379. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/add_rmsnorm_quant_v2.h +3 -3
  2380. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/add_scalar.h +2 -2
  2381. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/addbmm.h +3 -3
  2382. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/addmm.h +3 -3
  2383. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/addmv.h +3 -3
  2384. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/all_finite.h +2 -2
  2385. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/all_gather_matmul.h +3 -3
  2386. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/any.h +34 -0
  2387. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/any_ext.h +35 -0
  2388. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/arange.h +3 -3
  2389. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/argmax_ext.h +2 -2
  2390. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/argmax_with_value.h +4 -3
  2391. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/argmin_ext.h +2 -2
  2392. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/argmin_with_value.h +4 -3
  2393. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/argsort.h +3 -3
  2394. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/avg_pool1d.h +4 -4
  2395. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/avg_pool2d.h +5 -5
  2396. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/avg_pool2d_grad.h +5 -5
  2397. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/avg_pool3d_ext.h +6 -6
  2398. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/avg_pool3d_grad_ext.h +6 -6
  2399. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/baddbmm.h +3 -3
  2400. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/batch_mat_mul.h +3 -3
  2401. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/batch_norm_elemt.h +5 -7
  2402. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/batch_norm_ext.h +6 -5
  2403. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/batch_norm_gather_stats_with_counts.h +5 -5
  2404. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/batch_norm_grad_ext.h +6 -6
  2405. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/batch_norm_stats.h +3 -2
  2406. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/bernoulli_ext.h +2 -3
  2407. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/binary_cross_entropy.h +4 -5
  2408. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/binary_cross_entropy_grad.h +5 -6
  2409. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/binary_cross_entropy_with_logits.h +6 -6
  2410. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/binary_cross_entropy_with_logits_backward.h +4 -4
  2411. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/bincount_ext.h +3 -3
  2412. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/bmm_ext.h +2 -3
  2413. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/cell_backward_hook.h +27 -0
  2414. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/clone.h +1 -1
  2415. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/contiguous.h +1 -1
  2416. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/conv1d_ext.h +4 -5
  2417. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/conv1d_padding.h +5 -6
  2418. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/conv2d_ext.h +4 -5
  2419. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/conv2d_padding.h +5 -6
  2420. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/conv3d_ext.h +4 -5
  2421. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/conv3d_padding.h +5 -6
  2422. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/conv_transpose2d.h +6 -4
  2423. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/convolution.h +6 -6
  2424. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/convolution_grad.h +3 -3
  2425. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/convolution_str.h +6 -7
  2426. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/copy.h +1 -1
  2427. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/count_nonzero.h +2 -3
  2428. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/cross.h +2 -2
  2429. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/cross_entropy_loss.h +37 -0
  2430. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/cross_entropy_loss_grad.h +38 -0
  2431. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/cummax.h +3 -3
  2432. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/cummin_ext.h +3 -3
  2433. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/cumsum_ext.h +2 -2
  2434. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/custom_ext.h +2 -2
  2435. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/custom_kernel.h +79 -14
  2436. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/custom_launch_aclnn.h +1 -1
  2437. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/dense.h +2 -2
  2438. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/diag_ext.h +2 -2
  2439. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/divmod.h +2 -3
  2440. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/divmods.h +2 -2
  2441. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/divs.h +2 -2
  2442. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/dropout_do_mask_ext.h +2 -2
  2443. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/dropout_ext.h +2 -3
  2444. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/dropout_gen_mask_ext.h +3 -3
  2445. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/dropout_grad_ext.h +2 -2
  2446. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/einsum_ext.h +36 -0
  2447. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/elu_ext.h +2 -2
  2448. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/elu_grad_ext.h +3 -3
  2449. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/embedding.h +4 -5
  2450. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/embedding_dense_backward.h +5 -3
  2451. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/empty.h +35 -0
  2452. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/empty_like.h +36 -0
  2453. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/eye.h +2 -2
  2454. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/ffn_ext.h +8 -8
  2455. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/fill_scalar.h +2 -2
  2456. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/fill_tensor.h +2 -3
  2457. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/flash_attention_score.h +4 -4
  2458. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/flash_attention_score_grad.h +6 -6
  2459. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/flatten_ext.h +1 -1
  2460. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/floor_div.h +2 -2
  2461. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/floor_div_scalar.h +2 -2
  2462. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/full_like.h +2 -2
  2463. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/func_dropout_ext.h +38 -0
  2464. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/func_max_pool2d.h +38 -0
  2465. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/fused_infer_attention_score.h +15 -18
  2466. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gather_d_grad_v2.h +2 -3
  2467. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gelu_grad.h +2 -2
  2468. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gelu_grad_ext.h +2 -2
  2469. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gmm.h +38 -0
  2470. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gmm_backward.h +1 -1
  2471. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gmm_backward_fusion.h +37 -0
  2472. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gmm_v2.h +38 -0
  2473. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gmm_v2_backward.h +1 -1
  2474. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/gmm_v2_backward_fusion.h +36 -0
  2475. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/grid_sampler_2d_grad.h +3 -3
  2476. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/grid_sampler_3d_grad.h +3 -3
  2477. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/group_norm.h +3 -3
  2478. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/group_norm_grad.h +3 -3
  2479. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/grouped_matmul.h +1 -1
  2480. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/grouped_matmul_v4.h +1 -1
  2481. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/hshrink.h +2 -2
  2482. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/hshrink_grad.h +2 -3
  2483. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/identity.h +1 -1
  2484. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/incre_flash_attention.h +11 -11
  2485. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/index.h +2 -2
  2486. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/index_add_ext.h +3 -3
  2487. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/index_fill_scalar.h +3 -3
  2488. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/index_fill_tensor.h +3 -3
  2489. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inner_index.h +2 -2
  2490. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inner_inplace_index_put.h +4 -5
  2491. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inner_moe_token_unpermute.h +39 -0
  2492. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inner_non_zero.h +1 -2
  2493. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_add_ext.h +2 -3
  2494. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_addmm.h +3 -4
  2495. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_adds_ext.h +2 -3
  2496. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_bernoulli_scalar.h +35 -0
  2497. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_bernoulli_tensor.h +35 -0
  2498. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_clamp_scalar.h +3 -4
  2499. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_clamp_tensor.h +3 -4
  2500. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_copy.h +2 -3
  2501. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_div.h +2 -2
  2502. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_divmod.h +3 -3
  2503. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_divmods.h +3 -3
  2504. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_divs.h +2 -2
  2505. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_elu.h +2 -2
  2506. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_erfinv.h +1 -2
  2507. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_exp.h +1 -1
  2508. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_exponential.h +36 -0
  2509. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_fill_diagonal.h +2 -3
  2510. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_fill_scalar.h +2 -2
  2511. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_fill_tensor.h +2 -2
  2512. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_floor.h +1 -2
  2513. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_floor_divide.h +2 -3
  2514. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_floor_divides.h +2 -2
  2515. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_grouped_matmul_add.h +3 -3
  2516. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_hardtanh.h +2 -3
  2517. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_index_add.h +3 -4
  2518. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_index_put.h +3 -4
  2519. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_log.h +1 -1
  2520. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_masked_fill_scalar.h +2 -3
  2521. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_masked_fill_tensor.h +2 -3
  2522. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_matmul_add.h +34 -0
  2523. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_mul.h +2 -2
  2524. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_muls.h +2 -2
  2525. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_normal.h +3 -3
  2526. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_put.h +3 -3
  2527. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_random.h +3 -4
  2528. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_relu.h +1 -2
  2529. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_remainder_tensor_scalar.h +34 -0
  2530. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_remainder_tensor_tensor.h +35 -0
  2531. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_scatter_add.h +3 -4
  2532. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_scatter_src.h +3 -4
  2533. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_scatter_src_reduce.h +4 -5
  2534. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_scatter_value.h +3 -3
  2535. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_scatter_value_reduce.h +4 -5
  2536. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_silu.h +35 -0
  2537. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_stop_gradient.h +1 -1
  2538. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_sub_ext.h +2 -3
  2539. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_sub_scalar.h +2 -3
  2540. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_tanh.h +1 -2
  2541. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_threshold.h +2 -3
  2542. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_uniform.h +3 -4
  2543. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/inplace_zero.h +1 -2
  2544. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/isclose.h +3 -3
  2545. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/isinf.h +1 -1
  2546. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/kl_div.h +3 -3
  2547. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/kl_div_grad.h +3 -6
  2548. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/kv_cache_scatter_update.h +3 -5
  2549. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/l1_loss_backward_ext.h +3 -5
  2550. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/l1_loss_ext.h +2 -2
  2551. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/layer_norm_ext.h +3 -3
  2552. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/layer_norm_grad_ext.h +4 -4
  2553. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/lerp.h +2 -2
  2554. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/lerp_scalar.h +2 -2
  2555. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/lin_space_ext.h +3 -3
  2556. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/linalg_qr.h +1 -2
  2557. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/linalg_vector_norm.h +1 -1
  2558. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/log_softmax_ext.h +3 -4
  2559. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/log_softmax_grad.h +2 -3
  2560. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/masked_fill.h +2 -2
  2561. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/masked_scatter.h +35 -0
  2562. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/masked_select.h +2 -2
  2563. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/masked_select_grad.h +2 -4
  2564. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/matmul.h +3 -3
  2565. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/matmul_allreduce_add_rmsnorm.h +3 -3
  2566. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/matmul_ext.h +2 -2
  2567. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/matmul_reduce_scatter.h +5 -4
  2568. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/max_pool_grad_with_indices.h +6 -7
  2569. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/max_pool_grad_with_mask.h +6 -6
  2570. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/max_pool_with_indices.h +5 -5
  2571. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/max_pool_with_mask.h +4 -5
  2572. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/max_unpool2d_ext.h +6 -6
  2573. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/mean_ext.h +3 -3
  2574. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/meshgrid.h +4 -3
  2575. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/mm_ext.h +2 -2
  2576. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/moe_distribute_combine.h +45 -0
  2577. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/moe_distribute_dispatch.h +41 -0
  2578. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/moe_token_permute.h +2 -2
  2579. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/moe_token_unpermute.h +4 -6
  2580. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/moe_token_unpermute_grad.h +3 -4
  2581. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/move_to.h +2 -2
  2582. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/mse_loss_ext.h +2 -2
  2583. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/mse_loss_grad_ext.h +3 -4
  2584. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/muls.h +2 -2
  2585. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/multi_scale_deformable_attn.h +5 -3
  2586. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/multi_scale_deformable_attn_grad.h +4 -6
  2587. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/multinomial_ext.h +3 -4
  2588. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/mv.h +2 -2
  2589. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/nan_to_num.h +3 -4
  2590. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/nansum.h +3 -3
  2591. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/ne_scalar.h +2 -2
  2592. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/new_empty.h +36 -0
  2593. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/new_full.h +36 -0
  2594. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/new_ones.h +2 -2
  2595. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/new_zeros.h +1 -1
  2596. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/nllloss.h +3 -3
  2597. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/nllloss_2d.h +3 -3
  2598. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/nllloss_2d_grad.h +4 -4
  2599. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/nllloss_grad.h +4 -6
  2600. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/non_zero.h +1 -1
  2601. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/non_zero_ext.h +2 -2
  2602. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/norm.h +1 -1
  2603. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/normal_float_float.h +3 -3
  2604. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/normal_float_tensor.h +3 -3
  2605. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/normal_tensor_float.h +3 -3
  2606. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/normal_tensor_tensor.h +3 -4
  2607. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/one_hot_ext.h +3 -3
  2608. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/ones.h +2 -2
  2609. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/ones_like_ext.h +2 -2
  2610. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/outer.h +2 -2
  2611. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/pixel_shuffle.h +2 -2
  2612. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/pow_scalar_tensor.h +2 -2
  2613. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/pow_tensor_scalar.h +2 -3
  2614. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/prod_ext.h +3 -3
  2615. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/prompt_flash_attention.h +7 -7
  2616. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/quant_batch_matmul.h +7 -7
  2617. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/quant_matmul.h +41 -0
  2618. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/quant_v2.h +3 -3
  2619. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/rand_ext.h +3 -3
  2620. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/rand_like_ext.h +3 -3
  2621. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/randint.h +3 -4
  2622. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/randint_like.h +3 -4
  2623. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/randn.h +3 -3
  2624. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/randn_like.h +3 -3
  2625. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/randperm_ext.h +3 -3
  2626. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/reduce_all.h +2 -2
  2627. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/relu_grad.h +2 -2
  2628. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/repeat_interleave_grad.h +3 -3
  2629. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/repeat_interleave_int.h +3 -4
  2630. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/repeat_interleave_tensor.h +4 -5
  2631. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/reshape.h +4 -2
  2632. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/ring_attention_update.h +38 -0
  2633. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/rms_norm.h +4 -4
  2634. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/rotary_position_embedding_grad.h +3 -3
  2635. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/round.h +2 -2
  2636. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/scatter_add_ext.h +3 -3
  2637. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/searchsorted.h +3 -4
  2638. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/sigmoid_grad.h +2 -2
  2639. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/silent_check_v2.h +5 -5
  2640. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/silent_check_v3.h +6 -4
  2641. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/smooth_l1_loss.h +3 -4
  2642. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/smooth_l1_loss_grad.h +4 -5
  2643. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/soft_margin_loss.h +2 -3
  2644. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/soft_margin_loss_grad.h +3 -4
  2645. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/softmax.h +2 -2
  2646. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/softshrink.h +2 -2
  2647. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/softshrink_grad.h +2 -3
  2648. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/sort_ext.h +5 -5
  2649. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/speed_fusion_attention.h +4 -4
  2650. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/speed_fusion_attention_grad.h +11 -12
  2651. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/square.h +1 -1
  2652. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/std.h +3 -3
  2653. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/std_mean.h +1 -1
  2654. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/sub.h +2 -2
  2655. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/sub_scalar.h +2 -2
  2656. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/sum_ext.h +3 -3
  2657. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/t_ext.h +1 -1
  2658. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/take.h +2 -2
  2659. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/tanh_grad.h +2 -2
  2660. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/threshold.h +2 -2
  2661. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/threshold_grad.h +2 -2
  2662. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/tile.h +1 -1
  2663. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/type_as.h +2 -2
  2664. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/uniform_ext.h +3 -3
  2665. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/unique2.h +2 -2
  2666. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/unique_consecutive.h +2 -2
  2667. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/unique_dim.h +2 -2
  2668. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/unstack_ext_view.h +38 -0
  2669. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_bicubic2d.h +4 -5
  2670. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_bicubic2d_grad.h +5 -6
  2671. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_bilinear2d.h +4 -5
  2672. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_bilinear2d_grad.h +6 -6
  2673. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_linear1d.h +4 -5
  2674. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_linear1d_grad.h +5 -6
  2675. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_nearest1d.h +3 -4
  2676. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_nearest1d_grad.h +4 -5
  2677. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_nearest2d.h +3 -4
  2678. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_nearest2d_grad.h +4 -5
  2679. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_nearest3d.h +3 -4
  2680. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_nearest3d_grad.h +4 -5
  2681. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_trilinear3d.h +4 -5
  2682. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/upsample_trilinear3d_grad.h +6 -6
  2683. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/var.h +3 -3
  2684. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/var_mean.h +1 -1
  2685. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/view_as.h +2 -2
  2686. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/weight_quant_batch_matmul.h +5 -5
  2687. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/zeros.h +2 -2
  2688. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/zeros_like_ext.h +2 -3
  2689. mindspore/include/mindspore/ops/kernel/ascend/pyboost/internal/auto_generate/apply_rotary_pos_emb.h +40 -0
  2690. mindspore/include/mindspore/ops/kernel/ascend/pyboost/internal/auto_generate/flash_attention_score.h +40 -0
  2691. mindspore/include/mindspore/ops/kernel/ascend/pyboost/internal/auto_generate/mla.h +40 -0
  2692. mindspore/include/mindspore/ops/kernel/ascend/pyboost/internal/auto_generate/paged_attention.h +40 -0
  2693. mindspore/include/mindspore/ops/kernel/ascend/pyboost/internal/auto_generate/reshape_and_cache.h +40 -0
  2694. mindspore/include/mindspore/ops/kernel/ascend/pyboost/internal/functions/functions.h +35 -0
  2695. mindspore/include/mindspore/ops/kernel/cpu/empty_cpu_kernel.h +55 -0
  2696. mindspore/include/mindspore/ops/kernel/cpu/empty_like_cpu_kernel.h +55 -0
  2697. mindspore/include/mindspore/ops/kernel/cpu/grid_sampler_2d_grad_cpu_kernel.h +3 -15
  2698. mindspore/include/mindspore/ops/kernel/cpu/map_tensor/map_tensor_get_data_cpu_kernel.h +1 -1
  2699. mindspore/include/mindspore/ops/kernel/cpu/map_tensor/map_tensor_get_grad_cpu_kernel.h +1 -1
  2700. mindspore/include/mindspore/ops/kernel/cpu/masked_fill_cpu_kernel.h +1 -0
  2701. mindspore/include/mindspore/ops/kernel/cpu/multi_margin_loss_cpu_kernel.h +6 -6
  2702. mindspore/include/mindspore/ops/kernel/cpu/new_empty_cpu_kernel.h +55 -0
  2703. mindspore/include/mindspore/ops/kernel/cpu/nnacl/intrinsics/ms_simd_avx512_instructions.h +0 -3
  2704. mindspore/include/mindspore/ops/kernel/cpu/nnacl/intrinsics/ms_simd_avx_instructions.h +0 -3
  2705. mindspore/include/mindspore/ops/kernel/cpu/nnacl/intrinsics/ms_simd_sse_instructions.h +0 -3
  2706. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/abs.h +1 -1
  2707. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/acos_ext.h +38 -0
  2708. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/adaptive_avg_pool1d.h +1 -1
  2709. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/adaptive_avg_pool2d_grad_ext.h +1 -1
  2710. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/adaptive_avg_pool3d_grad_ext.h +1 -1
  2711. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/adaptive_max_pool1d.h +1 -1
  2712. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/adaptive_max_pool2d.h +1 -1
  2713. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/add.h +1 -1
  2714. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/add_ext.h +1 -1
  2715. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/add_layer_norm_grad.h +1 -1
  2716. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/add_rmsnorm_quant_v2.h +1 -1
  2717. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/add_scalar.h +1 -1
  2718. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/addcdiv_ext.h +1 -1
  2719. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/addcmul_ext.h +1 -1
  2720. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/any.h +38 -0
  2721. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/any_ext.h +38 -0
  2722. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/apply_rotary_pos_emb.h +38 -0
  2723. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/argmax_with_value.h +1 -1
  2724. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/argmin_with_value.h +1 -1
  2725. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/argsort.h +1 -1
  2726. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/as_strided.h +1 -1
  2727. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/atan_ext.h +38 -0
  2728. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/atanh.h +1 -1
  2729. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/avg_pool1d.h +1 -1
  2730. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/avg_pool2d.h +1 -1
  2731. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/avg_pool2d_grad.h +1 -1
  2732. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/avg_pool3d_ext.h +1 -1
  2733. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/avg_pool3d_grad_ext.h +1 -1
  2734. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/baddbmm.h +1 -1
  2735. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/batch_mat_mul.h +1 -1
  2736. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/batch_norm_elemt.h +1 -1
  2737. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/batch_norm_elemt_grad.h +1 -1
  2738. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/batch_norm_gather_stats_with_counts.h +1 -1
  2739. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/batch_norm_reduce_grad.h +1 -1
  2740. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/batch_norm_stats.h +1 -1
  2741. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bernoulli_ext.h +1 -1
  2742. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/binary_cross_entropy.h +1 -1
  2743. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/binary_cross_entropy_grad.h +1 -1
  2744. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/binary_cross_entropy_with_logits.h +1 -1
  2745. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/binary_cross_entropy_with_logits_backward.h +1 -1
  2746. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bitwise_and_scalar.h +1 -1
  2747. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bitwise_and_tensor.h +1 -1
  2748. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bitwise_not.h +1 -1
  2749. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bitwise_or_scalar.h +1 -1
  2750. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bitwise_or_tensor.h +1 -1
  2751. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bitwise_xor_scalar.h +1 -1
  2752. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bitwise_xor_tensor.h +1 -1
  2753. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/bmm_ext.h +1 -1
  2754. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/broadcast_to.h +1 -1
  2755. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/broadcast_to_view.h +38 -0
  2756. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cast.h +1 -1
  2757. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/ceil.h +1 -1
  2758. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cell_backward_hook.h +38 -0
  2759. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/chunk.h +1 -1
  2760. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/chunk_view.h +38 -0
  2761. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/clamp_scalar.h +1 -1
  2762. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/clamp_tensor.h +1 -1
  2763. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/clone.h +1 -1
  2764. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/col2im_ext.h +1 -1
  2765. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/col2im_grad.h +1 -1
  2766. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/concat.h +1 -1
  2767. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/constant_pad_nd.h +1 -1
  2768. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/contiguous.h +1 -1
  2769. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/conv1d_padding.h +1 -1
  2770. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/conv2d_ext.h +1 -1
  2771. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/conv2d_padding.h +1 -1
  2772. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/conv3d_padding.h +1 -1
  2773. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/conv_transpose2d.h +1 -1
  2774. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/convolution.h +1 -1
  2775. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/convolution_grad.h +1 -1
  2776. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/convolution_str.h +1 -1
  2777. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/convolution_str_grad.h +1 -1
  2778. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/copy.h +1 -1
  2779. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cos.h +1 -1
  2780. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cosh.h +1 -1
  2781. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/count_nonzero.h +1 -1
  2782. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cross.h +1 -1
  2783. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cross_entropy_loss.h +38 -0
  2784. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cross_entropy_loss_grad.h +38 -0
  2785. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/cummax.h +1 -1
  2786. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dense.h +1 -1
  2787. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/diag_ext.h +1 -1
  2788. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/diagonal_view.h +38 -0
  2789. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_all_gather.h +1 -1
  2790. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_all_gather_into_tensor.h +1 -1
  2791. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_all_gather_into_tensor_uneven.h +38 -0
  2792. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_all_reduce.h +1 -1
  2793. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_all_to_all_v.h +1 -1
  2794. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_all_to_all_v_single.h +1 -1
  2795. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_barrier.h +1 -1
  2796. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_batch_isend_irecv.h +1 -1
  2797. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_broadcast.h +1 -1
  2798. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_gather.h +1 -1
  2799. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_gather_into_tensor.h +1 -1
  2800. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_irecv.h +1 -1
  2801. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_isend.h +1 -1
  2802. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_reduce.h +1 -1
  2803. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_reduce_scatter.h +1 -1
  2804. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_reduce_scatter_tensor.h +1 -1
  2805. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_reduce_scatter_tensor_uneven.h +38 -0
  2806. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_scatter.h +1 -1
  2807. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dist_comm_scatter_tensor.h +1 -1
  2808. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/div.h +1 -1
  2809. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/divmod.h +1 -1
  2810. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/divmods.h +1 -1
  2811. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/divs.h +1 -1
  2812. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dot.h +1 -1
  2813. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/dynamic_quant_ext.h +1 -1
  2814. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/einsum_ext.h +38 -0
  2815. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/elu.h +1 -1
  2816. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/embedding.h +1 -1
  2817. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/embedding_dense_backward.h +1 -1
  2818. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/empty.h +38 -0
  2819. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/empty_like.h +38 -0
  2820. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/equal.h +1 -1
  2821. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/equal_ext.h +1 -1
  2822. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/erf.h +1 -1
  2823. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/erfc.h +1 -1
  2824. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/erfinv.h +1 -1
  2825. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/exp.h +1 -1
  2826. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/exp2.h +1 -1
  2827. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/expand_dims.h +1 -1
  2828. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/expand_dims_view.h +38 -0
  2829. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/expm1.h +1 -1
  2830. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/eye.h +1 -1
  2831. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/ffn_ext.h +1 -1
  2832. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/floor.h +1 -1
  2833. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/floor_div.h +1 -1
  2834. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/floor_div_scalar.h +1 -1
  2835. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/fmod_tensor.h +1 -1
  2836. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/frac.h +1 -1
  2837. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/full_like.h +1 -1
  2838. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/func_dropout_ext.h +38 -0
  2839. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/fused_infer_attention_score.h +1 -1
  2840. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gather_d.h +1 -1
  2841. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gather_d_grad_v2.h +1 -1
  2842. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gcd.h +1 -1
  2843. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gelu.h +1 -1
  2844. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gelu_ext.h +1 -1
  2845. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gelu_grad.h +1 -1
  2846. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gelu_grad_ext.h +1 -1
  2847. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/generator.h +1 -1
  2848. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/glu.h +1 -1
  2849. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/glu_grad.h +1 -1
  2850. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/greater.h +1 -1
  2851. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/greater_equal.h +1 -1
  2852. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/greater_equal_scalar.h +1 -1
  2853. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/grid_sampler_2d.h +1 -1
  2854. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/grid_sampler_2d_grad.h +1 -1
  2855. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/grid_sampler_3d.h +1 -1
  2856. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/grid_sampler_3d_grad.h +1 -1
  2857. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/group_norm.h +1 -1
  2858. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/group_norm_grad.h +1 -1
  2859. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/grouped_matmul.h +1 -1
  2860. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/grouped_matmul_v2.h +1 -1
  2861. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/grouped_matmul_v4.h +1 -1
  2862. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hardtanh.h +1 -1
  2863. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hardtanh_grad.h +1 -1
  2864. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hshrink.h +1 -1
  2865. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hshrink_grad.h +1 -1
  2866. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hsigmoid.h +1 -1
  2867. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hsigmoid_grad.h +1 -1
  2868. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hswish.h +1 -1
  2869. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/hswish_grad.h +1 -1
  2870. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/identity.h +1 -1
  2871. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/im2col_ext.h +1 -1
  2872. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/incre_flash_attention.h +1 -1
  2873. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/index.h +1 -1
  2874. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/index_add_ext.h +1 -1
  2875. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/index_fill_scalar.h +1 -1
  2876. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/index_fill_tensor.h +1 -1
  2877. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_comm_all_gather.h +1 -1
  2878. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_comm_all_reduce.h +1 -1
  2879. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_comm_all_to_all_v.h +1 -1
  2880. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_comm_irecv.h +1 -1
  2881. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_comm_isend.h +1 -1
  2882. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_comm_reduce_scatter.h +1 -1
  2883. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_index.h +1 -1
  2884. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_inplace_index_put.h +1 -1
  2885. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_moe_token_unpermute.h +38 -0
  2886. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inner_non_zero.h +1 -1
  2887. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_addmm.h +1 -1
  2888. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_bernoulli_scalar.h +38 -0
  2889. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_bernoulli_tensor.h +38 -0
  2890. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_clamp_scalar.h +1 -1
  2891. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_clamp_tensor.h +1 -1
  2892. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_copy.h +1 -1
  2893. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_div.h +1 -1
  2894. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_divmod.h +1 -1
  2895. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_divmods.h +1 -1
  2896. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_divs.h +1 -1
  2897. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_elu.h +1 -1
  2898. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_erfinv.h +1 -1
  2899. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_exponential.h +38 -0
  2900. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_fill_diagonal.h +1 -1
  2901. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_fill_scalar.h +1 -1
  2902. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_fill_tensor.h +1 -1
  2903. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_floor.h +1 -1
  2904. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_floor_divide.h +1 -1
  2905. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_floor_divides.h +1 -1
  2906. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_grouped_matmul_add.h +1 -1
  2907. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_hardtanh.h +1 -1
  2908. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_index_add.h +1 -1
  2909. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_index_put.h +1 -1
  2910. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_log.h +1 -1
  2911. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_masked_fill_scalar.h +1 -1
  2912. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_masked_fill_tensor.h +1 -1
  2913. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_matmul_add.h +38 -0
  2914. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_mul.h +1 -1
  2915. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_muls.h +1 -1
  2916. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_normal.h +1 -1
  2917. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_put.h +1 -1
  2918. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_random.h +1 -1
  2919. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_relu.h +1 -1
  2920. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_remainder_tensor_scalar.h +38 -0
  2921. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_remainder_tensor_tensor.h +38 -0
  2922. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_scatter_add.h +1 -1
  2923. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_scatter_src.h +1 -1
  2924. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_scatter_src_reduce.h +1 -1
  2925. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_scatter_value.h +1 -1
  2926. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_scatter_value_reduce.h +1 -1
  2927. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_silu.h +38 -0
  2928. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_sub_scalar.h +1 -1
  2929. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_tanh.h +1 -1
  2930. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_threshold.h +1 -1
  2931. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_uniform.h +1 -1
  2932. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/inplace_zero.h +1 -1
  2933. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/isclose.h +1 -1
  2934. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/isfinite.h +1 -1
  2935. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/isinf.h +1 -1
  2936. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/isneginf.h +1 -1
  2937. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/kl_div.h +1 -1
  2938. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/kl_div_grad.h +1 -1
  2939. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/kthvalue.h +1 -1
  2940. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/kv_cache_scatter_update.h +1 -1
  2941. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/layer_norm_ext.h +1 -1
  2942. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/leaky_relu_ext.h +1 -1
  2943. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/leaky_relu_grad_ext.h +1 -1
  2944. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/lerp.h +1 -1
  2945. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/lerp_scalar.h +1 -1
  2946. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/less.h +1 -1
  2947. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/less_equal.h +1 -1
  2948. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/linalg_qr.h +1 -1
  2949. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/linalg_vector_norm.h +1 -1
  2950. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/log.h +1 -1
  2951. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/log1p.h +1 -1
  2952. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/log_softmax.h +1 -1
  2953. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/log_softmax_grad.h +1 -1
  2954. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logaddexp.h +1 -1
  2955. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logaddexp2.h +1 -1
  2956. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logical_and.h +1 -1
  2957. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logical_not.h +1 -1
  2958. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logical_or.h +1 -1
  2959. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logical_xor.h +1 -1
  2960. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logsigmoid.h +1 -1
  2961. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logsigmoid_grad.h +1 -1
  2962. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/logsumexp.h +1 -1
  2963. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/masked_fill.h +1 -1
  2964. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/masked_scatter.h +38 -0
  2965. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/masked_select.h +1 -1
  2966. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/masked_select_grad.h +1 -1
  2967. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/matmul.h +1 -1
  2968. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/matmul_allreduce_add_rmsnorm.h +1 -1
  2969. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/matmul_ext.h +1 -1
  2970. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/max.h +1 -1
  2971. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/max_dim.h +1 -1
  2972. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/maximum.h +1 -1
  2973. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/mean_ext.h +1 -1
  2974. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/median_dim.h +1 -1
  2975. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/median_ext.h +1 -1
  2976. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/meshgrid.h +1 -1
  2977. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/min.h +1 -1
  2978. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/min_dim.h +1 -1
  2979. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/minimum.h +1 -1
  2980. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/mish_ext.h +1 -1
  2981. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/mish_grad_ext.h +1 -1
  2982. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/mla.h +38 -0
  2983. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_compute_expert_tokens.h +1 -1
  2984. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_distribute_combine.h +38 -0
  2985. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_distribute_dispatch.h +38 -0
  2986. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_finalize_routing.h +1 -1
  2987. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_gating_top_k_softmax.h +1 -1
  2988. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_init_routing.h +1 -1
  2989. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_init_routing_quant_v2.h +38 -0
  2990. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_init_routing_v2.h +1 -1
  2991. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_token_permute.h +1 -1
  2992. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_token_permute_grad.h +1 -1
  2993. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_token_unpermute.h +1 -1
  2994. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/moe_token_unpermute_grad.h +1 -1
  2995. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/mul.h +1 -1
  2996. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/muls.h +1 -1
  2997. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/multi_scale_deformable_attn.h +1 -1
  2998. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/multi_scale_deformable_attn_grad.h +1 -1
  2999. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/mv.h +1 -1
  3000. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/nan_to_num.h +1 -1
  3001. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/ne_scalar.h +1 -1
  3002. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/neg.h +1 -1
  3003. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/new_empty.h +38 -0
  3004. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/new_full.h +38 -0
  3005. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/new_ones.h +1 -1
  3006. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/new_zeros.h +1 -1
  3007. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/nllloss.h +1 -1
  3008. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/nllloss_2d.h +1 -1
  3009. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/nllloss_2d_grad.h +1 -1
  3010. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/nllloss_grad.h +1 -1
  3011. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/non_zero.h +1 -1
  3012. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/norm.h +1 -1
  3013. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/normal_float_float.h +1 -1
  3014. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/normal_float_tensor.h +1 -1
  3015. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/normal_tensor_float.h +1 -1
  3016. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/normal_tensor_tensor.h +1 -1
  3017. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/not_equal.h +1 -1
  3018. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/ones.h +1 -1
  3019. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/outer.h +1 -1
  3020. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/paged_attention.h +38 -0
  3021. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/pixel_shuffle.h +1 -1
  3022. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/polar.h +1 -1
  3023. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/pow.h +1 -1
  3024. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/pow_scalar_tensor.h +1 -1
  3025. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/pow_tensor_scalar.h +1 -1
  3026. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/prelu.h +1 -1
  3027. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/prelu_grad.h +1 -1
  3028. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/prod_ext.h +1 -1
  3029. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/quant_batch_matmul.h +1 -1
  3030. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/quant_matmul.h +38 -0
  3031. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/quant_v2.h +1 -1
  3032. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/randint.h +1 -1
  3033. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/randint_like.h +1 -1
  3034. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/randn.h +1 -1
  3035. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/randn_like.h +1 -1
  3036. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reciprocal.h +1 -1
  3037. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reduce_all.h +1 -1
  3038. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reduce_any.h +1 -1
  3039. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reduce_max.h +1 -1
  3040. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reduce_min.h +1 -1
  3041. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reflection_pad_1d.h +1 -1
  3042. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reflection_pad_1d_grad.h +1 -1
  3043. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reflection_pad_2d.h +1 -1
  3044. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reflection_pad_2d_grad.h +1 -1
  3045. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reflection_pad_3d.h +1 -1
  3046. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reflection_pad_3d_grad.h +1 -1
  3047. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/relu.h +1 -1
  3048. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/relu_grad.h +1 -1
  3049. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/repeat.h +1 -1
  3050. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/replication_pad_1d.h +1 -1
  3051. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/replication_pad_1d_grad.h +1 -1
  3052. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/replication_pad_2d.h +1 -1
  3053. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/replication_pad_2d_grad.h +1 -1
  3054. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/replication_pad_3d.h +1 -1
  3055. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/replication_pad_3d_grad.h +1 -1
  3056. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reshape.h +1 -1
  3057. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reshape_and_cache.h +38 -0
  3058. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/reverse_v2.h +1 -1
  3059. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/rms_norm_grad.h +1 -1
  3060. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/roll.h +1 -1
  3061. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/rotary_position_embedding.h +1 -1
  3062. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/rotary_position_embedding_grad.h +1 -1
  3063. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/round.h +1 -1
  3064. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/rsqrt.h +1 -1
  3065. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/scatter.h +1 -1
  3066. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/scatter_add_ext.h +1 -1
  3067. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/scatter_value.h +1 -1
  3068. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/searchsorted.h +1 -1
  3069. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/select.h +1 -1
  3070. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/select_ext_view.h +38 -0
  3071. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/select_v2.h +1 -1
  3072. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/selu_ext.h +1 -1
  3073. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/selu_grad.h +1 -1
  3074. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sigmoid.h +1 -1
  3075. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sigmoid_grad.h +1 -1
  3076. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sign.h +1 -1
  3077. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/silu.h +1 -1
  3078. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/silu_grad.h +1 -1
  3079. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sin.h +1 -1
  3080. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sinc.h +1 -1
  3081. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sinh.h +1 -1
  3082. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/slice.h +1 -1
  3083. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/slice_ext.h +1 -1
  3084. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/slice_ext_view.h +38 -0
  3085. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/smooth_l1_loss.h +1 -1
  3086. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/smooth_l1_loss_grad.h +1 -1
  3087. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/soft_margin_loss.h +1 -1
  3088. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/soft_margin_loss_grad.h +1 -1
  3089. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/softmax.h +1 -1
  3090. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/softmax_backward.h +1 -1
  3091. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/softplus_ext.h +1 -1
  3092. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/softplus_grad_ext.h +1 -1
  3093. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/softshrink.h +1 -1
  3094. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/softshrink_grad.h +1 -1
  3095. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/speed_fusion_attention.h +1 -1
  3096. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/speed_fusion_attention_grad.h +1 -1
  3097. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/split.h +1 -1
  3098. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sqrt.h +1 -1
  3099. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/square.h +1 -1
  3100. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/squeeze.h +1 -1
  3101. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/stack_ext.h +38 -0
  3102. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sub.h +1 -1
  3103. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sub_ext.h +1 -1
  3104. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sub_scalar.h +1 -1
  3105. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/sum_ext.h +1 -1
  3106. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/take.h +1 -1
  3107. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/tan.h +1 -1
  3108. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/tanh.h +1 -1
  3109. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/tanh_grad.h +1 -1
  3110. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/tensor_scatter_elements.h +1 -1
  3111. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/threshold.h +1 -1
  3112. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/threshold_grad.h +1 -1
  3113. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/tile.h +1 -1
  3114. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/transpose.h +1 -1
  3115. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/transpose_ext_view.h +38 -0
  3116. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/transpose_view.h +38 -0
  3117. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/triangular_solve.h +1 -1
  3118. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/triu.h +1 -1
  3119. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/trunc.h +1 -1
  3120. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/unique_consecutive.h +1 -1
  3121. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_bicubic2d.h +1 -1
  3122. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_bicubic2d_grad.h +1 -1
  3123. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_bilinear2d.h +1 -1
  3124. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_bilinear2d_grad.h +1 -1
  3125. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_linear1d.h +1 -1
  3126. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_linear1d_grad.h +1 -1
  3127. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_nearest1d.h +1 -1
  3128. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_nearest1d_grad.h +1 -1
  3129. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_nearest2d.h +1 -1
  3130. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_nearest2d_grad.h +1 -1
  3131. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_nearest3d.h +1 -1
  3132. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_nearest3d_grad.h +1 -1
  3133. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_trilinear3d.h +1 -1
  3134. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/upsample_trilinear3d_grad.h +1 -1
  3135. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/view.h +1 -1
  3136. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/weight_quant_batch_matmul.h +1 -1
  3137. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/xlogy.h +1 -1
  3138. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/xlogy_scalar_other.h +1 -1
  3139. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/xlogy_scalar_self.h +1 -1
  3140. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/zeros.h +1 -1
  3141. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/zeros_like_ext.h +38 -0
  3142. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/any.h +34 -0
  3143. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/any_ext.h +35 -0
  3144. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/binary_cross_entropy_with_logits.h +4 -4
  3145. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/cell_backward_hook.h +27 -0
  3146. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/clamp_scalar.h +2 -2
  3147. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/clamp_tensor.h +2 -3
  3148. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/contiguous.h +1 -1
  3149. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/copy.h +1 -1
  3150. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dense.h +2 -2
  3151. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_all_gather.h +1 -1
  3152. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_all_reduce.h +36 -0
  3153. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_barrier.h +36 -0
  3154. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_broadcast.h +2 -2
  3155. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_gather.h +1 -1
  3156. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_irecv.h +36 -0
  3157. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_isend.h +36 -0
  3158. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/dist_comm_scatter.h +1 -1
  3159. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/divmod.h +2 -3
  3160. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/empty.h +35 -0
  3161. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/empty_like.h +36 -0
  3162. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/group_norm.h +3 -3
  3163. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/grouped_matmul.h +1 -1
  3164. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/identity.h +1 -1
  3165. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/inner_comm_all_reduce.h +2 -3
  3166. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/inplace_copy.h +2 -2
  3167. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/layer_norm_ext.h +3 -3
  3168. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/masked_select.h +2 -2
  3169. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/matmul_ext.h +1 -1
  3170. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/max.h +1 -1
  3171. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/mean_ext.h +1 -1
  3172. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/meshgrid.h +4 -2
  3173. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/min.h +1 -1
  3174. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/new_empty.h +36 -0
  3175. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/new_ones.h +2 -2
  3176. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/new_zeros.h +2 -2
  3177. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/non_zero.h +1 -1
  3178. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/pixel_shuffle.h +2 -2
  3179. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/pow_scalar_tensor.h +1 -1
  3180. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/pow_tensor_scalar.h +1 -1
  3181. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/prod_ext.h +1 -1
  3182. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/reshape.h +4 -2
  3183. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/round.h +1 -1
  3184. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/searchsorted.h +3 -4
  3185. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/silu.h +1 -1
  3186. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/silu_grad.h +1 -2
  3187. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/sum_ext.h +1 -1
  3188. mindspore/include/mindspore/ops/kernel/cpu/pyboost/customize/unique_consecutive.h +2 -2
  3189. mindspore/include/mindspore/ops/kernel/cpu/sample_distorted_bounding_box_v2_cpu_kernel.h +4 -0
  3190. mindspore/include/mindspore/ops/kernel/cpu/sequence/bool_binary_arithmetic_cpu_kernel.h +42 -0
  3191. mindspore/include/mindspore/ops/kernel/cpu/sequence/sequence_len_cpu_kernel.h +0 -1
  3192. mindspore/include/mindspore/ops/kernel/cpu/sparse_apply_adagrad_cpu_kernel.h +1 -1
  3193. mindspore/include/mindspore/ops/kernel/cpu/sparse_apply_adagrad_v2_cpu_kernel.h +2 -2
  3194. mindspore/include/mindspore/ops/kernel/cpu/stft_cpu_kernel.h +16 -16
  3195. mindspore/include/mindspore/ops/kernel/cpu/utils/sampling_kernels.h +18 -15
  3196. mindspore/include/mindspore/ops/kernel/gpu/arrays/broadcast_to_gpu_kernel.h +1 -1
  3197. mindspore/include/mindspore/ops/kernel/gpu/arrays/contiguous_gpu_kernel.h +9 -6
  3198. mindspore/include/mindspore/ops/kernel/gpu/arrays/select_gpu_kernel.h +2 -2
  3199. mindspore/include/mindspore/ops/kernel/gpu/arrays/unique_consecutive_gpu_kernel.h +2 -2
  3200. mindspore/include/mindspore/ops/kernel/gpu/cuda_impl/cuda_class/unique_consecutive_helper.h +1 -1
  3201. mindspore/include/mindspore/ops/kernel/gpu/dynamic_akg/dynamic_utils.h +1 -1
  3202. mindspore/include/mindspore/ops/kernel/gpu/gpu_kernel.h +3 -4
  3203. mindspore/include/mindspore/ops/kernel/gpu/math/binary_ext_ops_gpu_kernel.h +2 -2
  3204. mindspore/include/mindspore/ops/kernel/gpu/math/correlate_gpu_kernel.h +15 -11
  3205. mindspore/include/mindspore/ops/kernel/gpu/math/eps_gpu_kernel.h +4 -4
  3206. mindspore/include/mindspore/ops/kernel/gpu/math/tracev2_grad_gpu_kernel.h +0 -1
  3207. mindspore/include/mindspore/ops/kernel/gpu/nccl/nccl_recv_gpu_kernel.h +1 -1
  3208. mindspore/include/mindspore/ops/kernel/gpu/nccl/nccl_send_gpu_kernel.h +1 -1
  3209. mindspore/include/mindspore/ops/kernel/gpu/nn/activation_grad_kernel.h +3 -3
  3210. mindspore/include/mindspore/ops/kernel/gpu/nn/adagrad_gpu_kernel.h +4 -2
  3211. mindspore/include/mindspore/ops/kernel/gpu/nn/adam_gpu_kernel.h +1 -1
  3212. mindspore/include/mindspore/ops/kernel/gpu/nn/batch_norm_grad_gpu_kernel.h +1 -0
  3213. mindspore/include/mindspore/ops/kernel/gpu/nn/kl_div_loss_grad_kernel.h +3 -2
  3214. mindspore/include/mindspore/ops/kernel/gpu/other/dynamic_stitch_gpu_kernel.h +6 -6
  3215. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/abs.h +1 -1
  3216. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/adaptive_avg_pool1d.h +1 -1
  3217. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/adaptive_avg_pool2d_grad_ext.h +1 -1
  3218. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/adaptive_avg_pool3d_grad_ext.h +1 -1
  3219. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/adaptive_max_pool1d.h +1 -1
  3220. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/adaptive_max_pool2d.h +1 -1
  3221. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/add.h +1 -1
  3222. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/add_ext.h +1 -1
  3223. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/add_layer_norm_grad.h +1 -1
  3224. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/add_rmsnorm_quant_v2.h +1 -1
  3225. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/add_scalar.h +1 -1
  3226. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/addcdiv_ext.h +1 -1
  3227. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/addcmul_ext.h +1 -1
  3228. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/any.h +38 -0
  3229. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/any_ext.h +38 -0
  3230. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/apply_rotary_pos_emb.h +38 -0
  3231. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/argmax_with_value.h +1 -1
  3232. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/argmin_with_value.h +1 -1
  3233. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/argsort.h +1 -1
  3234. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/as_strided.h +1 -1
  3235. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/atanh.h +1 -1
  3236. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/avg_pool1d.h +1 -1
  3237. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/avg_pool2d.h +1 -1
  3238. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/avg_pool2d_grad.h +1 -1
  3239. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/avg_pool3d_ext.h +1 -1
  3240. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/avg_pool3d_grad_ext.h +1 -1
  3241. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/baddbmm.h +1 -1
  3242. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/batch_mat_mul.h +1 -1
  3243. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/batch_norm_elemt.h +1 -1
  3244. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/batch_norm_elemt_grad.h +1 -1
  3245. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/batch_norm_gather_stats_with_counts.h +1 -1
  3246. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/batch_norm_reduce_grad.h +1 -1
  3247. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/batch_norm_stats.h +1 -1
  3248. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bernoulli_ext.h +1 -1
  3249. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/binary_cross_entropy.h +1 -1
  3250. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/binary_cross_entropy_grad.h +1 -1
  3251. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/binary_cross_entropy_with_logits.h +1 -1
  3252. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/binary_cross_entropy_with_logits_backward.h +1 -1
  3253. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bitwise_and_scalar.h +1 -1
  3254. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bitwise_and_tensor.h +1 -1
  3255. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bitwise_not.h +1 -1
  3256. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bitwise_or_scalar.h +1 -1
  3257. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bitwise_or_tensor.h +1 -1
  3258. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bitwise_xor_scalar.h +1 -1
  3259. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bitwise_xor_tensor.h +1 -1
  3260. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/bmm_ext.h +1 -1
  3261. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/broadcast_to.h +1 -1
  3262. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/broadcast_to_view.h +38 -0
  3263. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cast.h +1 -1
  3264. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/ceil.h +1 -1
  3265. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cell_backward_hook.h +38 -0
  3266. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/chunk.h +1 -1
  3267. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/chunk_view.h +38 -0
  3268. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/clamp_scalar.h +1 -1
  3269. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/clamp_tensor.h +1 -1
  3270. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/clone.h +1 -1
  3271. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/col2im_ext.h +1 -1
  3272. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/col2im_grad.h +1 -1
  3273. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/concat.h +1 -1
  3274. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/constant_pad_nd.h +1 -1
  3275. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/contiguous.h +1 -1
  3276. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/conv1d_padding.h +1 -1
  3277. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/conv2d_ext.h +1 -1
  3278. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/conv2d_padding.h +1 -1
  3279. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/conv3d_padding.h +1 -1
  3280. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/conv_transpose2d.h +1 -1
  3281. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/convolution.h +1 -1
  3282. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/convolution_grad.h +1 -1
  3283. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/convolution_str.h +1 -1
  3284. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/convolution_str_grad.h +1 -1
  3285. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/copy.h +1 -1
  3286. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cos.h +1 -1
  3287. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cosh.h +1 -1
  3288. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/count_nonzero.h +1 -1
  3289. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cross.h +1 -1
  3290. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cross_entropy_loss.h +38 -0
  3291. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cross_entropy_loss_grad.h +38 -0
  3292. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/cummax.h +1 -1
  3293. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dense.h +1 -1
  3294. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/diag_ext.h +1 -1
  3295. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/diagonal_view.h +38 -0
  3296. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_all_gather.h +1 -1
  3297. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_all_gather_into_tensor.h +1 -1
  3298. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_all_gather_into_tensor_uneven.h +38 -0
  3299. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_all_reduce.h +1 -1
  3300. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_all_to_all_v.h +1 -1
  3301. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_all_to_all_v_single.h +1 -1
  3302. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_barrier.h +1 -1
  3303. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_batch_isend_irecv.h +1 -1
  3304. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_broadcast.h +1 -1
  3305. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_gather.h +1 -1
  3306. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_gather_into_tensor.h +1 -1
  3307. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_irecv.h +1 -1
  3308. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_isend.h +1 -1
  3309. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_reduce.h +1 -1
  3310. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_reduce_scatter.h +1 -1
  3311. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_reduce_scatter_tensor.h +1 -1
  3312. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_reduce_scatter_tensor_uneven.h +38 -0
  3313. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_scatter.h +1 -1
  3314. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dist_comm_scatter_tensor.h +1 -1
  3315. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/div.h +1 -1
  3316. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/divmod.h +1 -1
  3317. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/divmods.h +1 -1
  3318. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/divs.h +1 -1
  3319. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dot.h +1 -1
  3320. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/dynamic_quant_ext.h +1 -1
  3321. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/einsum_ext.h +38 -0
  3322. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/elu.h +1 -1
  3323. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/embedding.h +1 -1
  3324. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/embedding_dense_backward.h +1 -1
  3325. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/equal.h +1 -1
  3326. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/equal_ext.h +1 -1
  3327. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/erf.h +1 -1
  3328. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/erfc.h +1 -1
  3329. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/erfinv.h +1 -1
  3330. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/exp.h +1 -1
  3331. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/exp2.h +1 -1
  3332. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/expand_dims.h +1 -1
  3333. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/expand_dims_view.h +38 -0
  3334. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/expm1.h +1 -1
  3335. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/eye.h +1 -1
  3336. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/ffn_ext.h +1 -1
  3337. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/floor.h +1 -1
  3338. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/floor_div.h +1 -1
  3339. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/floor_div_scalar.h +1 -1
  3340. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/fmod_tensor.h +1 -1
  3341. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/frac.h +1 -1
  3342. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/full_like.h +1 -1
  3343. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/func_dropout_ext.h +38 -0
  3344. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/fused_infer_attention_score.h +1 -1
  3345. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gather_d.h +1 -1
  3346. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gather_d_grad_v2.h +1 -1
  3347. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gcd.h +1 -1
  3348. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gelu.h +1 -1
  3349. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gelu_ext.h +1 -1
  3350. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gelu_grad.h +1 -1
  3351. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gelu_grad_ext.h +1 -1
  3352. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/generator.h +1 -1
  3353. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/glu.h +1 -1
  3354. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/glu_grad.h +1 -1
  3355. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/greater.h +1 -1
  3356. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/greater_equal.h +1 -1
  3357. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/greater_equal_scalar.h +1 -1
  3358. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/grid_sampler_2d.h +1 -1
  3359. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/grid_sampler_2d_grad.h +1 -1
  3360. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/grid_sampler_3d.h +1 -1
  3361. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/grid_sampler_3d_grad.h +1 -1
  3362. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/group_norm.h +1 -1
  3363. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/group_norm_grad.h +1 -1
  3364. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/grouped_matmul.h +1 -1
  3365. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/grouped_matmul_v2.h +1 -1
  3366. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/grouped_matmul_v4.h +1 -1
  3367. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hardtanh.h +1 -1
  3368. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hardtanh_grad.h +1 -1
  3369. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hshrink.h +1 -1
  3370. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hshrink_grad.h +1 -1
  3371. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hsigmoid.h +1 -1
  3372. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hsigmoid_grad.h +1 -1
  3373. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hswish.h +1 -1
  3374. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/hswish_grad.h +1 -1
  3375. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/identity.h +1 -1
  3376. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/im2col_ext.h +1 -1
  3377. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/incre_flash_attention.h +1 -1
  3378. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/index.h +1 -1
  3379. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/index_add_ext.h +1 -1
  3380. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/index_fill_scalar.h +1 -1
  3381. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/index_fill_tensor.h +1 -1
  3382. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_comm_all_gather.h +1 -1
  3383. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_comm_all_reduce.h +1 -1
  3384. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_comm_all_to_all_v.h +1 -1
  3385. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_comm_irecv.h +1 -1
  3386. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_comm_isend.h +1 -1
  3387. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_comm_reduce_scatter.h +1 -1
  3388. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_index.h +1 -1
  3389. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_inplace_index_put.h +1 -1
  3390. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_moe_token_unpermute.h +38 -0
  3391. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inner_non_zero.h +1 -1
  3392. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_addmm.h +1 -1
  3393. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_bernoulli_scalar.h +38 -0
  3394. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_bernoulli_tensor.h +38 -0
  3395. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_clamp_scalar.h +1 -1
  3396. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_clamp_tensor.h +1 -1
  3397. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_copy.h +1 -1
  3398. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_div.h +1 -1
  3399. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_divmod.h +1 -1
  3400. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_divmods.h +1 -1
  3401. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_divs.h +1 -1
  3402. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_elu.h +1 -1
  3403. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_erfinv.h +1 -1
  3404. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_exponential.h +38 -0
  3405. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_fill_diagonal.h +1 -1
  3406. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_fill_scalar.h +1 -1
  3407. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_fill_tensor.h +1 -1
  3408. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_floor.h +1 -1
  3409. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_floor_divide.h +1 -1
  3410. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_floor_divides.h +1 -1
  3411. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_grouped_matmul_add.h +1 -1
  3412. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_hardtanh.h +1 -1
  3413. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_index_add.h +1 -1
  3414. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_index_put.h +1 -1
  3415. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_log.h +1 -1
  3416. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_masked_fill_scalar.h +1 -1
  3417. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_masked_fill_tensor.h +1 -1
  3418. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_matmul_add.h +38 -0
  3419. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_mul.h +1 -1
  3420. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_muls.h +1 -1
  3421. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_normal.h +1 -1
  3422. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_put.h +1 -1
  3423. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_random.h +1 -1
  3424. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_relu.h +1 -1
  3425. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_scatter_add.h +1 -1
  3426. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_scatter_src.h +1 -1
  3427. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_scatter_src_reduce.h +1 -1
  3428. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_scatter_value.h +1 -1
  3429. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_scatter_value_reduce.h +1 -1
  3430. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_silu.h +38 -0
  3431. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_sub_scalar.h +1 -1
  3432. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_tanh.h +1 -1
  3433. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_threshold.h +1 -1
  3434. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_uniform.h +1 -1
  3435. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/inplace_zero.h +1 -1
  3436. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/isclose.h +1 -1
  3437. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/isfinite.h +1 -1
  3438. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/isinf.h +1 -1
  3439. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/isneginf.h +1 -1
  3440. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/kl_div.h +1 -1
  3441. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/kl_div_grad.h +1 -1
  3442. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/kthvalue.h +1 -1
  3443. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/kv_cache_scatter_update.h +1 -1
  3444. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/leaky_relu_ext.h +1 -1
  3445. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/leaky_relu_grad_ext.h +1 -1
  3446. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/lerp.h +1 -1
  3447. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/lerp_scalar.h +1 -1
  3448. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/less.h +1 -1
  3449. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/less_equal.h +1 -1
  3450. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/linalg_qr.h +1 -1
  3451. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/linalg_vector_norm.h +1 -1
  3452. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/log.h +1 -1
  3453. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/log1p.h +1 -1
  3454. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/log_softmax.h +1 -1
  3455. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/log_softmax_grad.h +1 -1
  3456. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logaddexp.h +1 -1
  3457. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logaddexp2.h +1 -1
  3458. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logical_and.h +1 -1
  3459. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logical_not.h +1 -1
  3460. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logical_or.h +1 -1
  3461. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logical_xor.h +1 -1
  3462. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logsigmoid.h +1 -1
  3463. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logsigmoid_grad.h +1 -1
  3464. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/logsumexp.h +1 -1
  3465. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/masked_fill.h +1 -1
  3466. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/masked_scatter.h +38 -0
  3467. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/masked_select.h +1 -1
  3468. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/masked_select_grad.h +1 -1
  3469. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/matmul.h +1 -1
  3470. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/matmul_allreduce_add_rmsnorm.h +1 -1
  3471. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/matmul_ext.h +1 -1
  3472. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/max.h +1 -1
  3473. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/max_dim.h +1 -1
  3474. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/maximum.h +1 -1
  3475. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/mean_ext.h +1 -1
  3476. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/median_dim.h +1 -1
  3477. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/median_ext.h +1 -1
  3478. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/meshgrid.h +1 -1
  3479. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/min.h +1 -1
  3480. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/min_dim.h +1 -1
  3481. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/minimum.h +1 -1
  3482. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/mish_ext.h +1 -1
  3483. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/mish_grad_ext.h +1 -1
  3484. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/mla.h +38 -0
  3485. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_compute_expert_tokens.h +1 -1
  3486. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_distribute_combine.h +38 -0
  3487. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_distribute_dispatch.h +38 -0
  3488. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_finalize_routing.h +1 -1
  3489. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_gating_top_k_softmax.h +1 -1
  3490. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_init_routing.h +1 -1
  3491. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_init_routing_quant_v2.h +38 -0
  3492. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_init_routing_v2.h +1 -1
  3493. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_token_permute.h +1 -1
  3494. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_token_permute_grad.h +1 -1
  3495. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_token_unpermute.h +1 -1
  3496. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/moe_token_unpermute_grad.h +1 -1
  3497. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/mul.h +1 -1
  3498. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/muls.h +1 -1
  3499. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/multi_scale_deformable_attn.h +1 -1
  3500. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/multi_scale_deformable_attn_grad.h +1 -1
  3501. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/mv.h +1 -1
  3502. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/nan_to_num.h +1 -1
  3503. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/ne_scalar.h +1 -1
  3504. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/neg.h +1 -1
  3505. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/new_full.h +38 -0
  3506. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/new_ones.h +1 -1
  3507. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/new_zeros.h +1 -1
  3508. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/nllloss.h +1 -1
  3509. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/nllloss_2d.h +1 -1
  3510. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/nllloss_2d_grad.h +1 -1
  3511. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/nllloss_grad.h +1 -1
  3512. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/non_zero.h +1 -1
  3513. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/norm.h +1 -1
  3514. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/normal_float_float.h +1 -1
  3515. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/normal_float_tensor.h +1 -1
  3516. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/normal_tensor_float.h +1 -1
  3517. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/normal_tensor_tensor.h +1 -1
  3518. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/not_equal.h +1 -1
  3519. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/ones.h +1 -1
  3520. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/outer.h +1 -1
  3521. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/paged_attention.h +38 -0
  3522. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/pixel_shuffle.h +1 -1
  3523. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/polar.h +1 -1
  3524. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/pow.h +1 -1
  3525. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/pow_scalar_tensor.h +1 -1
  3526. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/pow_tensor_scalar.h +1 -1
  3527. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/prelu.h +1 -1
  3528. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/prelu_grad.h +1 -1
  3529. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/prod_ext.h +1 -1
  3530. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/quant_batch_matmul.h +1 -1
  3531. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/quant_matmul.h +38 -0
  3532. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/quant_v2.h +1 -1
  3533. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/randint.h +1 -1
  3534. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/randint_like.h +1 -1
  3535. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/randn.h +1 -1
  3536. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/randn_like.h +1 -1
  3537. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reciprocal.h +1 -1
  3538. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reduce_all.h +1 -1
  3539. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reduce_any.h +1 -1
  3540. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reduce_max.h +1 -1
  3541. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reduce_min.h +1 -1
  3542. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reflection_pad_1d.h +1 -1
  3543. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reflection_pad_1d_grad.h +1 -1
  3544. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reflection_pad_2d.h +1 -1
  3545. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reflection_pad_2d_grad.h +1 -1
  3546. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reflection_pad_3d.h +1 -1
  3547. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reflection_pad_3d_grad.h +1 -1
  3548. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/relu.h +1 -1
  3549. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/relu_grad.h +1 -1
  3550. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/repeat.h +1 -1
  3551. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/replication_pad_1d.h +1 -1
  3552. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/replication_pad_1d_grad.h +1 -1
  3553. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/replication_pad_2d.h +1 -1
  3554. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/replication_pad_2d_grad.h +1 -1
  3555. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/replication_pad_3d.h +1 -1
  3556. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/replication_pad_3d_grad.h +1 -1
  3557. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reshape.h +1 -1
  3558. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reshape_and_cache.h +38 -0
  3559. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/reverse_v2.h +1 -1
  3560. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/rms_norm_grad.h +1 -1
  3561. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/roll.h +1 -1
  3562. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/rotary_position_embedding.h +1 -1
  3563. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/rotary_position_embedding_grad.h +1 -1
  3564. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/round.h +1 -1
  3565. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/rsqrt.h +1 -1
  3566. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/scatter.h +1 -1
  3567. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/scatter_add_ext.h +1 -1
  3568. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/scatter_value.h +1 -1
  3569. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/searchsorted.h +1 -1
  3570. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/select.h +1 -1
  3571. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/select_ext_view.h +38 -0
  3572. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/select_v2.h +1 -1
  3573. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/selu_ext.h +1 -1
  3574. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/selu_grad.h +1 -1
  3575. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sigmoid.h +1 -1
  3576. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sigmoid_grad.h +1 -1
  3577. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sign.h +1 -1
  3578. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/silu.h +1 -1
  3579. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/silu_grad.h +1 -1
  3580. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sin.h +1 -1
  3581. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sinc.h +1 -1
  3582. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sinh.h +1 -1
  3583. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/slice.h +1 -1
  3584. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/slice_ext.h +1 -1
  3585. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/slice_ext_view.h +38 -0
  3586. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/smooth_l1_loss.h +1 -1
  3587. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/smooth_l1_loss_grad.h +1 -1
  3588. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/soft_margin_loss.h +1 -1
  3589. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/soft_margin_loss_grad.h +1 -1
  3590. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/softmax.h +1 -1
  3591. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/softmax_backward.h +1 -1
  3592. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/softplus_ext.h +1 -1
  3593. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/softplus_grad_ext.h +1 -1
  3594. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/softshrink.h +1 -1
  3595. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/softshrink_grad.h +1 -1
  3596. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/speed_fusion_attention.h +1 -1
  3597. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/speed_fusion_attention_grad.h +1 -1
  3598. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/split.h +1 -1
  3599. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sqrt.h +1 -1
  3600. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/square.h +1 -1
  3601. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/squeeze.h +1 -1
  3602. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sub.h +1 -1
  3603. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sub_ext.h +1 -1
  3604. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sub_scalar.h +1 -1
  3605. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/sum_ext.h +1 -1
  3606. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/take.h +1 -1
  3607. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/tan.h +1 -1
  3608. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/tanh.h +1 -1
  3609. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/tanh_grad.h +1 -1
  3610. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/tensor_scatter_elements.h +1 -1
  3611. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/threshold.h +1 -1
  3612. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/threshold_grad.h +1 -1
  3613. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/tile.h +1 -1
  3614. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/transpose.h +1 -1
  3615. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/transpose_ext_view.h +38 -0
  3616. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/transpose_view.h +38 -0
  3617. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/triangular_solve.h +1 -1
  3618. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/triu.h +1 -1
  3619. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/trunc.h +1 -1
  3620. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_bicubic2d.h +1 -1
  3621. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_bicubic2d_grad.h +1 -1
  3622. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_bilinear2d.h +1 -1
  3623. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_bilinear2d_grad.h +1 -1
  3624. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_linear1d.h +1 -1
  3625. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_linear1d_grad.h +1 -1
  3626. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_nearest1d.h +1 -1
  3627. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_nearest1d_grad.h +1 -1
  3628. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_nearest2d.h +1 -1
  3629. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_nearest2d_grad.h +1 -1
  3630. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_nearest3d.h +1 -1
  3631. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_nearest3d_grad.h +1 -1
  3632. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_trilinear3d.h +1 -1
  3633. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/upsample_trilinear3d_grad.h +1 -1
  3634. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/view.h +1 -1
  3635. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/weight_quant_batch_matmul.h +1 -1
  3636. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/xlogy.h +1 -1
  3637. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/xlogy_scalar_other.h +1 -1
  3638. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/xlogy_scalar_self.h +1 -1
  3639. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/zeros.h +1 -1
  3640. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/any.h +34 -0
  3641. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/any_ext.h +35 -0
  3642. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/cell_backward_hook.h +27 -0
  3643. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/clamp_scalar.h +2 -2
  3644. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/clamp_tensor.h +2 -3
  3645. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/contiguous.h +1 -1
  3646. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/copy.h +1 -1
  3647. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/dense.h +2 -2
  3648. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/divmod.h +2 -3
  3649. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/grouped_matmul.h +1 -1
  3650. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/identity.h +1 -1
  3651. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/inner_comm_all_gather.h +1 -1
  3652. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/inner_comm_all_reduce.h +1 -1
  3653. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/inner_comm_isend.h +1 -1
  3654. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/inner_comm_reduce_scatter.h +1 -1
  3655. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/masked_select.h +2 -2
  3656. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/matmul_ext.h +1 -1
  3657. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/max.h +1 -1
  3658. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/mean_ext.h +1 -1
  3659. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/meshgrid.h +4 -2
  3660. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/min.h +1 -1
  3661. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/new_ones.h +2 -2
  3662. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/new_zeros.h +2 -2
  3663. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/non_zero.h +1 -1
  3664. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/pixel_shuffle.h +2 -2
  3665. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/pow_scalar_tensor.h +1 -1
  3666. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/pow_tensor_scalar.h +1 -1
  3667. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/prod_ext.h +1 -1
  3668. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/reshape.h +4 -2
  3669. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/searchsorted.h +3 -4
  3670. mindspore/include/mindspore/ops/kernel/gpu/pyboost/customize/sum_ext.h +1 -1
  3671. mindspore/include/mindspore/ops/kernel/gpu/random/random_categorical_gpu_kernel.h +4 -8
  3672. mindspore/include/mindspore/ops/kernel/gpu/rl/buffer_sample_gpu_kernel.h +8 -8
  3673. mindspore/include/mindspore/ops/kernel/gpu/rl/gru_gpu_kernel.h +1 -0
  3674. mindspore/include/mindspore/ops/kernel/gpu/sparse/csr_sparse_matrix_to_sparse_tensor_gpu_kernel.h +1 -1
  3675. mindspore/include/mindspore/ops/kernel/gpu/sparse/dense_to_csr_sparse_matrix_gpu_kernel.h +3 -3
  3676. mindspore/include/mindspore/ops/kernel/gpu/sparse/sparse_matrix_sparse_matmul_gpu_kernel.h +2 -2
  3677. mindspore/include/mindspore/ops/kernel/include/common/common_utils.h +9 -0
  3678. mindspore/include/mindspore/ops/kernel/include/common/device_address.h +290 -33
  3679. mindspore/include/mindspore/ops/kernel/include/common/device_type.h +3 -5
  3680. mindspore/include/mindspore/ops/kernel/include/common/kernel.h +26 -1
  3681. mindspore/include/mindspore/ops/kernel/include/common/kernel_tensor.h +135 -306
  3682. mindspore/include/mindspore/ops/op_def/array_op_name.h +0 -1
  3683. mindspore/include/mindspore/ops/op_def/array_ops.h +0 -2
  3684. mindspore/include/mindspore/ops/op_def/auto_generate/gen_lite_ops.h +3255 -2915
  3685. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_def.h +815 -760
  3686. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_a.h +60 -58
  3687. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_b.h +26 -25
  3688. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_c.h +35 -31
  3689. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_d.h +29 -25
  3690. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_e.h +26 -24
  3691. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_f.h +22 -19
  3692. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_g.h +23 -19
  3693. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_h.h +7 -7
  3694. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_i.h +73 -65
  3695. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_k.h +2 -1
  3696. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_l.h +31 -31
  3697. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_m.h +47 -39
  3698. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_n.h +18 -16
  3699. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_o.h +3 -3
  3700. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_p.h +6 -7
  3701. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_q.h +5 -2
  3702. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_r.h +61 -60
  3703. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_s.h +67 -63
  3704. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_t.h +22 -20
  3705. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_u.h +14 -14
  3706. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_v.h +1 -1
  3707. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_x.h +1 -1
  3708. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_name_z.h +1 -1
  3709. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_a.h +60 -58
  3710. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_b.h +26 -25
  3711. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_c.h +35 -31
  3712. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_d.h +29 -25
  3713. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_e.h +26 -24
  3714. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_f.h +22 -19
  3715. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_g.h +23 -19
  3716. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_h.h +7 -7
  3717. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_i.h +73 -65
  3718. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_k.h +2 -1
  3719. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_l.h +31 -31
  3720. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_m.h +47 -39
  3721. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_n.h +18 -16
  3722. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_o.h +3 -3
  3723. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_p.h +6 -7
  3724. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_q.h +5 -2
  3725. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_r.h +61 -60
  3726. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_s.h +67 -63
  3727. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_t.h +22 -20
  3728. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_u.h +14 -14
  3729. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_v.h +1 -1
  3730. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_x.h +1 -1
  3731. mindspore/include/mindspore/ops/op_def/auto_generate/gen_ops_primitive_z.h +1 -1
  3732. mindspore/include/mindspore/ops/op_def/framework_op_name.h +0 -1
  3733. mindspore/include/mindspore/ops/op_def/framework_ops.h +3 -2
  3734. mindspore/include/mindspore/ops/op_def/nn_op_name.h +4 -0
  3735. mindspore/include/mindspore/ops/op_def/op_enum.h +4 -0
  3736. mindspore/include/mindspore/ops/op_def/other_op_name.h +6 -0
  3737. mindspore/include/mindspore/ops/op_def/other_ops.h +2 -0
  3738. mindspore/include/mindspore/ops/op_def/structure_ops.h +12 -3
  3739. mindspore/include/mindspore/ops/ops_utils/memory_overlap.h +4 -5
  3740. mindspore/include/mindspore/ops/ops_utils/op_constants.h +13 -0
  3741. mindspore/include/mindspore/ops/ops_utils/op_utils.h +14 -16
  3742. mindspore/include/mindspore/ops/ops_utils/type_dispatch.h +51 -42
  3743. mindspore/include/mindspore/ops/view/as_strided_strides_calc.h +5 -1
  3744. mindspore/include/mindspore/ops/view/broadcast_to_strides_calc.h +7 -2
  3745. mindspore/include/mindspore/ops/view/broadcast_to_view_strides_calc.h +34 -0
  3746. mindspore/include/mindspore/ops/view/chunk_strides_calc.h +5 -1
  3747. mindspore/include/mindspore/ops/view/chunk_view_strides_calc.h +35 -0
  3748. mindspore/include/mindspore/ops/view/diagonal_strides_calc.h +6 -3
  3749. mindspore/include/mindspore/ops/view/diagonal_view_strides_calc.h +32 -0
  3750. mindspore/include/mindspore/ops/view/expand_dims_strides_calc.h +5 -1
  3751. mindspore/include/mindspore/ops/view/expand_dims_view_strides_calc.h +34 -0
  3752. mindspore/include/mindspore/ops/view/narrow_strides_calc.h +4 -2
  3753. mindspore/include/mindspore/ops/view/narrow_view_strides_calc.h +33 -0
  3754. mindspore/include/mindspore/ops/view/reshape_strides_calc.h +2 -1
  3755. mindspore/include/mindspore/ops/view/select_ext_view_strides_calc.h +33 -0
  3756. mindspore/include/mindspore/ops/view/slice_ext_strides_calc.h +7 -1
  3757. mindspore/include/mindspore/ops/view/slice_ext_view_strides_calc.h +35 -0
  3758. mindspore/include/mindspore/ops/view/slice_strides_calc.h +4 -1
  3759. mindspore/include/mindspore/ops/view/split_strides_calc.h +3 -1
  3760. mindspore/include/mindspore/ops/view/split_tensor_strides_calc.h +5 -2
  3761. mindspore/include/mindspore/ops/view/split_tensor_view_strides_calc.h +33 -0
  3762. mindspore/include/mindspore/ops/view/split_with_size_strides_calc.h +5 -1
  3763. mindspore/include/mindspore/ops/view/split_with_size_view_strides_calc.h +36 -0
  3764. mindspore/include/mindspore/ops/view/squeeze_strides_calc.h +3 -0
  3765. mindspore/include/mindspore/ops/view/transpose_ext_view_strides_calc.h +33 -0
  3766. mindspore/include/mindspore/ops/view/transpose_strides_calc.h +3 -1
  3767. mindspore/include/mindspore/ops/view/transpose_view_strides_calc.h +32 -0
  3768. mindspore/include/mindspore/ops/view/unstack_ext_view_strides_calc.h +28 -0
  3769. mindspore/include/mindspore/ops/view/view_strides_calc.h +5 -2
  3770. mindspore/include/mindspore/ops/view/view_strides_calculator.h +1 -2
  3771. mindspore/include/ms_extension.h +12 -5
  3772. mindspore/lib/libavcodec.so.59 +0 -0
  3773. mindspore/lib/libavdevice.so.59 +0 -0
  3774. mindspore/lib/libavfilter.so.8 +0 -0
  3775. mindspore/lib/libavformat.so.59 +0 -0
  3776. mindspore/lib/libavutil.so.57 +0 -0
  3777. mindspore/lib/libdnnl.so.2 +0 -0
  3778. mindspore/lib/libicuuc.so.74 +0 -0
  3779. mindspore/lib/libmindspore_backend_common.so +0 -0
  3780. mindspore/lib/libmindspore_backend_manager.so +0 -0
  3781. mindspore/lib/libmindspore_common.so +0 -0
  3782. mindspore/lib/libmindspore_core.so +0 -0
  3783. mindspore/lib/libmindspore_dump.so +0 -0
  3784. mindspore/lib/libmindspore_extension.so +0 -0
  3785. mindspore/lib/libmindspore_frontend.so +0 -0
  3786. mindspore/lib/libmindspore_ge_backend.so +0 -0
  3787. mindspore/lib/libmindspore_glog.so.0 +0 -0
  3788. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  3789. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  3790. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  3791. mindspore/lib/libmindspore_memory_pool.so +0 -0
  3792. mindspore/lib/libmindspore_ms_backend.so +0 -0
  3793. mindspore/lib/libmindspore_ops.so +0 -0
  3794. mindspore/lib/libmindspore_ops_kernel_common.so +0 -0
  3795. mindspore/lib/libmindspore_profiler.so +0 -0
  3796. mindspore/lib/libmindspore_pyboost.so +0 -0
  3797. mindspore/lib/libmindspore_pynative.so +0 -0
  3798. mindspore/lib/libmindspore_res_manager.so +0 -0
  3799. mindspore/lib/libmindspore_runtime_pipeline.so +0 -0
  3800. mindspore/lib/libmpi_adapter.so +0 -0
  3801. mindspore/lib/libmpi_collective.so +0 -0
  3802. mindspore/lib/libnnacl.so +0 -0
  3803. mindspore/lib/libopencv_core.so.4.5 +0 -0
  3804. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  3805. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  3806. mindspore/lib/libps_cache.so +0 -0
  3807. mindspore/lib/libswresample.so.4 +0 -0
  3808. mindspore/lib/libswscale.so.6 +0 -0
  3809. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  3810. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +96 -152
  3811. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  3812. mindspore/lib/plugin/ascend/custom_ascendc_910b/framework/plugin/npu_supported_ops.json +6 -0
  3813. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/lib/libcust_opapi.so +0 -0
  3814. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -180
  3815. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/config/ascend910_93/aic-ascend910_93-ops-info.json +0 -180
  3816. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +0 -180
  3817. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/all_finite.py +28 -12
  3818. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +2 -1
  3819. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o +0 -0
  3820. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +2 -1
  3821. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o +0 -0
  3822. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +2 -1
  3823. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o +0 -0
  3824. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +2 -1
  3825. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o +0 -0
  3826. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +2 -1
  3827. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o +0 -0
  3828. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +2 -1
  3829. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o +0 -0
  3830. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +2 -1
  3831. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o +0 -0
  3832. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +2 -1
  3833. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o +0 -0
  3834. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +2 -1
  3835. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o +0 -0
  3836. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/binary_info_config.json +0 -300
  3837. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910_93/binary_info_config.json +0 -300
  3838. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +0 -300
  3839. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  3840. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  3841. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_proto/inc/op_proto.h +0 -22
  3842. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  3843. mindspore/lib/plugin/ascend/custom_ascendc_910b/version.info +1 -1
  3844. mindspore/lib/plugin/ascend/custom_compiler/OWNERS +1 -3
  3845. mindspore/lib/plugin/ascend/libakg.so +0 -0
  3846. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  3847. mindspore/lib/plugin/ascend/libd_collective.so +0 -0
  3848. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  3849. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  3850. mindspore/lib/plugin/ascend/liblowlatency_collective.so +0 -0
  3851. mindspore/lib/plugin/ascend/libmindspore_ascend_res_manager.so +0 -0
  3852. mindspore/lib/plugin/ascend/libmindspore_atb_kernels.so +0 -0
  3853. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  3854. mindspore/lib/plugin/ascend/libmindspore_extension_ascend_atb.a +0 -0
  3855. mindspore/lib/plugin/ascend/libmindspore_graph_ir.so +0 -0
  3856. mindspore/lib/plugin/ascend/libmindspore_internal_kernels.so +0 -0
  3857. mindspore/lib/plugin/ascend/libmindspore_pyboost_atb_kernels.so +0 -0
  3858. mindspore/lib/plugin/ascend/libms_ascend_native_boost.so +0 -0
  3859. mindspore/lib/plugin/ascend/libms_atb_boost.so +0 -0
  3860. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/faUpdate.h +35 -0
  3861. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/fill.h +4 -1
  3862. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/logprobs.h +28 -0
  3863. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/matmul.h +7 -6
  3864. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/norm.h +8 -5
  3865. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/params.h +3 -0
  3866. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/scatter_elements_v2.h +39 -0
  3867. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/fused_add_topk_div.h +42 -0
  3868. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/kvcache.h +7 -1
  3869. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/mla.h +55 -0
  3870. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/mla_preprocess.h +39 -0
  3871. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/pagedattention.h +1 -0
  3872. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/params.h +7 -0
  3873. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/reshape_and_cache.h +2 -1
  3874. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/rms_norm_and_rope_and_reshape_and_cache.h +31 -0
  3875. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/rope_q_concat.h +26 -0
  3876. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/swiglu_quant.h +26 -0
  3877. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/toppsample_rand.h +31 -0
  3878. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/atbops/params/unpad_flash_attention.h +12 -1
  3879. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcal_api.h +1 -1
  3880. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcal_comm.h +4 -3
  3881. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcal_types.h +2 -1
  3882. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lccl.h +2 -0
  3883. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/lcoc.h +53 -0
  3884. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/lcoc_args.h +116 -0
  3885. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/lcoc_base.h +57 -0
  3886. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/lcoc_func.h +33 -0
  3887. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/tiling/tiling.h +86 -0
  3888. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/tiling/tiling_91093.h +31 -0
  3889. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/tiling/tiling_910B.h +31 -0
  3890. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/tiling/tiling_args.h +154 -0
  3891. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc/tiling/tiling_func.h +50 -0
  3892. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc.h +5 -35
  3893. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc_args.h +97 -47
  3894. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/lcoc_func.h +33 -0
  3895. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling/tiling.h +86 -0
  3896. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling/tiling_91093.h +31 -0
  3897. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling/tiling_910B.h +31 -0
  3898. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling/tiling_args.h +154 -0
  3899. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling/tiling_func.h +50 -0
  3900. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling.h +86 -0
  3901. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling_91093.h +11 -9
  3902. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling_910B.h +12 -10
  3903. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling_args.h +38 -69
  3904. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/lcal/tiling_func.h +14 -8
  3905. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/base/aicpu_kernel_base.h +1 -1
  3906. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/bin_handle.h +6 -0
  3907. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/tensor.h +5 -0
  3908. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/types.h +4 -1
  3909. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/bf16/bf16_t.h +20 -0
  3910. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/cfg/cfg_core.h +39 -0
  3911. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/cfg/cfg_item.h +25 -0
  3912. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/file_system/file_system.h +2 -0
  3913. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/inifile/ini_file.h +2 -0
  3914. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/log/log.h +7 -7
  3915. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/log/log_core.h +1 -0
  3916. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/log/log_sink_file.h +1 -0
  3917. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/mki/utils/rt/base/types.h +2 -2
  3918. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
  3919. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
  3920. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libatb_mixops.so +0 -0
  3921. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libatb_mixops_static.a +0 -0
  3922. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libexp_mixops_static.a +0 -0
  3923. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libexp_ops_static.a +0 -0
  3924. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal.so +0 -0
  3925. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal_static.a +0 -0
  3926. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libmki.so +0 -0
  3927. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libtbe_adapter.so +0 -0
  3928. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/base_type.h +9 -4
  3929. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_op.h +8 -10
  3930. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_creator.h +40 -7
  3931. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_param.h +85 -1
  3932. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tiling_utils.h +3 -138
  3933. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layer_norm_op.so +0 -0
  3934. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_op.so +0 -0
  3935. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_quant_op.so +0 -0
  3936. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_op.so +0 -0
  3937. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_op.so +0 -0
  3938. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_op.so +0 -0
  3939. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcompare_op.so +0 -0
  3940. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libfused_add_topk_div_op.so +0 -0
  3941. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_op.so +0 -0
  3942. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgroup_topk_op.so +0 -0
  3943. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libkv_scale_cache_op.so +0 -0
  3944. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libllama_op.so +0 -0
  3945. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_op.so +0 -0
  3946. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmoe_gating_group_topk_op.so +0 -0
  3947. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmoe_init_routing_op.so +0 -0
  3948. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmoe_token_unpermute_op.so +0 -0
  3949. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
  3950. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmulti_weight_matmul_kernel_gelu_op.so +0 -0
  3951. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmulti_weight_matmul_kernel_op.so +0 -0
  3952. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_op.so +0 -0
  3953. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_op.so +0 -0
  3954. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_op.so +0 -0
  3955. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libswft_dynamic_quant_op.so +0 -0
  3956. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libswft_matmul_op.so +0 -0
  3957. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libswft_moe_init_routing_op.so +0 -0
  3958. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libswft_paged_attention_op.so +0 -0
  3959. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libswft_reshape_and_cache_nz_op.so +0 -0
  3960. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libswft_transpose_batch_matmul_transpose_op.so +0 -0
  3961. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libswiglu_dynamic_quant_op.so +0 -0
  3962. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libtranspose_batch_matmul_transpose_op.so +0 -0
  3963. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_grouped_matmul_f16_310p/internal_grouped_matmul_f16_310p.o +0 -0
  3964. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_grouped_matmul_f16_310p/internal_grouped_matmul_f16_310p_0.o +0 -0
  3965. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_grouped_matmul_i8_310p/internal_grouped_matmul_i8_310p.o +0 -0
  3966. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_grouped_matmul_i8_310p/internal_grouped_matmul_i8_310p_0.o +0 -0
  3967. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_f16_nz/internal_pp_matmul_f16_nz.o +0 -0
  3968. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_f16_nz/internal_pp_matmul_f16_nz_0.o +0 -0
  3969. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_i8_nz_compress/internal_pp_matmul_i8_nz_compress.o +0 -0
  3970. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_i8_nz_compress/internal_pp_matmul_i8_nz_compress_0.o +0 -0
  3971. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_int8_nz/internal_pp_matmul_int8_nz.o +0 -0
  3972. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_int8_nz/internal_pp_matmul_int8_nz_0.o +0 -0
  3973. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libadd_rms_norm_ascend310p.so +0 -0
  3974. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libadd_rms_norm_quant_ascend310p.so +0 -0
  3975. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libapply_rotary_pos_emb_310p_ascend310p.so +0 -0
  3976. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libcast_ascend310p.so +0 -0
  3977. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libcompare_ascend310p.so +0 -0
  3978. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libfused_add_topk_div_ascend310p.so +0 -0
  3979. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libgelu_ascend310p.so +0 -0
  3980. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libmatmul_ascend310p.so +0 -0
  3981. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libmoe_gating_group_topk_ascend310p.so +0 -0
  3982. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libmoe_init_routing_ascend310p.so +0 -0
  3983. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libmoe_token_unpermute_ascend310p.so +0 -0
  3984. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libmulti_weight_matmul_kernel_ascend310p.so +0 -0
  3985. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libmulti_weight_matmul_kernel_gelu_ascend310p.so +0 -0
  3986. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libreshape_and_cache_nz_ascend310p.so +0 -0
  3987. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libswft_dynamic_quant_ascend310p.so +0 -0
  3988. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libswft_matmul_ascend310p.so +0 -0
  3989. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libswft_moe_init_routing_ascend310p.so +0 -0
  3990. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libswft_paged_attention_ascend310p.so +0 -0
  3991. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libswft_reshape_and_cache_nz_ascend310p.so +0 -0
  3992. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libswft_transpose_batch_matmul_transpose_ascend310p.so +0 -0
  3993. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libswiglu_dynamic_quant_ascend310p.so +0 -0
  3994. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
  3995. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
  3996. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bsh_full_mix.o +0 -0
  3997. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
  3998. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
  3999. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
  4000. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bsh_full_mix.o +0 -0
  4001. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
  4002. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_matmul_postfusion_mix/internal_matmul_postfusion_mix.o +0 -0
  4003. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_matmul_postfusion_mix/internal_matmul_postfusion_mix_mix_aic_0.o +0 -0
  4004. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_matmul_postfusion_mix/internal_matmul_postfusion_mix_mix_aiv_0.o +0 -0
  4005. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_multi_weight_matmul_postfusion_mix/internal_multi_weight_matmul_postfusion_mix.o +0 -0
  4006. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_multi_weight_matmul_postfusion_mix/internal_multi_weight_matmul_postfusion_mix_mix_aic_0.o +0 -0
  4007. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_multi_weight_matmul_postfusion_mix/internal_multi_weight_matmul_postfusion_mix_mix_aiv_0.o +0 -0
  4008. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_bf16.o +0 -0
  4009. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_fp16.o +0 -0
  4010. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_fp32.o +0 -0
  4011. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_bf16.o +0 -0
  4012. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_fp16.o +0 -0
  4013. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_fp32.o +0 -0
  4014. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/paged_attention_v2/paged_attention_v2.o +0 -0
  4015. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/paged_attention_v2/paged_attention_v2_mix_aic_0.o +0 -0
  4016. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/paged_attention_v2/paged_attention_v2_mix_aiv_0.o +0 -0
  4017. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libadd_layer_norm_ascend910b.so +0 -0
  4018. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libadd_rms_norm_ascend910b.so +0 -0
  4019. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libadd_rms_norm_quant_ascend910b.so +0 -0
  4020. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libapply_rotary_pos_emb_ascend910b.so +0 -0
  4021. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libcast_ascend910b.so +0 -0
  4022. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libcompare_ascend910b.so +0 -0
  4023. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libgelu_ascend910b.so +0 -0
  4024. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libgroup_topk_ascend910b.so +0 -0
  4025. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libkv_scale_cache_ascend910b.so +0 -0
  4026. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libllama_ascend910b.so +0 -0
  4027. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libmatmul_ascend910b.so +0 -0
  4028. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libmoe_gating_group_topk_ascend910b.so +0 -0
  4029. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libmulti_weight_matmul_kernel_ascend910b.so +0 -0
  4030. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libreshape_and_cache_ascend910b.so +0 -0
  4031. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/librms_norm_ascend910b.so +0 -0
  4032. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libswiglu_dynamic_quant_ascend910b.so +0 -0
  4033. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libtranspose_batch_matmul_transpose_ascend910b.so +0 -0
  4034. mindspore/lib/plugin/cpu/libakg.so +0 -0
  4035. mindspore/lib/plugin/cpu/libmindspore_cpu_res_manager.so +0 -0
  4036. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  4037. mindspore/lib/plugin/libmindspore_ops_ascend.so +0 -0
  4038. mindspore/lib/plugin/libmindspore_ops_host.so +0 -0
  4039. mindspore/mindrecord/tools/cifar10.py +61 -11
  4040. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  4041. mindspore/mint/__init__.py +6 -46
  4042. mindspore/mint/distributed/__init__.py +5 -0
  4043. mindspore/mint/distributed/distributed.py +429 -23
  4044. mindspore/mint/nn/__init__.py +1 -1
  4045. mindspore/mint/nn/functional.py +53 -6
  4046. mindspore/mint/nn/layer/_functions.py +163 -294
  4047. mindspore/mint/nn/layer/activation.py +8 -6
  4048. mindspore/mint/nn/layer/conv.py +140 -104
  4049. mindspore/mint/nn/layer/normalization.py +11 -25
  4050. mindspore/mint/optim/adam.py +19 -18
  4051. mindspore/mint/optim/adamw.py +14 -8
  4052. mindspore/mint/optim/sgd.py +5 -5
  4053. mindspore/nn/cell.py +491 -623
  4054. mindspore/nn/grad/cell_grad.py +11 -12
  4055. mindspore/nn/layer/activation.py +36 -36
  4056. mindspore/nn/layer/basic.py +74 -77
  4057. mindspore/nn/layer/channel_shuffle.py +4 -4
  4058. mindspore/nn/layer/combined.py +4 -2
  4059. mindspore/nn/layer/conv.py +117 -110
  4060. mindspore/nn/layer/dense.py +9 -7
  4061. mindspore/nn/layer/embedding.py +50 -52
  4062. mindspore/nn/layer/image.py +38 -40
  4063. mindspore/nn/layer/math.py +111 -112
  4064. mindspore/nn/layer/normalization.py +56 -44
  4065. mindspore/nn/layer/pooling.py +58 -63
  4066. mindspore/nn/layer/rnn_cells.py +33 -33
  4067. mindspore/nn/layer/rnns.py +56 -56
  4068. mindspore/nn/layer/thor_layer.py +74 -73
  4069. mindspore/nn/layer/transformer.py +11 -1
  4070. mindspore/nn/learning_rate_schedule.py +20 -20
  4071. mindspore/nn/loss/loss.py +79 -81
  4072. mindspore/nn/optim/adam.py +4 -6
  4073. mindspore/nn/optim/adasum.py +2 -2
  4074. mindspore/nn/optim/asgd.py +2 -0
  4075. mindspore/nn/optim/lamb.py +1 -3
  4076. mindspore/nn/optim/optimizer.py +1 -1
  4077. mindspore/nn/optim/tft_wrapper.py +2 -3
  4078. mindspore/nn/optim/thor.py +2 -2
  4079. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  4080. mindspore/nn/probability/distribution/exponential.py +2 -1
  4081. mindspore/nn/probability/distribution/poisson.py +2 -1
  4082. mindspore/nn/sparse/sparse.py +3 -3
  4083. mindspore/nn/wrap/cell_wrapper.py +73 -42
  4084. mindspore/nn/wrap/grad_reducer.py +37 -52
  4085. mindspore/nn/wrap/loss_scale.py +72 -74
  4086. mindspore/numpy/array_creations.py +7 -7
  4087. mindspore/numpy/fft.py +1 -1
  4088. mindspore/numpy/math_ops.py +5 -5
  4089. mindspore/numpy/utils_const.py +1 -1
  4090. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  4091. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  4092. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  4093. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  4094. mindspore/ops/_op_impl/cpu/joinedstr_op.py +28 -0
  4095. mindspore/ops/_vmap/vmap_array_ops.py +31 -13
  4096. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  4097. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +461 -420
  4098. mindspore/ops/auto_generate/gen_extend_func.py +1250 -1368
  4099. mindspore/ops/auto_generate/gen_ops_def.py +6777 -6097
  4100. mindspore/ops/auto_generate/gen_ops_prim.py +16713 -15489
  4101. mindspore/ops/auto_generate/pyboost_inner_prim.py +365 -335
  4102. mindspore/ops/composite/__init__.py +10 -0
  4103. mindspore/ops/composite/base.py +9 -5
  4104. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  4105. mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
  4106. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  4107. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  4108. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  4109. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  4110. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  4111. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  4112. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  4113. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  4114. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  4115. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  4116. mindspore/ops/function/__init__.py +4 -1
  4117. mindspore/ops/function/_add_attr_func.py +11 -6
  4118. mindspore/ops/function/array_func.py +19 -102
  4119. mindspore/ops/function/debug_func.py +8 -5
  4120. mindspore/ops/function/grad/grad_func.py +5 -13
  4121. mindspore/ops/function/math_func.py +77 -572
  4122. mindspore/ops/function/nn_func.py +46 -94
  4123. mindspore/ops/function/other_func.py +4 -1
  4124. mindspore/ops/function/random_func.py +44 -5
  4125. mindspore/ops/function/vmap_func.py +2 -1
  4126. mindspore/ops/functional.py +4 -4
  4127. mindspore/ops/functional_overload.py +1276 -700
  4128. mindspore/ops/op_info_register.py +21 -0
  4129. mindspore/ops/operations/__init__.py +16 -11
  4130. mindspore/ops/operations/_custom_ops_utils.py +689 -34
  4131. mindspore/ops/operations/_inner_ops.py +14 -18
  4132. mindspore/ops/operations/_sequence_ops.py +1 -1
  4133. mindspore/ops/operations/array_ops.py +5 -51
  4134. mindspore/ops/operations/comm_ops.py +186 -41
  4135. mindspore/ops/operations/custom_ops.py +303 -177
  4136. mindspore/ops/operations/debug_ops.py +59 -4
  4137. mindspore/ops/operations/image_ops.py +13 -13
  4138. mindspore/ops/operations/manually_defined/ops_def.py +27 -28
  4139. mindspore/ops/operations/math_ops.py +8 -9
  4140. mindspore/ops/operations/nn_ops.py +8 -40
  4141. mindspore/ops/primitive.py +9 -20
  4142. mindspore/ops/tensor_method.py +63 -15
  4143. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  4144. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  4145. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  4146. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  4147. mindspore/ops_generate/common/base_generator.py +14 -0
  4148. mindspore/ops_generate/common/gen_constants.py +8 -3
  4149. mindspore/ops_generate/common/gen_utils.py +0 -19
  4150. mindspore/ops_generate/common/op_proto.py +11 -4
  4151. mindspore/ops_generate/common/template.py +88 -11
  4152. mindspore/ops_generate/gen_ops.py +1 -1
  4153. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  4154. mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
  4155. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  4156. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  4157. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  4158. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  4159. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  4160. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
  4161. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  4162. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  4163. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  4164. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  4165. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  4166. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  4167. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  4168. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  4169. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  4170. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  4171. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  4172. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  4173. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  4174. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  4175. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  4176. mindspore/parallel/_auto_parallel_context.py +16 -23
  4177. mindspore/parallel/_cell_wrapper.py +113 -45
  4178. mindspore/parallel/_parallel_serialization.py +4 -3
  4179. mindspore/parallel/_ps_context.py +4 -6
  4180. mindspore/parallel/_tensor.py +167 -12
  4181. mindspore/parallel/_transformer/moe.py +1 -1
  4182. mindspore/parallel/_transformer/transformer.py +17 -12
  4183. mindspore/parallel/_utils.py +5 -11
  4184. mindspore/parallel/auto_parallel.py +35 -14
  4185. mindspore/parallel/checkpoint_convert.py +3 -3
  4186. mindspore/parallel/checkpoint_transform.py +13 -7
  4187. mindspore/parallel/cluster/process_entity/_api.py +88 -49
  4188. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  4189. mindspore/parallel/cluster/run.py +48 -7
  4190. mindspore/parallel/function/__init__.py +8 -1
  4191. mindspore/parallel/function/reshard_func.py +12 -12
  4192. mindspore/parallel/nn/__init__.py +15 -2
  4193. mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
  4194. mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
  4195. mindspore/parallel/shard.py +10 -25
  4196. mindspore/parallel/transform_safetensors.py +469 -174
  4197. mindspore/profiler/__init__.py +2 -1
  4198. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  4199. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  4200. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
  4201. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  4202. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  4203. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  4204. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  4205. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  4206. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  4207. mindspore/profiler/analysis/task_manager.py +1 -1
  4208. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  4209. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  4210. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
  4211. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
  4212. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  4213. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  4214. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  4215. mindspore/profiler/common/constant.py +16 -0
  4216. mindspore/profiler/common/msprof_cmd_tool.py +2 -2
  4217. mindspore/profiler/common/path_manager.py +9 -0
  4218. mindspore/profiler/common/profiler_context.py +50 -29
  4219. mindspore/profiler/common/profiler_info.py +0 -16
  4220. mindspore/profiler/common/profiler_meta_data.py +1 -0
  4221. mindspore/profiler/common/profiler_op_analyse.py +239 -0
  4222. mindspore/profiler/common/profiler_output_path.py +23 -8
  4223. mindspore/profiler/common/profiler_parameters.py +128 -35
  4224. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  4225. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  4226. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  4227. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  4228. mindspore/profiler/dynamic_profiler.py +374 -338
  4229. mindspore/profiler/envprofiler.py +42 -12
  4230. mindspore/profiler/experimental_config.py +112 -7
  4231. mindspore/profiler/mstx.py +33 -12
  4232. mindspore/profiler/platform/__init__.py +2 -3
  4233. mindspore/profiler/platform/cpu_profiler.py +10 -4
  4234. mindspore/profiler/platform/npu_profiler.py +30 -20
  4235. mindspore/profiler/profiler.py +218 -154
  4236. mindspore/profiler/profiler_action_controller.py +65 -77
  4237. mindspore/profiler/profiler_interface.py +2 -2
  4238. mindspore/profiler/schedule.py +10 -4
  4239. mindspore/rewrite/common/config.py +1 -0
  4240. mindspore/rewrite/common/namer.py +1 -0
  4241. mindspore/rewrite/common/namespace.py +1 -0
  4242. mindspore/rewrite/node/node.py +31 -11
  4243. mindspore/rewrite/parsers/assign_parser.py +1 -1
  4244. mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
  4245. mindspore/run_check/_check_version.py +7 -10
  4246. mindspore/runtime/__init__.py +8 -6
  4247. mindspore/runtime/event.py +10 -4
  4248. mindspore/runtime/executor.py +87 -45
  4249. mindspore/runtime/memory.py +31 -32
  4250. mindspore/runtime/thread_bind_core.py +299 -165
  4251. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  4252. mindspore/scipy/linalg.py +2 -2
  4253. mindspore/scipy/utils_const.py +0 -17
  4254. mindspore/train/_utils.py +17 -7
  4255. mindspore/train/amp.py +43 -23
  4256. mindspore/train/callback/__init__.py +5 -5
  4257. mindspore/train/callback/_callback.py +2 -1
  4258. mindspore/train/callback/_checkpoint.py +4 -14
  4259. mindspore/train/callback/_flops_collector.py +11 -7
  4260. mindspore/train/callback/_landscape.py +0 -1
  4261. mindspore/train/callback/_train_fault_tolerance.py +98 -21
  4262. mindspore/train/data_sink.py +15 -6
  4263. mindspore/train/dataset_helper.py +14 -5
  4264. mindspore/train/model.py +133 -69
  4265. mindspore/train/serialization.py +168 -126
  4266. mindspore/train/summary/summary_record.py +13 -2
  4267. mindspore/train/train_thor/model_thor.py +2 -2
  4268. mindspore/utils/__init__.py +3 -2
  4269. mindspore/utils/bin/dataset-cache +0 -0
  4270. mindspore/utils/bin/dataset-cache-server +0 -0
  4271. mindspore/utils/dryrun.py +0 -6
  4272. mindspore/utils/runtime_execution_order_check.py +163 -77
  4273. mindspore/utils/sdc_detect.py +68 -0
  4274. mindspore/utils/utils.py +14 -17
  4275. mindspore/version.py +1 -1
  4276. mindspore-2.7.0.dist-info/METADATA +368 -0
  4277. mindspore-2.7.0.dist-info/RECORD +12015 -0
  4278. mindspore-2.7.0.dist-info/WHEEL +5 -0
  4279. mindspore/_deprecated/__init__.py +0 -17
  4280. mindspore/_deprecated/jit.py +0 -198
  4281. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  4282. mindspore/common/auto_dynamic_shape.py +0 -504
  4283. mindspore/communication/_hccl_management.py +0 -297
  4284. mindspore/experimental/es/__init__.py +0 -22
  4285. mindspore/experimental/es/embedding_service.py +0 -891
  4286. mindspore/experimental/es/embedding_service_layer.py +0 -581
  4287. mindspore/include/mindspore/ccsrc/backend/common/graph_kernel/proactive_fallback_expander.h +0 -39
  4288. mindspore/include/mindspore/ccsrc/backend/common/session/session_context.h +0 -47
  4289. mindspore/include/mindspore/ccsrc/backend/ge_backend/pass/matmul_allreduce_add_rmsnorm_fusion.h +0 -67
  4290. mindspore/include/mindspore/ccsrc/backend/graph_compiler/backend.h +0 -124
  4291. mindspore/include/mindspore/ccsrc/backend/graph_compiler/backend_base.h +0 -205
  4292. mindspore/include/mindspore/ccsrc/backend/graph_compiler/ge_backend/ge_backend.h +0 -86
  4293. mindspore/include/mindspore/ccsrc/debug/data_dump/data_dumper.h +0 -56
  4294. mindspore/include/mindspore/ccsrc/debug/hooker/acl_data_adapter.h +0 -51
  4295. mindspore/include/mindspore/ccsrc/debug/hooker/adapter.h +0 -75
  4296. mindspore/include/mindspore/ccsrc/debug/hooker/deprecated_env.h +0 -27
  4297. mindspore/include/mindspore/ccsrc/debug/hooker/hook_debugger.h +0 -55
  4298. mindspore/include/mindspore/ccsrc/debug/hooker/hook_dynamic_loader.h +0 -52
  4299. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/decoder_k_v_cache_info.h +0 -58
  4300. mindspore/include/mindspore/ccsrc/frontend/parallel/ops_info/prompt_k_v_cache_info.h +0 -59
  4301. mindspore/include/mindspore/ccsrc/include/backend/debug/data_dump/overflow_dumper.h +0 -50
  4302. mindspore/include/mindspore/ccsrc/include/backend/device_synchronizer.h +0 -49
  4303. mindspore/include/mindspore/ccsrc/include/backend/distributed/rpc/rdma/constants.h +0 -174
  4304. mindspore/include/mindspore/ccsrc/include/backend/distributed/rpc/rdma/rdma_client.h +0 -83
  4305. mindspore/include/mindspore/ccsrc/include/backend/distributed/rpc/rdma/rdma_server.h +0 -71
  4306. mindspore/include/mindspore/ccsrc/include/common/np_dtype/np_dtypes.h +0 -42
  4307. mindspore/include/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.h +0 -239
  4308. mindspore/include/mindspore/ccsrc/pipeline/jit/pi/utils/ptr_list_ref.h +0 -423
  4309. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/pipeline_jit.h +0 -68
  4310. mindspore/include/mindspore/ccsrc/pipeline/jit/ps/static_analysis/inplace_validation.h +0 -32
  4311. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.h +0 -43
  4312. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.h +0 -88
  4313. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_assign.h +0 -255
  4314. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.h +0 -46
  4315. mindspore/include/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.h +0 -81
  4316. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/dvm/dvm.h +0 -232
  4317. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/ge/ge_kernel_build.h +0 -28
  4318. mindspore/include/mindspore/ccsrc/plugin/device/ascend/kernel/ge/ge_kernel_mod.h +0 -78
  4319. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/base/model_creator.h +0 -27
  4320. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_core.h +0 -43
  4321. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_entity.h +0 -44
  4322. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_sink.h +0 -32
  4323. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_sink_file.h +0 -39
  4324. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_sink_stdout.h +0 -30
  4325. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/log/log_stream.h +0 -51
  4326. mindspore/include/mindspore/ccsrc/plugin/device/ascend/llm_boost/atb/include/atb_speed/utils/filesystem.h +0 -45
  4327. mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/format_type/rectify_do_mask_kernel_info.h +0 -44
  4328. mindspore/include/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/flash_attention_fusion.h +0 -73
  4329. mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_hash_table.h +0 -127
  4330. mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_hash_table_util.h +0 -114
  4331. mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_kernel_runtime.h +0 -82
  4332. mindspore/include/mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_session.h +0 -61
  4333. mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/custom/custom_julia_cpu_kernel.h +0 -50
  4334. mindspore/include/mindspore/ccsrc/plugin/device/cpu/kernel/custom/julia_api.h +0 -443
  4335. mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_kernel_runtime.h +0 -145
  4336. mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_deprecated_interface.h +0 -43
  4337. mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_inference_session.h +0 -50
  4338. mindspore/include/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_session.h +0 -93
  4339. mindspore/include/mindspore/ccsrc/plugin/res_manager/ascend/ascend_device_address/ascend_device_synchronizer.h +0 -45
  4340. mindspore/include/mindspore/ccsrc/plugin/res_manager/cpu/cpu_device_address/cpu_device_synchronizer.h +0 -45
  4341. mindspore/include/mindspore/ccsrc/plugin/res_manager/gpu/device/gpu_device_synchronizer.h +0 -44
  4342. mindspore/include/mindspore/ccsrc/ps/core/communicator/ssl_http.h +0 -60
  4343. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/select_ext.h +0 -44
  4344. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/transpose_ext.h +0 -44
  4345. mindspore/include/mindspore/ccsrc/pyboost/auto_generate/unstack_ext.h +0 -44
  4346. mindspore/include/mindspore/ccsrc/pynative/grad/auto_grad.h +0 -77
  4347. mindspore/include/mindspore/ccsrc/pynative/grad/ir/bprop_tensor_replace.h +0 -58
  4348. mindspore/include/mindspore/ccsrc/pynative/grad/ir/dynamic_shape.h +0 -204
  4349. mindspore/include/mindspore/ccsrc/pynative/grad/ir/ir_bprop.h +0 -163
  4350. mindspore/include/mindspore/ccsrc/pynative/grad/ir/ir_grad.h +0 -114
  4351. mindspore/include/mindspore/ccsrc/pynative/grad/ir/ir_pass.h +0 -71
  4352. mindspore/include/mindspore/ccsrc/pynative/grad/jit/jit_dfunctor.h +0 -28
  4353. mindspore/include/mindspore/ccsrc/pynative/grad/variable.h +0 -466
  4354. mindspore/include/mindspore/ccsrc/pynative/op_function/auto_generate/pyboost_functions.h +0 -1019
  4355. mindspore/include/mindspore/ccsrc/runtime/device/kernel_runtime.h +0 -223
  4356. mindspore/include/mindspore/ccsrc/runtime/device/kernel_runtime_manager.h +0 -71
  4357. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_gather_actor.h +0 -78
  4358. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_switch_actor.h +0 -91
  4359. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/actor/custom_actor.h +0 -66
  4360. mindspore/include/mindspore/ccsrc/runtime/graph_scheduler/inline_control_flow_scheduler.h +0 -81
  4361. mindspore/include/mindspore/ccsrc/runtime/hardware/deprecated_interface.h +0 -47
  4362. mindspore/include/mindspore/core/include/base/fp8_e4m3.h +0 -263
  4363. mindspore/include/mindspore/core/include/base/fp8_e5m2.h +0 -258
  4364. mindspore/include/mindspore/core/include/ir/base_tensor.h +0 -1073
  4365. mindspore/include/mindspore/ops/infer/masked_scatter.h +0 -44
  4366. mindspore/include/mindspore/ops/infer/ops_func_impl/decoder_k_v_cache.h +0 -40
  4367. mindspore/include/mindspore/ops/infer/ops_func_impl/gmm_backward.h +0 -28
  4368. mindspore/include/mindspore/ops/infer/ops_func_impl/gmm_v2_backward.h +0 -28
  4369. mindspore/include/mindspore/ops/infer/ops_func_impl/moe_token_unpermute.h +0 -36
  4370. mindspore/include/mindspore/ops/infer/ops_func_impl/prompt_k_v_cache.h +0 -40
  4371. mindspore/include/mindspore/ops/infer/ops_func_impl/select_ext.h +0 -40
  4372. mindspore/include/mindspore/ops/infer/ops_func_impl/transpose_ext.h +0 -35
  4373. mindspore/include/mindspore/ops/infer/ops_func_impl/unstack_ext.h +0 -38
  4374. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/densetodense_set_operation.h +0 -47
  4375. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/densetosparsesetoperation.h +0 -74
  4376. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/lu.h +0 -35
  4377. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/ragged_tensor_to_sparse.h +0 -92
  4378. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/ragged_tensor_to_tensor.h +0 -120
  4379. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/sample_distorted_bounding_box_ext2.h +0 -103
  4380. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/scale_and_translate.h +0 -77
  4381. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/sparse_cross.h +0 -111
  4382. mindspore/include/mindspore/ops/kernel/ascend/aicpu/aicpu_ops/cpu_kernel/ms_kernel/sparse_sparse_maximum.h +0 -61
  4383. mindspore/include/mindspore/ops/kernel/ascend/ascendc/op_host/decoder_kv_cache_tiling.h +0 -40
  4384. mindspore/include/mindspore/ops/kernel/ascend/ascendc/op_host/prompt_kv_cache_tiling.h +0 -39
  4385. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/conv3d_padding_aclnn_kernel.h +0 -77
  4386. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/moe_token_unpermute_aclnn_kernel.h +0 -45
  4387. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn/view/contiguous.h +0 -41
  4388. mindspore/include/mindspore/ops/kernel/ascend/opapi/aclnn_auto_gen/select_ext_aclnn_kernel.h +0 -41
  4389. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/select_ext.h +0 -40
  4390. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/transpose_ext.h +0 -40
  4391. mindspore/include/mindspore/ops/kernel/ascend/pyboost/auto_generate/unstack_ext.h +0 -40
  4392. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/customize_copy.h +0 -37
  4393. mindspore/include/mindspore/ops/kernel/ascend/pyboost/customize/unstack_ext.h +0 -36
  4394. mindspore/include/mindspore/ops/kernel/cpu/nnacl/arithmetic_self_parameter.h +0 -30
  4395. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/batch_to_space_base.h +0 -33
  4396. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/cast_base.h +0 -74
  4397. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/concat_base.h +0 -32
  4398. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/conv1x1_base.h +0 -32
  4399. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/conv_common_base.h +0 -41
  4400. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/crop_base.h +0 -35
  4401. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/depth_to_space_base.h +0 -31
  4402. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/fill_base.h +0 -33
  4403. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/format_transpose.h +0 -30
  4404. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/gather_d_base.h +0 -55
  4405. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/minimal_filtering_generator.h +0 -58
  4406. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/scatter_nd_binary.h +0 -37
  4407. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/space_to_depth_base.h +0 -31
  4408. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/split_with_over_lap_base.h +0 -33
  4409. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/stack_base.h +0 -30
  4410. mindspore/include/mindspore/ops/kernel/cpu/nnacl/base/transpose_base.h +0 -69
  4411. mindspore/include/mindspore/ops/kernel/cpu/nnacl/batchnorm_parameter.h +0 -29
  4412. mindspore/include/mindspore/ops/kernel/cpu/nnacl/call_parameter.h +0 -28
  4413. mindspore/include/mindspore/ops/kernel/cpu/nnacl/clip_parameter.h +0 -29
  4414. mindspore/include/mindspore/ops/kernel/cpu/nnacl/conv3d_parameter.h +0 -26
  4415. mindspore/include/mindspore/ops/kernel/cpu/nnacl/cumsum_parameter.h +0 -29
  4416. mindspore/include/mindspore/ops/kernel/cpu/nnacl/custom_gru_parameter.h +0 -31
  4417. mindspore/include/mindspore/ops/kernel/cpu/nnacl/custom_is_inf_parameter.h +0 -26
  4418. mindspore/include/mindspore/ops/kernel/cpu/nnacl/custom_masked_fill_parameter.h +0 -26
  4419. mindspore/include/mindspore/ops/kernel/cpu/nnacl/custom_parameter.h +0 -30
  4420. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fill_parameter.h +0 -25
  4421. mindspore/include/mindspore/ops/kernel/cpu/nnacl/format_transpose_parameter.h +0 -29
  4422. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/activation_fp16.h +0 -43
  4423. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/arg_min_max_fp16.h +0 -33
  4424. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/arithmetic_self_fp16.h +0 -57
  4425. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/batchnorm_fp16.h +0 -36
  4426. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/cast_fp16.h +0 -94
  4427. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/common_func_fp16.h +0 -40
  4428. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/constant_of_shape_fp16.h +0 -38
  4429. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/conv_depthwise_fp16.h +0 -65
  4430. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/conv_fp16.h +0 -60
  4431. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/crop_fp16.h +0 -26
  4432. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/custom_gru_fp16.h +0 -32
  4433. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/deconv_fp16.h +0 -36
  4434. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/deconv_winograd_fp16.h +0 -48
  4435. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/dynamic_quant_fp16.h +0 -35
  4436. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/fill_fp16.h +0 -34
  4437. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/gru_fp16.h +0 -30
  4438. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/instance_norm_fp16.h +0 -32
  4439. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/layer_norm_fp16.h +0 -33
  4440. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/log_softmax_fp16.h +0 -35
  4441. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/lstm_fp16.h +0 -54
  4442. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/matmul_fp16.h +0 -128
  4443. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/matrix_fp16.h +0 -36
  4444. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/pack_fp16.h +0 -93
  4445. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/pad_fp16.h +0 -32
  4446. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/power_fp16.h +0 -64
  4447. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/prelu_fp16.h +0 -31
  4448. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/quant_dtype_cast_fp16.h +0 -35
  4449. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/range_fp16.h +0 -27
  4450. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/reduce_fp16.h +0 -41
  4451. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/resize_fp16.h +0 -56
  4452. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/scale_fp16.h +0 -38
  4453. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/softmax_fp16.h +0 -35
  4454. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/sparse_to_dense_fp16.h +0 -31
  4455. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/splice_fp16.h +0 -31
  4456. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/topk_fp16.h +0 -35
  4457. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/transpose_fp16.h +0 -35
  4458. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/unique_fp16.h +0 -29
  4459. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/utils_fp16.h +0 -25
  4460. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/where_fp16.h +0 -32
  4461. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/winograd_transform_fp16.h +0 -57
  4462. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16/winograd_utils_fp16.h +0 -571
  4463. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/activation_grad_fp16.h +0 -44
  4464. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/arithmetic_grad.h +0 -41
  4465. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/arithmetic_self_grad.h +0 -39
  4466. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/batch_norm.h +0 -40
  4467. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/convolution_grad_filter.h +0 -33
  4468. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/convolution_grad_input.h +0 -33
  4469. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/dropout_grad.h +0 -32
  4470. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/gemm_fp16.h +0 -46
  4471. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/layernorm_grad.h +0 -32
  4472. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/pack_fp16_ext.h +0 -37
  4473. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/pooling_grad.h +0 -34
  4474. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/resize_grad.h +0 -45
  4475. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/strided_slice_grad.h +0 -31
  4476. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp16_grad/unsorted_segment_sum.h +0 -31
  4477. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/adder_fp32.h +0 -47
  4478. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/arg_min_max_fp32.h +0 -34
  4479. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/arithmetic_compare_fp32.h +0 -77
  4480. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/attention_fp32.h +0 -72
  4481. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/batchnorm_fp32.h +0 -40
  4482. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/bias_add.h +0 -34
  4483. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/common_func_fp32.h +0 -106
  4484. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/constant_of_shape_fp32.h +0 -52
  4485. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_1x1_avx_fp32.h +0 -40
  4486. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_1x1_x86_fp32.h +0 -21
  4487. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_common_fp32.h +0 -60
  4488. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_depthwise_avx_fp32.h +0 -37
  4489. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_depthwise_fp32.h +0 -148
  4490. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_im2col_avx512_fp32.h +0 -38
  4491. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_im2col_fp32.h +0 -33
  4492. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_sw.h +0 -132
  4493. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_sw_arm64_fp32.h +0 -33
  4494. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_sw_avx_fp32.h +0 -42
  4495. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/conv_winograd_fp32.h +0 -48
  4496. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/crop_fp32.h +0 -34
  4497. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/cumsum_fp32.h +0 -32
  4498. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/custom_gru_fp32.h +0 -32
  4499. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/deconv_fp32.h +0 -37
  4500. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/deconv_winograd_fp32.h +0 -46
  4501. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/detection_post_process_fp32.h +0 -60
  4502. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/embedding_lookup_fp32.h +0 -43
  4503. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/group_norm_fp32.h +0 -35
  4504. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/gru_fp32.h +0 -30
  4505. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/instance_norm_fp32.h +0 -50
  4506. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/invert_permutation_fp32.h +0 -30
  4507. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/l2_norm_fp32.h +0 -34
  4508. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/layer_norm_fp32.h +0 -33
  4509. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/local_response_norm_fp32.h +0 -26
  4510. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/log_softmax_fp32.h +0 -31
  4511. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/matmul_avx512_mask_fp32.h +0 -209
  4512. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/non_max_suppression_fp32.h +0 -25
  4513. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/online_fusion/cast_gather_reduce_fp32.h +0 -37
  4514. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/online_fusion/reduce_concat_fp32.h +0 -34
  4515. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/online_fusion/split_reduce_concat_fp32.h +0 -33
  4516. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/pad_fp32.h +0 -40
  4517. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/prelu_fp32.h +0 -31
  4518. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/prior_box_fp32.h +0 -41
  4519. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/range_fp32.h +0 -34
  4520. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/rank_fp32.h +0 -32
  4521. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/resize_fp32.h +0 -74
  4522. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/reverse_fp32.h +0 -31
  4523. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/reverse_sequence_fp32.h +0 -33
  4524. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/scale_fp32.h +0 -35
  4525. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/sparse_to_dense_fp32.h +0 -31
  4526. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/splice_fp32.h +0 -26
  4527. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/transpose_fp32.h +0 -35
  4528. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/transpose_server_fp32.h +0 -40
  4529. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/triu_tril_fp32.h +0 -42
  4530. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/unique_fp32.h +0 -36
  4531. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/where_fp32.h +0 -32
  4532. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/winograd_avx.h +0 -299
  4533. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/winograd_transform.h +0 -51
  4534. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32/winograd_utils.h +0 -373
  4535. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/batch_norm_grad.h +0 -37
  4536. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/batch_norm_parameter.h +0 -28
  4537. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/binary_cross_entropy_grad.h +0 -36
  4538. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/convolution_grad_filter.h +0 -32
  4539. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/convolution_grad_input.h +0 -32
  4540. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/dropout_parameter.h +0 -27
  4541. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/layernorm_grad.h +0 -29
  4542. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/nllloss_grad_fp32.h +0 -31
  4543. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/optimizer.h +0 -40
  4544. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/pack_ext.h +0 -39
  4545. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/pooling_grad.h +0 -34
  4546. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/reduce_grad.h +0 -30
  4547. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/smooth_l1_loss.h +0 -27
  4548. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/softmax_cross_entropy_with_logits.h +0 -33
  4549. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_grad/softmax_grad_utils.h +0 -33
  4550. mindspore/include/mindspore/ops/kernel/cpu/nnacl/fp32_sparse/matmul_sparse_x1_fp32.h +0 -41
  4551. mindspore/include/mindspore/ops/kernel/cpu/nnacl/gather_nd_parameter.h +0 -26
  4552. mindspore/include/mindspore/ops/kernel/cpu/nnacl/gelu_parameter.h +0 -28
  4553. mindspore/include/mindspore/ops/kernel/cpu/nnacl/grid_sampler_parameter.h +0 -28
  4554. mindspore/include/mindspore/ops/kernel/cpu/nnacl/group_norm_parameter.h +0 -41
  4555. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/cast_gather_reduce_infer.h +0 -31
  4556. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensor_array_infer.h +0 -31
  4557. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensor_array_read_infer.h +0 -31
  4558. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensor_array_write_infer.h +0 -31
  4559. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensorlist_fromtensor_infer.h +0 -31
  4560. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensorlist_getitem_infer.h +0 -32
  4561. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensorlist_reserve_infer.h +0 -31
  4562. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensorlist_setitem_infer.h +0 -31
  4563. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/control/tensorlist_stack_infer.h +0 -31
  4564. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/conv3d_infer.h +0 -32
  4565. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/custom_is_inf_infer.h +0 -31
  4566. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/custom_masked_fill_infer.h +0 -31
  4567. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/custom_tensor_scatter_max_infer.h +0 -31
  4568. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/format_transpose_infer.h +0 -31
  4569. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/gather_d_infer.h +0 -33
  4570. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/grid_sampler_infer.h +0 -32
  4571. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/group_norm_infer.h +0 -31
  4572. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/reduce_concat_infer.h +0 -31
  4573. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/sparse_fill_empty_rows_infer.h +0 -31
  4574. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/sparse_reshape_infer.h +0 -31
  4575. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/sparse_segment_sum_infer.h +0 -31
  4576. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/split_reduce_concat_infer.h +0 -31
  4577. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/string/custom_extract_features_infer.h +0 -31
  4578. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/string/custom_normalize_infer.h +0 -32
  4579. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/string/custom_predict_infer.h +0 -36
  4580. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/string/hashtable_lookup_infer.h +0 -31
  4581. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/string/lsh_projection_infer.h +0 -32
  4582. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/string/skip_gram_infer.h +0 -31
  4583. mindspore/include/mindspore/ops/kernel/cpu/nnacl/infer/triu_tril_infer.h +0 -32
  4584. mindspore/include/mindspore/ops/kernel/cpu/nnacl/instance_norm_parameter.h +0 -32
  4585. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/add_int8.h +0 -70
  4586. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/arg_min_max_int8.h +0 -41
  4587. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/arithmetic_int8.h +0 -51
  4588. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/arithmetic_self_int8.h +0 -59
  4589. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/batch_to_space_int8.h +0 -33
  4590. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/batchnorm_int8.h +0 -34
  4591. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/common_func_int8.h +0 -95
  4592. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/concat_int8.h +0 -33
  4593. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/conv1x1_int8.h +0 -46
  4594. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/conv3x3_int8.h +0 -48
  4595. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/conv_depthwise_int8.h +0 -49
  4596. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/conv_int8.h +0 -44
  4597. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/crop_int8.h +0 -31
  4598. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/deconv_int8.h +0 -46
  4599. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/depth_to_space_int8.h +0 -32
  4600. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/div_int8.h +0 -37
  4601. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/dynamic_gather_int8.h +0 -40
  4602. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/dynamic_matmul_int8.h +0 -74
  4603. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/dynamic_quant_int8.h +0 -34
  4604. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/gatherNd_int8.h +0 -32
  4605. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/gather_int8.h +0 -35
  4606. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/hswish_int8.h +0 -43
  4607. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/l2_norm_int8.h +0 -32
  4608. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/layer_norm_int8.h +0 -35
  4609. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/leaky_relu_int8.h +0 -31
  4610. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/matmul_int8.h +0 -93
  4611. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/mul_int8.h +0 -39
  4612. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/pack_int8.h +0 -56
  4613. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/pad_int8.h +0 -35
  4614. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/pooling_int8.h +0 -50
  4615. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/power_int8.h +0 -33
  4616. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/quant_dtype_cast_int8.h +0 -56
  4617. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/reduce_int8.h +0 -70
  4618. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/relux_int8.h +0 -43
  4619. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/reshape_int8.h +0 -32
  4620. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/resize_int8.h +0 -50
  4621. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/scale_int8.h +0 -35
  4622. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/sigmoid_int8.h +0 -32
  4623. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/slice_int8.h +0 -35
  4624. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/softmax_int8.h +0 -35
  4625. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/space_to_batch_int8.h +0 -32
  4626. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/split_int8.h +0 -33
  4627. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/squeeze_int8.h +0 -32
  4628. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/sub_int8.h +0 -32
  4629. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/tanh_int8.h +0 -43
  4630. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/topk_int8.h +0 -36
  4631. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/transpose_int8.h +0 -36
  4632. mindspore/include/mindspore/ops/kernel/cpu/nnacl/int8/unsqueeze_int8.h +0 -33
  4633. mindspore/include/mindspore/ops/kernel/cpu/nnacl/intrinsics/avx/common_utils.h +0 -157
  4634. mindspore/include/mindspore/ops/kernel/cpu/nnacl/intrinsics/sse/sse_common.h +0 -390
  4635. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/activation.h +0 -25
  4636. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/addn.h +0 -35
  4637. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/arg_min_max.h +0 -63
  4638. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/arithmetic_compare.h +0 -26
  4639. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/arithmetic_self.h +0 -48
  4640. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/batch_norm.h +0 -38
  4641. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/batch_to_space.h +0 -33
  4642. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/biasadd.h +0 -25
  4643. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/cast.h +0 -32
  4644. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/clip.h +0 -34
  4645. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/concat.h +0 -52
  4646. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_1x1.h +0 -42
  4647. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_base.h +0 -63
  4648. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_delegate.h +0 -39
  4649. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_depthwise.h +0 -36
  4650. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_depthwise_3x3.h +0 -37
  4651. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_depthwise_indirect.h +0 -39
  4652. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_depthwise_sw.h +0 -36
  4653. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_depthwise_sw_avx.h +0 -40
  4654. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_im2col.h +0 -28
  4655. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_im2col_arm32.h +0 -30
  4656. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_im2col_arm64.h +0 -29
  4657. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_im2col_avx.h +0 -29
  4658. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_im2col_avx512.h +0 -29
  4659. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_im2col_base.h +0 -52
  4660. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_im2col_sse.h +0 -29
  4661. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_slidewindow.h +0 -46
  4662. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_sw_1x1.h +0 -36
  4663. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_sw_arm64.h +0 -28
  4664. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_sw_avx.h +0 -28
  4665. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_winograd.h +0 -32
  4666. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_winograd_arm32.h +0 -30
  4667. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_winograd_arm64.h +0 -30
  4668. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_winograd_avx.h +0 -30
  4669. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_winograd_base.h +0 -65
  4670. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/convolution_winograd_sse.h +0 -30
  4671. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/crop.h +0 -31
  4672. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/crop_and_resize.h +0 -41
  4673. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/deconvolution.h +0 -39
  4674. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/deconvolution_depthwise.h +0 -34
  4675. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/deconvolution_winograd.h +0 -52
  4676. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/depth_to_space.h +0 -42
  4677. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/f16/arithmetic_compare_f16.h +0 -26
  4678. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/f16/arithmetic_f16.h +0 -42
  4679. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/f16/concat_f16.h +0 -25
  4680. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/f16/reduce_f16.h +0 -27
  4681. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/f16/stack_f16.h +0 -32
  4682. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/fill.h +0 -36
  4683. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/fullconnection.h +0 -25
  4684. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/fused_batch_norm.h +0 -37
  4685. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/gather.h +0 -46
  4686. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/gather_d.h +0 -25
  4687. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/gather_nd.h +0 -35
  4688. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/group_convolution.h +0 -49
  4689. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/group_norm.h +0 -31
  4690. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/init_vs_kernels.h +0 -20
  4691. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/layer_norm.h +0 -49
  4692. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/local_response_norm.h +0 -30
  4693. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/log_softmax.h +0 -31
  4694. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul.h +0 -25
  4695. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul_arm32.h +0 -28
  4696. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul_arm64.h +0 -28
  4697. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul_avx.h +0 -28
  4698. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul_avx512.h +0 -27
  4699. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul_base.h +0 -35
  4700. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul_create.h +0 -24
  4701. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/matmul_sse.h +0 -27
  4702. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/non_max_suppression.h +0 -34
  4703. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/non_zero.h +0 -30
  4704. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/ones_like.h +0 -31
  4705. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/pad.h +0 -51
  4706. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/pow.h +0 -31
  4707. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/prelu.h +0 -34
  4708. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/prior_box.h +0 -36
  4709. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/range.h +0 -31
  4710. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/rank.h +0 -31
  4711. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/reduce.h +0 -72
  4712. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/reverse.h +0 -36
  4713. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/scale.h +0 -41
  4714. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/shape.h +0 -31
  4715. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/size.h +0 -30
  4716. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/softmax.h +0 -39
  4717. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/splice.h +0 -30
  4718. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/stack.h +0 -41
  4719. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/transpose.h +0 -49
  4720. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/tril.h +0 -32
  4721. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/triu.h +0 -32
  4722. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/unique.h +0 -32
  4723. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/where.h +0 -44
  4724. mindspore/include/mindspore/ops/kernel/cpu/nnacl/kernel/zeros_like.h +0 -31
  4725. mindspore/include/mindspore/ops/kernel/cpu/nnacl/l2_norm_parameter.h +0 -41
  4726. mindspore/include/mindspore/ops/kernel/cpu/nnacl/local_response_norm_parameter.h +0 -31
  4727. mindspore/include/mindspore/ops/kernel/cpu/nnacl/lsh_projection_parameter.h +0 -35
  4728. mindspore/include/mindspore/ops/kernel/cpu/nnacl/mul_parameter.h +0 -32
  4729. mindspore/include/mindspore/ops/kernel/cpu/nnacl/non_max_suppression_parameter.h +0 -28
  4730. mindspore/include/mindspore/ops/kernel/cpu/nnacl/pack.h +0 -23
  4731. mindspore/include/mindspore/ops/kernel/cpu/nnacl/partial_fusion_parameter.h +0 -29
  4732. mindspore/include/mindspore/ops/kernel/cpu/nnacl/predict_parameter.h +0 -32
  4733. mindspore/include/mindspore/ops/kernel/cpu/nnacl/prelu_parameter.h +0 -26
  4734. mindspore/include/mindspore/ops/kernel/cpu/nnacl/random_parameter.h +0 -34
  4735. mindspore/include/mindspore/ops/kernel/cpu/nnacl/reverse_parameter.h +0 -30
  4736. mindspore/include/mindspore/ops/kernel/cpu/nnacl/reverse_sequence_parameter.h +0 -45
  4737. mindspore/include/mindspore/ops/kernel/cpu/nnacl/scale_parameter.h +0 -39
  4738. mindspore/include/mindspore/ops/kernel/cpu/nnacl/scatter_elements_parameter.h +0 -25
  4739. mindspore/include/mindspore/ops/kernel/cpu/nnacl/scatter_nd_parameter.h +0 -29
  4740. mindspore/include/mindspore/ops/kernel/cpu/nnacl/sigmoid_parameter.h +0 -41
  4741. mindspore/include/mindspore/ops/kernel/cpu/nnacl/skip_gram_parameter.h +0 -30
  4742. mindspore/include/mindspore/ops/kernel/cpu/nnacl/sparse_to_dense_parameter.h +0 -32
  4743. mindspore/include/mindspore/ops/kernel/cpu/nnacl/tensor_array_parameter.h +0 -29
  4744. mindspore/include/mindspore/ops/kernel/cpu/nnacl/triu_tril_parameter.h +0 -31
  4745. mindspore/include/mindspore/ops/kernel/cpu/nnacl/upsample_parameter.h +0 -29
  4746. mindspore/include/mindspore/ops/kernel/cpu/nnacl/where_parameter.h +0 -25
  4747. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gmm_backward.h +0 -38
  4748. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/gmm_v2_backward.h +0 -38
  4749. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/select_ext.h +0 -38
  4750. mindspore/include/mindspore/ops/kernel/cpu/pyboost/auto_generate/transpose_ext.h +0 -38
  4751. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gmm_backward.h +0 -38
  4752. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/gmm_v2_backward.h +0 -38
  4753. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/select_ext.h +0 -38
  4754. mindspore/include/mindspore/ops/kernel/gpu/pyboost/auto_generate/transpose_ext.h +0 -38
  4755. mindspore/include/mindspore/ops/ops_utils/ms_extension.h +0 -39
  4756. mindspore/include/mindspore/ops/view/select_ext_strides_calc.h +0 -30
  4757. mindspore/include/mindspore/ops/view/transpose_ext_strides_calc.h +0 -32
  4758. mindspore/include/mindspore/ops/view/unstack_ext_strides_calc.h +0 -28
  4759. mindspore/include/third_party/securec/src/secinput.h +0 -181
  4760. mindspore/include/third_party/securec/src/securecutil.h +0 -574
  4761. mindspore/include/third_party/securec/src/secureprintoutput.h +0 -153
  4762. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +0 -123
  4763. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -152
  4764. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +0 -2048
  4765. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910_93/aic-ascend910_93-ops-info.json +0 -2048
  4766. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +0 -2048
  4767. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +0 -46
  4768. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +0 -51
  4769. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/build_tbe_kernel.py +0 -529
  4770. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/compiler.py +0 -56
  4771. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/custom.py +0 -1109
  4772. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/get_file_path.py +0 -36
  4773. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -241
  4774. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +0 -212
  4775. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/tbe_topi.py +0 -556
  4776. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +0 -46
  4777. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +0 -51
  4778. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -241
  4779. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +0 -212
  4780. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  4781. mindspore/lib/plugin/ascend/custom_ascendc_910/framework/npu_supported_ops.json +0 -10
  4782. mindspore/lib/plugin/ascend/custom_ascendc_910/op_api/include/aclnn_decoder_kv_cache.h +0 -59
  4783. mindspore/lib/plugin/ascend/custom_ascendc_910/op_api/include/aclnn_prompt_kv_cache.h +0 -59
  4784. mindspore/lib/plugin/ascend/custom_ascendc_910/op_api/lib/libcust_opapi.so +0 -0
  4785. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +0 -182
  4786. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl/dynamic/decoder_kv_cache.cpp +0 -192
  4787. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl/dynamic/decoder_kv_cache.py +0 -215
  4788. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl/dynamic/prompt_kv_cache.cpp +0 -274
  4789. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl/dynamic/prompt_kv_cache.py +0 -215
  4790. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +0 -158
  4791. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  4792. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +0 -158
  4793. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  4794. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +0 -158
  4795. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  4796. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +0 -158
  4797. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  4798. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +0 -158
  4799. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  4800. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +0 -158
  4801. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  4802. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +0 -158
  4803. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  4804. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +0 -158
  4805. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  4806. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +0 -167
  4807. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  4808. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +0 -167
  4809. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  4810. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +0 -167
  4811. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  4812. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +0 -167
  4813. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  4814. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +0 -167
  4815. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  4816. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +0 -167
  4817. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  4818. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +0 -167
  4819. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  4820. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +0 -167
  4821. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  4822. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/binary_info_config.json +0 -302
  4823. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/decoder_kv_cache.json +0 -892
  4824. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/prompt_kv_cache.json +0 -892
  4825. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  4826. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  4827. mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/inc/op_proto.h +0 -33
  4828. mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  4829. mindspore/lib/plugin/ascend/custom_ascendc_910/version.info +0 -1
  4830. mindspore/lib/plugin/ascend/custom_ascendc_910b/framework/npu_supported_ops.json +0 -14
  4831. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_decoder_kv_cache.h +0 -59
  4832. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_prompt_kv_cache.h +0 -59
  4833. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.cpp +0 -192
  4834. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.py +0 -215
  4835. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.cpp +0 -274
  4836. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.py +0 -215
  4837. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +0 -158
  4838. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  4839. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +0 -158
  4840. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  4841. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +0 -158
  4842. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  4843. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +0 -158
  4844. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  4845. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +0 -158
  4846. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  4847. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +0 -158
  4848. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  4849. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +0 -158
  4850. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  4851. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +0 -158
  4852. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  4853. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +0 -167
  4854. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  4855. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +0 -167
  4856. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  4857. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +0 -167
  4858. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  4859. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +0 -167
  4860. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  4861. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +0 -167
  4862. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  4863. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +0 -167
  4864. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  4865. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +0 -167
  4866. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  4867. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +0 -167
  4868. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  4869. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +0 -156
  4870. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  4871. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +0 -156
  4872. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  4873. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +0 -156
  4874. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  4875. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +0 -156
  4876. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  4877. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +0 -156
  4878. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  4879. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +0 -156
  4880. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  4881. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +0 -156
  4882. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  4883. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +0 -156
  4884. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  4885. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +0 -165
  4886. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  4887. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +0 -165
  4888. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  4889. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +0 -165
  4890. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  4891. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +0 -165
  4892. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  4893. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +0 -165
  4894. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  4895. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +0 -165
  4896. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  4897. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +0 -165
  4898. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  4899. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +0 -165
  4900. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  4901. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +0 -156
  4902. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  4903. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +0 -156
  4904. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  4905. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +0 -156
  4906. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  4907. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +0 -156
  4908. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  4909. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +0 -156
  4910. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  4911. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +0 -156
  4912. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  4913. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +0 -156
  4914. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  4915. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +0 -156
  4916. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  4917. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +0 -165
  4918. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  4919. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +0 -165
  4920. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  4921. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +0 -165
  4922. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  4923. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +0 -165
  4924. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  4925. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +0 -165
  4926. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  4927. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +0 -165
  4928. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  4929. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +0 -165
  4930. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  4931. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +0 -165
  4932. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  4933. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/decoder_kv_cache.json +0 -892
  4934. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/prompt_kv_cache.json +0 -892
  4935. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910_93/decoder_kv_cache.json +0 -892
  4936. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910_93/prompt_kv_cache.json +0 -892
  4937. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/decoder_kv_cache.json +0 -892
  4938. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/prompt_kv_cache.json +0 -892
  4939. mindspore/profiler/common/validator/__init__.py +0 -14
  4940. mindspore/profiler/common/validator/validate_path.py +0 -84
  4941. mindspore/profiler/parser/__init__.py +0 -14
  4942. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  4943. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  4944. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  4945. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  4946. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  4947. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  4948. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  4949. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  4950. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  4951. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  4952. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  4953. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  4954. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  4955. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  4956. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  4957. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  4958. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  4959. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  4960. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  4961. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  4962. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  4963. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  4964. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  4965. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  4966. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  4967. mindspore/profiler/parser/container.py +0 -229
  4968. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  4969. mindspore/profiler/parser/flops_parser.py +0 -531
  4970. mindspore/profiler/parser/framework_enum.py +0 -111
  4971. mindspore/profiler/parser/framework_parser.py +0 -464
  4972. mindspore/profiler/parser/framework_struct.py +0 -61
  4973. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  4974. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  4975. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  4976. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  4977. mindspore/profiler/parser/hccl_parser.py +0 -573
  4978. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  4979. mindspore/profiler/parser/integrator.py +0 -526
  4980. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  4981. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  4982. mindspore/profiler/parser/minddata_parser.py +0 -186
  4983. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  4984. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  4985. mindspore/profiler/parser/optime_parser.py +0 -250
  4986. mindspore/profiler/parser/profiler_info.py +0 -213
  4987. mindspore/profiler/parser/step_trace_parser.py +0 -666
  4988. mindspore/utils/hooks.py +0 -81
  4989. mindspore-2.6.0rc1.dist-info/METADATA +0 -367
  4990. mindspore-2.6.0rc1.dist-info/RECORD +0 -12175
  4991. mindspore-2.6.0rc1.dist-info/WHEEL +0 -5
  4992. /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  4993. /mindspore/include/mindspore/ops/kernel/ascend/ascendc/{op_host → all_finite/op_host}/all_finite_tiling.h +0 -0
  4994. /mindspore/include/third_party/{securec/include → include}/securec.h +0 -0
  4995. /mindspore/include/third_party/{securec/include → include}/securectype.h +0 -0
  4996. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
  4997. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,11 @@
1
1
  /*
2
2
  * Copyright (c) 2024 Huawei Technologies Co., Ltd.
3
- * AscendTransformerBoost is licensed under Mulan PSL v2.
4
- * You can use this software according to the terms and conditions of the Mulan PSL v2.
5
- * You may obtain a copy of Mulan PSL v2 at:
6
- * http://license.coscl.org.cn/MulanPSL2
7
- * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
8
- * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
9
- * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
10
- * See the Mulan PSL v2 for more details.
3
+ * This file is a part of the CANN Open Software.
4
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
5
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
6
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
7
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
8
+ * See LICENSE in the root of the software repository for the full text of the License.
11
9
  */
12
10
  #ifndef ATB_INFEROPPARAM_H
13
11
  #define ATB_INFEROPPARAM_H
@@ -33,6 +31,16 @@ namespace atb {
33
31
 
34
32
  namespace infer {
35
33
 
34
+ //!
35
+ //! \enum InputLayout
36
+ //!
37
+ //! \brief 数据排布类型
38
+ //!
39
+ enum InputLayout : int {
40
+ TYPE_BSND = 0, //!< 默认值,表示数据排布为BSND
41
+ TYPE_BNSD //!< 表示数据排布为BNSD
42
+ };
43
+
36
44
  //!
37
45
  //! \enum QuantType
38
46
  //!
@@ -40,11 +48,12 @@ namespace infer {
40
48
  //!
41
49
  enum QuantType : int {
42
50
  QUANT_UNDEFINED = 0, //!< 不量化
43
- QUANT_INT4, //!< 当前不支持
44
- QUANT_INT8, //!< int8量化
45
- QUANT_INT16, //!< 当前不支持
46
- QUANT_FLOAT8, //!< 当前不支持
47
- QUANT_FLOAT16, //!< 当前不支持
51
+ QUANT_UNQUANT = 0, //!< 不量化
52
+ QUANT_INT4 = 1, //!< 当前不支持
53
+ QUANT_INT8 = 2, //!< int8量化
54
+ QUANT_INT16 = 3, //!< 当前不支持
55
+ QUANT_FLOAT8 = 4, //!< 当前不支持
56
+ QUANT_FLOAT16 = 5, //!< 当前不支持
48
57
  };
49
58
 
50
59
  //!
@@ -62,22 +71,23 @@ enum DynamicQuantType : int {
62
71
  //! \enum ActivationType
63
72
  //!
64
73
  //! \brief 激活支持的类型
65
- //!
66
- //! ACTIVATION_SWIGLU_FORWARD: Atlas 300I DUO中只支持32位对齐的数据、Atlas 300I DUO中不支持bfloat16类型数据
67
- //! ACTIVATION_SWIGLU_BACKWARD: 只支持Atlas 800I A2
68
- //! ACTIVATION_GELU: bf16只支持Atlas 800I A2
74
+ //! ACTIVATION_FAST_GELU:快速运算的Gelu激活函数,对Tensor内每个element做Gelu激活函数近似计算,计算速度更快,同时保持较高的准确性。
75
+ //! ACTIVATION_SWIGLU_FORWARD: Swiglu正向激活函数。Atlas 推理系列产品中只支持32位对齐的数据。
76
+ //! ACTIVATION_FASTER_GELU_FORWARD: 简化后的FastGelu激活函数,计算速度更快。
77
+ //! ACTIVATION_SWIGLU_BACKWARD: Swiglu正向激活函数的反向,求梯度时使用。只支持Atlas 800I A2推理产品。
69
78
  //!
70
79
  enum ActivationType : int {
71
- ACTIVATION_UNDEFINED = 0, //!< 未定义
72
- ACTIVATION_RELU, //!< RELU激活类型
73
- ACTIVATION_GELU, //!< GELU激活类型
74
- ACTIVATION_FAST_GELU, //!< FAST_GELU激活类型
75
- ACTIVATION_SWISH, //!< SWISH激活类型
76
- ACTIVATION_LOG, //!< LOG激活类型
77
- ACTIVATION_SWIGLU_FORWARD, //!< SWIGLU_FORWARD激活类型
78
- ACTIVATION_SWIGLU_BACKWARD, //!< SWIGLU_BACKWARD激活类型
79
- ACTIVATION_SIGMOID, //!< SIGMOID激活类型
80
- ACTIVATION_MAX, //!< 枚举最大值
80
+ ACTIVATION_UNDEFINED = 0, //!< 未定义
81
+ ACTIVATION_RELU, //!< RELU激活类型
82
+ ACTIVATION_GELU, //!< GELU激活类型
83
+ ACTIVATION_FAST_GELU, //!< FAST_GELU激活类型
84
+ ACTIVATION_SWISH, //!< SWISH激活类型
85
+ ACTIVATION_LOG, //!< LOG激活类型
86
+ ACTIVATION_SWIGLU_FORWARD, //!< SWIGLU_FORWARD激活类型
87
+ ACTIVATION_SWIGLU_BACKWARD, //!< SWIGLU_BACKWARD激活类型
88
+ ACTIVATION_SIGMOID, //!< SIGMOID激活类型
89
+ ACTIVATION_FASTER_GELU_FORWARD, //!< FASTER_GELU_FORWARD激活类型
90
+ ACTIVATION_MAX, //!< 枚举最大值, 非激活类型
81
91
  };
82
92
 
83
93
  //!
@@ -98,8 +108,8 @@ struct ActivationParam {
98
108
  //! \enum GeLUMode
99
109
  //! \brief GeLU激活函数可选的计算模式
100
110
  enum GeLUMode : int {
101
- TANH_MODE = 0, //!< 默认值,使用tanh估算
102
- NONE_MODE, //!< 原GeLU计算公式
111
+ TANH_MODE = 0, //!< 默认值,使用tanh估算
112
+ NONE_MODE, //!< 原GeLU计算公式
103
113
  };
104
114
  //! 激活函数类型,ActivationType类型枚举值.
105
115
  ActivationType activationType = ACTIVATION_UNDEFINED;
@@ -109,6 +119,10 @@ struct ActivationParam {
109
119
  int32_t dim = -1;
110
120
  //! GeLU模式选择参数
111
121
  GeLUMode geluMode = TANH_MODE;
122
+ //!
123
+ //! \brief 预留参数
124
+ //!
125
+ uint8_t rsv[8] = {0};
112
126
  };
113
127
 
114
128
  //!
@@ -135,6 +149,10 @@ struct AsStridedParam {
135
149
  //! \warning offset的长度要求为1且元素要求大于或等于0.
136
150
  //!
137
151
  SVector<int64_t> offset;
152
+ //!
153
+ //! \brief 预留参数
154
+ //!
155
+ uint8_t rsv[8] = {0};
138
156
  };
139
157
 
140
158
  //!
@@ -156,9 +174,27 @@ struct CumsumParam {
156
174
  //!
157
175
  //! \brief 正向累加或逆向累加,默认为false.
158
176
  //!
159
- //! \note true:输出正向累加(暂不支持) false:输出逆向累加.
177
+ //! \note true:输出逆向累加(暂不支持) false:输出正向累加.
160
178
  //!
161
179
  bool reverse = false;
180
+ //!
181
+ //! \brief 预留参数
182
+ //!
183
+ uint8_t rsv[14] = {0};
184
+ };
185
+
186
+ //!
187
+ //! \brief 推理的长度大于训练长度时,embedding需要进行特殊处理。
188
+ //! 推理长度小于等于训练长度时,不进行插值;推理长度大于训练长度时,放大base动态插值。
189
+ //! 将输入的token序列的位置信息positionIds和inv_freq进行外积,再cos/sin运算得到最终的Rotary embedding的结果。
190
+ //!
191
+ struct DynamicNTKParam {
192
+ //! 选择输出数据类型的参数
193
+ aclDataType outDataType = ACL_DT_UNDEFINED;
194
+ //!
195
+ //! \brief 预留参数
196
+ //!
197
+ uint8_t rsv[12] = {0};
162
198
  };
163
199
 
164
200
  //!
@@ -177,6 +213,10 @@ struct GatherParam {
177
213
  //! \warning 该参数必须大于或等于0,且小于或等于axis.
178
214
  //!
179
215
  int64_t batchDims = 0;
216
+ //!
217
+ //! \brief 预留参数
218
+ //!
219
+ uint8_t rsv[16] = {0};
180
220
  };
181
221
 
182
222
  //!
@@ -193,25 +233,41 @@ struct MultinomialParam {
193
233
  uint32_t numSamples = 1;
194
234
  //! \brief 随机数种子.
195
235
  uint32_t randSeed = 0;
236
+ //!
237
+ //! \brief 预留参数
238
+ //!
239
+ uint8_t rsv[8] = {0};
196
240
  };
197
241
 
198
242
  //!
199
- //! \brief 对输入张量指定维度等分切成多个张量。
243
+ //! \brief 对输入张量指定维度切成多个张量。
200
244
  //!
201
245
  struct SplitParam {
202
246
  //!
203
247
  //! \brief 指定切分的维度索引
204
248
  //!
205
- //! splitDim须位于输入张量x的维度范围内,即如果x的维度为xDim,则splitDim的取值范围为[-xDim, xDim - 1]。
249
+ //! splitDim须位于输入张量x的维度范围内,即如果x的维度为xDim,则等长切分下splitDim的取值范围为[-xDim, xDim - 1]。
206
250
  //! 当splitDim为负数时,其含义是从最高维度开始访问,如splitDim = -1,x维度数为dimNum,则拆分维度为dimNum - 1。
251
+ //! \warning 当使用不等长切分时,splitDim的取值范围为[0, xDim - 1]。
207
252
  //!
208
253
  int32_t splitDim = 0;
209
254
  //!
210
- //! \brief 等分次数,当前支持2或3.
255
+ //! \brief 切分次数,当前支持2或3.
211
256
  //!
212
- //! \warning 输入张量x的维度须能够被splitNum整除,且当splitNum = 3时输入x要求是float16或者bfloat16数据类型。
257
+ //! \warning 等长切分下输入张量x的维度须能够被splitNum整除,且当splitNum = 3时输入x要求是float16或者bf16数据类型。
213
258
  //!
214
259
  int32_t splitNum = 2;
260
+ //!
261
+ //! \brief 指定每个输出tensor在切分维度上的大小
262
+ //!
263
+ //! 不传入此参数时使用等长切分,传入此参数时使用splitV不等长切分
264
+ //! \warning splitSizes中的每一个元素要求大于等于1。splitSizes中的元素之和等于切分维度的大小。
265
+ //!
266
+ SVector<int32_t> splitSizes = {};
267
+ //!
268
+ //! \brief 预留参数
269
+ //!
270
+ uint8_t rsv[8] = {0};
215
271
  };
216
272
 
217
273
  //!
@@ -223,15 +279,18 @@ struct ConcatParam {
223
279
  //!
224
280
  //! 当concatDim为负数时,其含义是从最高维度开始访问,如concatDim = -1,输入张量维度数为dimNum,则拼接维度为dimNum - 1。
225
281
  //!
226
- //! \warning 输入x和y的维数要求一致。输入x或y的维度大小,除了concatDim维外,其他维度要求相同。仅Atlas 800I A2硬件支持bfloat16。
282
+ //! \warning 输入x和y的维数要求一致。输入x或y的维度大小,除了concatDim维外,其他维度要求相同。Atlas 推理系列产品中不支持bf16类型数据。
227
283
  //!
228
284
  int concatDim = 0;
285
+ //!
286
+ //! \brief 预留参数
287
+ //!
288
+ uint8_t rsv[12] = {0};
229
289
  };
230
290
 
231
291
  //!
232
292
  //! \brief 从输入张量某个起始位置中提取指定大小的切片
233
293
  //!
234
- //!
235
294
  struct SliceParam {
236
295
  //!
237
296
  //! \brief 每个维度切片的起始位置
@@ -249,10 +308,14 @@ struct SliceParam {
249
308
  //! \warning size中元素要求大于等于-1。对应维度offsets,以及offsets + size须在x的对应维度的大小范围内。
250
309
  //!
251
310
  SVector<int64_t> size;
311
+ //!
312
+ //! \brief 预留参数
313
+ //!
314
+ uint8_t rsv[8] = {0};
252
315
  };
253
316
 
254
317
  //!
255
- //! \brief Softmax多分类激活函数,将多维(最大8维)Tensor数据在指定轴上映射到0到1之间,且非softmax轴数值之和为1。
318
+ //! \brief Softmax多分类激活函数,将多维(最大8维)Tensor数据在指定轴上映射到0到1之间,且softmax轴数值之和为1。
256
319
  //!
257
320
  struct SoftmaxParam {
258
321
  //!
@@ -262,6 +325,10 @@ struct SoftmaxParam {
262
325
  //! \warning axes的元素要求大于或等于-1且小于输入x的维度
263
326
  //!
264
327
  SVector<int64_t> axes;
328
+ //!
329
+ //! \brief 预留参数
330
+ //!
331
+ uint8_t rsv[8] = {0};
265
332
  };
266
333
 
267
334
  //!
@@ -270,6 +337,10 @@ struct SoftmaxParam {
270
337
  struct TransposeParam {
271
338
  //! 指示输入维度的重排结果, 需要保证输入正确,维度和输入x一致
272
339
  SVector<int32_t> perm;
340
+ //!
341
+ //! \brief 预留参数
342
+ //!
343
+ uint8_t rsv[8] = {0};
273
344
  };
274
345
 
275
346
  //!
@@ -287,27 +358,28 @@ struct ElewiseParam {
287
358
  //! \brief 计算类型
288
359
  //!
289
360
  enum ElewiseType : int {
290
- ELEWISE_UNDEFINED = 0, //!< 默认值,未定义
291
- ELEWISE_CAST, //!< 数据类型转换
292
- ELEWISE_MULS, //!< 向量逐元素乘值
293
- ELEWISE_COS, //!< 逐元素计算余弦值
294
- ELEWISE_SIN, //!< 逐元素计算正弦值
295
- ELEWISE_NEG, //!< 逐元素取相反数
296
- ELEWISE_QUANT, //!< 量化
297
- ELEWISE_LOGICAL_NOT, //!< 逐元素逻辑非
298
- ELEWISE_ADD, //!< 逐元素相加
299
- ELEWISE_MUL, //!< 向量与向量逐元素相乘
300
- ELEWISE_REALDIV, //!< 向量与向量逐元素相除
301
- ELEWISE_LOGICAL_AND, //!< 逐元素逻辑与
302
- ELEWISE_LOGICAL_OR, //!< 逐元素逻辑或
303
- ELEWISE_LESS, //!< 逐元素判断是否小于
304
- ELEWISE_GREATER, //!< 逐元素判断是否大于
305
- ELEWISE_SUB, //!< 逐元素相减
306
- ELEWISE_EQUAL, //!< 逐元素判断是否相等
307
- ELEWISE_QUANT_PER_CHANNEL, //!< 每个通道量化
308
- ELEWISE_DEQUANT_PER_CHANNEL, //!< 每个通道反量化
309
- ELEWISE_DYNAMIC_QUANT, //!< 逐行动态量化
310
- ELEWISE_TANH, //!< 逐元素计算双曲正切值
361
+ ELEWISE_UNDEFINED = 0, //!< 默认值,未定义
362
+ ELEWISE_CAST, //!< 数据类型转换
363
+ ELEWISE_MULS, //!< 向量逐元素乘值
364
+ ELEWISE_COS, //!< 逐元素计算余弦值
365
+ ELEWISE_SIN, //!< 逐元素计算正弦值
366
+ ELEWISE_NEG, //!< 逐元素取相反数
367
+ ELEWISE_QUANT, //!< 量化, 仅在Atlas 800I A2推理产品上支持
368
+ ELEWISE_LOGICAL_NOT, //!< 逐元素逻辑非
369
+ ELEWISE_ADD, //!< 逐元素相加
370
+ ELEWISE_MUL, //!< 向量与向量逐元素相乘
371
+ ELEWISE_REALDIV, //!< 向量与向量逐元素相除
372
+ ELEWISE_LOGICAL_AND, //!< 逐元素逻辑与
373
+ ELEWISE_LOGICAL_OR, //!< 逐元素逻辑或
374
+ ELEWISE_LESS, //!< 逐元素判断是否小于
375
+ ELEWISE_GREATER, //!< 逐元素判断是否大于
376
+ ELEWISE_SUB, //!< 逐元素相减
377
+ ELEWISE_EQUAL, //!< 逐元素判断是否相等
378
+ ELEWISE_QUANT_PER_CHANNEL, //!< 每个通道量化
379
+ ELEWISE_DEQUANT_PER_CHANNEL, //!< 每个通道反量化
380
+ ELEWISE_DYNAMIC_QUANT, //!< 逐行动态量化
381
+ ELEWISE_TANH, //!< 逐元素计算双曲正切值
382
+ ELEWISE_TYPE_MAX //!< 边界值,仅用于判断是否出界,所有情况不能取该值
311
383
  };
312
384
 
313
385
  //! 量化(非每通道)所需参数
@@ -318,12 +390,20 @@ struct ElewiseParam {
318
390
  bool asymmetric = false; //!< false : symmetric,true : asymmetric
319
391
  //! 量化的偏移度
320
392
  int inputOffset = 0;
393
+ //!
394
+ //! \brief 预留参数
395
+ //!
396
+ uint8_t rsv[20] = {0};
321
397
  };
322
398
 
323
399
  //! 向量乘值所需参数
324
400
  struct MulsParam {
325
401
  //! 向量乘的值
326
402
  float varAttr = 0.0f;
403
+ //!
404
+ //! \brief 预留参数
405
+ //!
406
+ uint8_t rsv[12] = {0};
327
407
  };
328
408
 
329
409
  //! 计算方式
@@ -334,6 +414,10 @@ struct ElewiseParam {
334
414
  MulsParam mulsParam;
335
415
  //! 指定数据类型转换输出的数据类型
336
416
  aclDataType outTensorType = ACL_DT_UNDEFINED;
417
+ //!
418
+ //! \brief 预留参数
419
+ //!
420
+ uint8_t rsv[8] = {0};
337
421
  };
338
422
 
339
423
  //!
@@ -341,18 +425,61 @@ struct ElewiseParam {
341
425
  //!
342
426
  //! \brief KVCache处理。
343
427
  //!
344
- struct KvCacheParam {};
428
+ struct KvCacheParam {
429
+ //!
430
+ //! \brief 预留参数
431
+ //!
432
+ uint8_t rsv[8] = {0};
433
+ };
345
434
 
346
435
  //!
347
436
  //! \struct GatingParam
348
437
  //!
349
438
  //! \brief 主要功能为将token和专家的映射关系反转为专家与token的映射关系。算子输入为MoE模型每个token选中专家的索引,算子输出为MoE模型每个专家对应的token的索引。
350
439
  //!
440
+ //! \note 该算子支持TP和EP场景,当参数deviceExpert为空时,为TP场景,否则为EP场景。
441
+ //!
442
+ //! \warning 非Atlas 800I A2推理产品仅支持TP场景。
443
+ //!
351
444
  struct GatingParam {
352
- //! \brief 每个token选中的专家数。取值大于0。
353
- int32_t topkExpertNum = 0;
354
- //! \brief 专家总数。取值范围为[0, 127]。
445
+ //!
446
+ //! \brief 每个token选中的专家数。
447
+ //!
448
+ //! \note 默认值为1。
449
+ //!
450
+ //! \warning 当cumSumNum为0时,取值为1;否则,取值范围为(0, cumSumNum]。
451
+ //!
452
+ int32_t topkExpertNum = 1;
453
+ //!
454
+ //! \brief 专家总数。
455
+ //!
456
+ //! \note 默认值为0。
457
+ //!
458
+ //! \warning 取值范围为[0, 200]。
459
+ //!
355
460
  int32_t cumSumNum = 0;
461
+ //!
462
+ //! \brief 输出的cumSum的类型是否为int64。
463
+ //!
464
+ //! \note 默认值为false。
465
+ //!
466
+ //! \warning 当为false时,输出的cumSum类型为int32。
467
+ //!
468
+ bool cumSumInt64 = false;
469
+ //!
470
+ //! \brief 当前device上的专家索引列表。
471
+ //!
472
+ //! \note 默认为空。
473
+ //!
474
+ //! \warning 列表中各个元素取值范围为[0, cumSumNum),且其中元素值不可重复。
475
+ //!
476
+ //! \warning 当cumSumNum为0时,不可为空。
477
+ //!
478
+ std::vector<int32_t> deviceExpert;
479
+ //!
480
+ //! \brief 预留参数
481
+ //!
482
+ uint8_t rsv[16] = {0};
356
483
  };
357
484
 
358
485
  //!
@@ -364,15 +491,179 @@ struct ReshapeAndCacheParam {
364
491
  //!
365
492
  //! \brief 压缩类型
366
493
  //!
494
+ //! \note 默认值为COMPRESS_TYPE_UNDEFINED(0),不开启压缩功能。
495
+ //!
496
+ //! \warning 仅在Atlas 800I A2推理产品上支持设置为非COMPRESS_TYPE_UNDEFINED(0)的值
497
+ //!
498
+ enum CompressType : int {
499
+ COMPRESS_TYPE_UNDEFINED = 0, //!< 默认值,不压缩
500
+ COMPRESS_TYPE_KVHEAD, //!< alibi场景下压缩key_cache, value_cache的kvHead维度
501
+ COMPRESS_TYPE_KVHEAD_ROPE //!< rope场景下压缩key_cache, value_cache的kvHead维度
502
+ };
503
+ //!
504
+ //! \enum KvCacheCfg
505
+ //!
506
+ //! \brief KvCache配置
507
+ //!
508
+ //! \note 默认值为K_CACHE_V_CACHE(0),传入key_cache和value_cache
509
+ //!
510
+ //! \warning 仅在Atlas 800I A2推理产品上支持设置为K_CACHE_V_BYPASS(1)
511
+ //!
512
+ enum KvCacheCfg : int {
513
+ K_CACHE_V_CACHE = 0, //!< 默认值,传入key_cache和value_cache
514
+ K_CACHE_V_BYPASS, //!< 只传入key_cache
515
+ K_CACHE_V_CACHE_NZ //!< 传入key_cache和value_cache,且为NZ格式
516
+ };
517
+
518
+ //! 压缩方式
519
+ CompressType compressType = COMPRESS_TYPE_UNDEFINED;
520
+ //! kvcache配置
521
+ KvCacheCfg kvCacheCfg = K_CACHE_V_CACHE;
522
+ //!
523
+ //! \brief 预留参数
524
+ //!
525
+ uint8_t rsv[16] = {0};
526
+ };
527
+
528
+ //!
529
+ //! \brief 遍历每个key和value,将key和value(num_heads, head_size)按照slotmapping填入key_cache/value_cache指定位置
530
+ //!
531
+ struct ReshapeAndCacheWithStrideParam {
532
+ //!
533
+ //! \enum CompressType
534
+ //!
535
+ //! \brief 压缩类型
536
+ //!
537
+ //! \note 默认值为COMPRESS_TYPE_UNDEFINED(0),不开启压缩功能。
538
+ //!
539
+ //! \warning 仅在Atlas 800I A2推理产品上支持设置为非COMPRESS_TYPE_UNDEFINED(0)的值
540
+ //!
367
541
  enum CompressType : int {
368
- COMPRESS_TYPE_UNDEFINED = 0, //!< 默认值,不压缩
369
- COMPRESS_TYPE_KVHEAD //!< 压缩key_cache, value_cahe的kvHead维度
542
+ COMPRESS_TYPE_UNDEFINED = 0, //!< 默认值,不压缩
543
+ COMPRESS_TYPE_KVHEAD, //!< alibi场景下压缩key_cache, value_cache的kvHead维度
544
+ COMPRESS_TYPE_KVHEAD_ROPE //!< rope场景下压缩key_cache, value_cache的kvHead维度
545
+ };
546
+ //!
547
+ //! \enum KvCacheCfg
548
+ //!
549
+ //! \brief KvCache配置
550
+ //!
551
+ //! \note 默认值为K_CACHE_V_CACHE(0),传入key_cache和value_cache
552
+ //!
553
+ //! \warning 仅在Atlas 800I A2推理产品上支持设置为K_CACHE_V_BYPASS(1)
554
+ //!
555
+ enum KvCacheCfg : int {
556
+ K_CACHE_V_CACHE = 0, //!< 默认值,传入key_cache和value_cache
557
+ K_CACHE_V_BYPASS, //!< 只传入key_cache
370
558
  };
371
559
 
372
560
  //! 压缩方式
373
561
  CompressType compressType = COMPRESS_TYPE_UNDEFINED;
562
+ //! kvcache配置
563
+ KvCacheCfg kvCacheCfg = K_CACHE_V_CACHE;
564
+ //!
565
+ //! \brief 预留参数
566
+ //!
567
+ uint8_t rsv[16] = {0};
568
+ };
569
+
570
+ //!
571
+ //! \struct LayerNormWithStrideParam
572
+ //!
573
+ //! \brief LayerNormWithStrideParam归一化处理。当前支持:NORM。
574
+ //!
575
+ //! \warning beginNormAxis维度小于等于输入x的维度。
576
+ //! 所有输入输出Tensor的最后一维大小相等。
577
+ //! Atlas 推理系列产品中不支持bf16类型数据。
578
+ //!
579
+ struct LayerNormWithStrideParam {
580
+ //!
581
+ //! \enum LayerNormType
582
+ //!
583
+ //! \brief 归一化类型:NORM、PRENORM、POSTNORM。
584
+ //!
585
+ enum LayerNormType : int {
586
+ LAYER_NORM_UNDEFINED = 0, //!< 默认值,未定义
587
+ LAYER_NORM_NORM, //!< norm
588
+ LAYER_NORM_PRENORM, //!< prenorm
589
+ LAYER_NORM_POSTNORM, //!< postnorm
590
+ LAYER_NORM_MAX,
591
+ };
592
+ //!
593
+ //! \brief NORM参数。
594
+ //!
595
+ struct NormParam {
596
+ //! \brief 量化类型。
597
+ //! 当前支持以下类型。
598
+ //! QUANT_UNQUANT;
599
+ //! QUANT_INT8
600
+ QuantType quantType = QUANT_UNQUANT;
601
+ //! \brief Epsilon,归一化时加在分母上防止除零。
602
+ float epsilon = 1e-5;
603
+ //! \brief 归一化的维度,默认值为0,从第几维开始norm,同时决定输入gamma和beta维度。
604
+ int32_t beginNormAxis = 0;
605
+ //! \brief 归一化的维度,默认值为0,决定从第几维开始把后面的维度按轴合并。
606
+ int32_t beginParamsAxis = 0;
607
+ //! \brief 动态量化类型。默认为DYNAMIC_QUANT_UNDEFINED非动态量化。当前版本暂不支持非对称动态量化。
608
+ DynamicQuantType dynamicQuantType = DYNAMIC_QUANT_UNDEFINED;
609
+ //!
610
+ //! \brief 预留参数
611
+ //!
612
+ uint8_t rsv[20] = {0};
613
+ };
614
+ //!
615
+ //! \brief PRENORM参数
616
+ //!
617
+ struct PreNormParam {
618
+ //! \brief 量化类型。
619
+ //! 当前仅支持QUANT_UNQUANT。
620
+ QuantType quantType = QUANT_UNQUANT;
621
+ //! \brief Epsilon,归一化时加在分母上防止除零。
622
+ float epsilon = 1e-5;
623
+ //! \brief 0:高精度 1:高性能(暂不支持)。
624
+ uint64_t opMode = 0;
625
+ //! \brief 缩放因子。
626
+ float zoomScaleValue = 1.0f;
627
+ //!
628
+ //! \brief 预留参数
629
+ //!
630
+ uint8_t rsv[20] = {0};
631
+ };
632
+ //!
633
+ //! \brief POSTNORM参数。
634
+ //!
635
+ struct PostNormParam {
636
+ //! \brief 量化类型。
637
+ //! 当前支持以下类型。
638
+ //! QUANT_UNQUANT;
639
+ //! QUANT_INT8
640
+ QuantType quantType = QUANT_UNQUANT;
641
+ //! \brief Epsilon,归一化时加在分母上防止除零。
642
+ float epsilon = 1e-5;
643
+ //! \brief 0:高精度 1:高性能(暂不支持)。
644
+ uint64_t opMode = 0;
645
+ //! \brief 缩放因子。
646
+ float zoomScaleValue = 1.0f;
647
+ //!
648
+ //! \brief 预留参数
649
+ //!
650
+ uint8_t rsv[20] = {0};
651
+ };
652
+ //! \brief layerType
653
+ LayerNormType layerType = LAYER_NORM_UNDEFINED;
654
+ //! \brief normParam
655
+ NormParam normParam;
656
+ //! \brief preNormParam
657
+ PreNormParam preNormParam;
658
+ //! \brief postNormParam
659
+ PostNormParam postNormParam;
660
+ //!
661
+ //! \brief 预留参数
662
+ //!
663
+ uint8_t rsv[8] = {0};
374
664
  };
375
665
 
666
+
376
667
  //!
377
668
  //! \struct LayerNormParam
378
669
  //!
@@ -380,6 +671,7 @@ struct ReshapeAndCacheParam {
380
671
  //!
381
672
  //! \warning beginNormAxis维度小于等于输入x的维度。
382
673
  //! 所有输入输出Tensor的最后一维大小相等。
674
+ //! Atlas 推理系列产品中不支持bf16类型数据。
383
675
  //!
384
676
  struct LayerNormParam {
385
677
  //!
@@ -388,10 +680,10 @@ struct LayerNormParam {
388
680
  //! \brief 归一化类型:NORM、PRENORM、POSTNORM。
389
681
  //!
390
682
  enum LayerNormType : int {
391
- LAYER_NORM_UNDEFINED = 0, //!< 默认值,未定义
392
- LAYER_NORM_NORM, //!< norm
393
- LAYER_NORM_PRENORM, //!< prenorm
394
- LAYER_NORM_POSTNORM, //!< postnorm
683
+ LAYER_NORM_UNDEFINED = 0, //!< 默认值,未定义
684
+ LAYER_NORM_NORM, //!< norm
685
+ LAYER_NORM_PRENORM, //!< prenorm
686
+ LAYER_NORM_POSTNORM, //!< postnorm
395
687
  LAYER_NORM_MAX,
396
688
  };
397
689
  //!
@@ -400,9 +692,9 @@ struct LayerNormParam {
400
692
  struct NormParam {
401
693
  //! \brief 量化类型。
402
694
  //! 当前支持以下类型。
403
- //! QUANT_UNDEINFED
695
+ //! QUANT_UNQUANT
404
696
  //! QUANT_INT8
405
- QuantType quantType = QUANT_UNDEFINED;
697
+ QuantType quantType = QUANT_UNQUANT;
406
698
  //! \brief Epsilon,归一化时加在分母上防止除零。
407
699
  float epsilon = 1e-5;
408
700
  //! \brief 归一化的维度,默认值为0,从第几维开始norm,同时决定输入gamma和beta维度。
@@ -411,20 +703,28 @@ struct LayerNormParam {
411
703
  int32_t beginParamsAxis = 0;
412
704
  //! \brief 动态量化类型。默认为DYNAMIC_QUANT_UNDEFINED非动态量化。当前版本暂不支持非对称动态量化。
413
705
  DynamicQuantType dynamicQuantType = DYNAMIC_QUANT_UNDEFINED;
706
+ //!
707
+ //! \brief 预留参数
708
+ //!
709
+ uint8_t rsv[20] = {0};
414
710
  };
415
711
  //!
416
712
  //! \brief PRENORM参数
417
713
  //!
418
714
  struct PreNormParam {
419
715
  //! \brief 量化类型。
420
- //! 当前仅支持QUANT_UNDEINFED
421
- QuantType quantType = QUANT_UNDEFINED;
716
+ //! 当前仅支持QUANT_UNQUANT
717
+ QuantType quantType = QUANT_UNQUANT;
422
718
  //! \brief Epsilon,归一化时加在分母上防止除零。
423
719
  float epsilon = 1e-5;
424
720
  //! \brief 0:高精度 1:高性能(暂不支持)。
425
- size_t opMode = 0;
721
+ uint64_t opMode = 0;
426
722
  //! \brief 缩放因子。
427
723
  float zoomScaleValue = 1.0f;
724
+ //!
725
+ //! \brief 预留参数
726
+ //!
727
+ uint8_t rsv[20] = {0};
428
728
  };
429
729
  //!
430
730
  //! \brief POSTNORM参数。
@@ -432,15 +732,19 @@ struct LayerNormParam {
432
732
  struct PostNormParam {
433
733
  //! \brief 量化类型。
434
734
  //! 当前支持以下类型。
435
- //! QUANT_UNDEINFED
735
+ //! QUANT_UNQUANT
436
736
  //! QUANT_INT8
437
- QuantType quantType = QUANT_UNDEFINED;
737
+ QuantType quantType = QUANT_UNQUANT;
438
738
  //! \brief Epsilon,归一化时加在分母上防止除零。
439
739
  float epsilon = 1e-5;
440
740
  //! \brief 0:高精度 1:高性能(暂不支持)。
441
- size_t opMode = 0;
741
+ uint64_t opMode = 0;
442
742
  //! \brief 缩放因子。
443
743
  float zoomScaleValue = 1.0f;
744
+ //!
745
+ //! \brief 预留参数
746
+ //!
747
+ uint8_t rsv[20] = {0};
444
748
  };
445
749
  //! \brief layerType
446
750
  LayerNormType layerType = LAYER_NORM_UNDEFINED;
@@ -450,6 +754,10 @@ struct LayerNormParam {
450
754
  PreNormParam preNormParam;
451
755
  //! \brief postNormParam
452
756
  PostNormParam postNormParam;
757
+ //!
758
+ //! \brief 预留参数
759
+ //!
760
+ uint8_t rsv[8] = {0};
453
761
  };
454
762
 
455
763
  //!
@@ -458,30 +766,31 @@ struct LayerNormParam {
458
766
  //! \brief RMS归一化处理。
459
767
  //!
460
768
  //! \warning 所有输入输出Tensor的最后一维大小相等。
769
+ //! Atlas 推理系列产品中不支持bf16类型数据。
461
770
  //!
462
771
  struct RmsNormParam {
463
772
  //!
464
773
  //! \brief RmsNormType
465
774
  //!
466
775
  enum RmsNormType : int {
467
- RMS_NORM_UNDEFINED = 0, //!< 默认值,未定义
468
- RMS_NORM_NORM, //!< NORM参数。
469
- RMS_NORM_PRENORM, //!< PRENORM参数。
470
- RMS_NORM_POSTNORM, //!< POSTNORM参数
776
+ RMS_NORM_UNDEFINED = 0, //!< 默认值,未定义
777
+ RMS_NORM_NORM, //!< NORM参数。
778
+ RMS_NORM_PRENORM, //!< PRENORM参数。
779
+ RMS_NORM_POSTNORM, //!< POSTNORM参数
471
780
  };
472
781
  //!
473
782
  //! \brief PrecisionMode
474
783
  //!
475
784
  enum PrecisionMode : int {
476
- HIGH_PRECISION_MODE = 0, //!< 中间计算使用fp32类型
477
- HIGH_PERFORMANCE_MODE, //!< 中间计算使用fp16类型
785
+ HIGH_PRECISION_MODE = 0, //!< 中间计算使用float类型
786
+ HIGH_PERFORMANCE_MODE, //!< 中间计算使用float16类型
478
787
  };
479
788
  //!
480
789
  //! \brief ModelType
481
790
  //!
482
791
  enum ModelType : int {
483
- LLAMA_MODEL = 0, //!< 默认值,使用Llama rmsnorm的公式
484
- GEMMA_MODEL, //!< 使用Gemma rmsnorm的公式
792
+ LLAMA_MODEL = 0, //!< 默认值,使用Llama rmsnorm的公式
793
+ GEMMA_MODEL, //!< 使用Gemma rmsnorm的公式
485
794
  };
486
795
  //!
487
796
  //! \brief NormParam
@@ -489,8 +798,8 @@ struct RmsNormParam {
489
798
  struct NormParam {
490
799
  //! \brief 量化类型。
491
800
  //! 当前支持以下类型。
492
- //! QUANT_UNDEINFED, QUANT_INT8
493
- QuantType quantType = QUANT_UNDEFINED;
801
+ //! QUANT_UNQUANT, QUANT_INT8
802
+ QuantType quantType = QUANT_UNQUANT;
494
803
  //! \brief Epsilon,归一化时加在分母上防止除零。
495
804
  float epsilon = 1e-5;
496
805
  //! \brief Epsilon,默认为1e-5,暂时不使用。
@@ -500,9 +809,9 @@ struct RmsNormParam {
500
809
  bool rstd = false;
501
810
  //! \brief 默认为HIGH_PRECISION_MODE。
502
811
  //! 支持参数如下:
503
- //! HIGH_PRECISION_MODE:默认值,中间计算使用fp32类型
504
- //! HIGH_PERFORMANCE_MODE: 中间计算使用fp16类型
505
- //! 不支持和“rstd”,“modelType”同时设置。
812
+ //! HIGH_PRECISION_MODE:默认值,中间计算使用float类型
813
+ //! HIGH_PERFORMANCE_MODE: 中间计算使用float16类型
814
+ //! 不支持和“rstd”,“modelType”同时设置。输入类型只支持float16。
506
815
  //! 量化场景下不支持使用“precisionMode”,该场景下配置该参数将返回报错ERROR_INVALID_PARAM。
507
816
  PrecisionMode precisionMode = HIGH_PRECISION_MODE;
508
817
  //! \brief 默认为LLAMA_MODEL,设置为GEMMA_MODEL时使用gemma模型的rmsnorm计算公式。
@@ -514,6 +823,10 @@ struct RmsNormParam {
514
823
  ModelType modelType = LLAMA_MODEL;
515
824
  //! \brief 动态量化类型。默认为DYNAMIC_QUANT_UNDEFINED非动态量化。当前版本暂不支持非对称动态量化。
516
825
  DynamicQuantType dynamicQuantType = DYNAMIC_QUANT_UNDEFINED;
826
+ //!
827
+ //! \brief 预留参数
828
+ //!
829
+ uint8_t rsv[32] = {0};
517
830
  };
518
831
  //!
519
832
  //! \brief PreNormParam
@@ -521,25 +834,33 @@ struct RmsNormParam {
521
834
  struct PreNormParam {
522
835
  //! \brief 量化类型。
523
836
  //! 当前支持以下类型。
524
- //! QUANT_UNDEINFED
837
+ //! QUANT_UNQUANT
525
838
  //! QUANT_INT8
526
- QuantType quantType = QUANT_UNDEFINED;
839
+ QuantType quantType = QUANT_UNQUANT;
527
840
  //! \brief Epsilon,归一化时加在分母上防止除零。
528
841
  float epsilon = 1e-5;
529
842
  //! \brief 是否叠加偏置。默认为False,当需要输入beta时设置为True。量化场景下不支持使用“hasBias”,该场景下配置该参数将返回报错ERROR_INVALID_PARAM。
530
843
  bool hasBias = false;
844
+ //!
845
+ //! \brief 预留参数
846
+ //!
847
+ uint8_t rsv[23] = {0};
531
848
  };
532
849
  //!
533
850
  //! \brief PostNormParam
534
851
  //!
535
852
  struct PostNormParam {
536
853
  //! \brief 量化类型。
537
- //! 当前仅支持QUANT_UNDEINFED
538
- QuantType quantType = QUANT_UNDEFINED;
854
+ //! 当前仅支持QUANT_UNQUANT
855
+ QuantType quantType = QUANT_UNQUANT;
539
856
  //! \brief Epsilon,归一化时加在分母上防止除零。
540
857
  float epsilon = 1e-5;
541
858
  //! \brief 是否叠加偏置。默认为False,当需要输入beta时设置为True。
542
859
  bool hasBias = false;
860
+ //!
861
+ //! \brief 预留参数
862
+ //!
863
+ uint8_t rsv[23] = {0};
543
864
  };
544
865
  //! \brief 归一化类型,参数如下:
545
866
  //! RMS_NORM_UNDEFINED:默认值,未定义。
@@ -553,123 +874,206 @@ struct RmsNormParam {
553
874
  PreNormParam preNormParam;
554
875
  //! \brief POSTNORM参数。
555
876
  PostNormParam postNormParam;
877
+ //!
878
+ //! \brief 预留参数
879
+ //!
880
+ uint8_t rsv[8] = {0};
556
881
  };
557
882
 
558
883
  //!
559
- //! \struct FillParam
560
- //!
561
- //! \brief 将指定位置设置为value值或者生成一个指定Shape的Tensor并填充为value。
562
- //!
563
- //! \warning 输入x不可以被broadcast。输入mask的元素只能是0或者1,且可以被broadcast。
564
- //!
565
- struct FillParam {
566
- //! \brief 是否Masked Fill。
567
- bool withMask = true;
568
- //! \brief 填充的元素,value是一个只含有一个元素的SVector。
569
- SVector<float> value;
570
- //! \brief withMask = false时,表示输出Tensor的Shape。
571
- SVector<int64_t> outDim;
572
- };
573
-
574
- //!
575
- //! \struct AllGatherParam
576
- //!
577
- //! \brief 将多个通信卡上的数据按所属rank号的顺序在第一维进行聚合,然后发送到每张卡上.
578
- //!
579
- //! rank、rankSize、rankRoot需满足以下条件:
580
- //! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
884
+ //! \struct RmsNormWithStrideParam
581
885
  //!
582
- //! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
583
- //! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
886
+ //! \brief RMS归一化处理。
584
887
  //!
585
- //! \code
586
- //! rm -rf /dev/shm/sem.lccl*
587
- //! rm -rf /dev/shm/sem.hccl*
588
- //! ipcrm -a
589
- //! \endcode
888
+ //! \warning 所有输入输出Tensor的最后一维大小相等。
889
+ //! Atlas 推理系列产品中不支持bf16类型数据。
590
890
  //!
591
- struct AllGatherParam {
592
- //! \brief 每张卡所属通信编号
593
- int rank = 0;
594
- //! \brief 通信的卡的数量
595
- int rankSize = 0;
596
- //! \brief 主通信编号
597
- int rankRoot = 0;
598
- //! \brief 通信后端指示,仅支持"hccl"和"lccl",Atlas 推理系列产品(配置Atlas 300I DUO)仅支持backend为"hccl"。
891
+ struct RmsNormWithStrideParam {
599
892
  //!
600
- //! 推理系列产品(配置Atlas 300I DUO)不支持bf16。
601
- //! 当backend为"lccl"时,且若机器拓扑为Atlas 800I A2单机16卡机器的拓扑时,只支持16卡全量拓扑通信或单节点内任意卡通信。
602
- //!
603
- std::string backend = "hccl";
604
- //! \brief HCCL通信域指针
605
- HcclComm hcclComm = nullptr;
606
- //! \brief 通信模式,CommMode类型枚举值。hccl多线程只支持外部传入通信域方式
607
- CommMode commMode = COMM_MULTI_PROCESS;
893
+ //! \brief RmsNormType
608
894
  //!
609
- //! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
895
+ enum RmsNormType : int {
896
+ RMS_NORM_UNDEFINED = 0, //!< 默认值,未定义
897
+ RMS_NORM_NORM, //!< NORM参数。
898
+ RMS_NORM_PRENORM, //!< PRENORM参数。
899
+ RMS_NORM_POSTNORM, //!< POSTNORM参数
900
+ };
610
901
  //!
611
- //! ranktable配置参考
612
- //! https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC1alpha002/devguide/moddevg/tfmigr1/tfmigr1_000029.html
902
+ //! \brief PrecisionMode
613
903
  //!
614
- std::string rankTableFile;
615
- //! \brief 通信device组用通信域名标识,多通信域时使用,当前仅支持hccl
616
- std::string commDomain;
617
- };
618
-
619
- //!
620
- //! \struct AllReduceParam
621
- //!
622
- //! \brief 将多个通信卡上的数据进行计算,支持相加、取最大、最小、相乘四种计算,然后发送到每张卡上.
623
- //!
624
- //! rank、rankSize、rankRoot需满足以下条件:
625
- //! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
626
- //!
627
- //! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
628
- //! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
629
- //!
630
- //! \code
631
- //! rm -rf /dev/shm/sem.lccl*
632
- //! rm -rf /dev/shm/sem.hccl*
633
- //! ipcrm -a
634
- //! \endcode
635
- //!
636
- struct AllReduceParam {
637
- //! \brief 每张卡所属通信编号.
638
- int rank = 0;
639
- //! \brief 通信的卡的数量.
640
- int rankSize = 0;
641
- //! \brief 主通信编号.
642
- int rankRoot = 0;
643
- //! \brief 通信计算类型,支持"sum","prod","max"和"min".
644
- std::string allReduceType = "sum";
904
+ enum PrecisionMode : int {
905
+ HIGH_PRECISION_MODE = 0, //!< 中间计算使用float类型
906
+ HIGH_PERFORMANCE_MODE, //!< 中间计算使用float16类型
907
+ };
645
908
  //!
646
- //! \brief 通信计算类型,仅支持"hccl"和"lccl".推理系列产品(配置Atlas 300I DUO)仅支持backend为"hccl"。
909
+ //! \brief ModelType
647
910
  //!
648
- //! backend为"hccl"时,支持"sum","prod","max"和"min"; backend为"lccl"时,支持"sum","max"和"min".
649
- //! 当backend为"hccl"时,allReduceType为"prod"时,不支持数据类型为int16和bf16。
650
- //! 当backend为"hccl"时,推理系列产品(配置Atlas 300I DUO)不支持int64,bf16,int16只有allReduceType为"sum"时支持
651
- //! 当backend为"lccl"时,不支持数据类型int64,且若机器拓扑为Atlas 800I A2单机16卡机器的拓扑时,只支持16卡全量拓扑通信或单节点内任意卡通信。
911
+ enum ModelType : int {
912
+ LLAMA_MODEL = 0, //!< 默认值,使用Llama rmsnorm的公式
913
+ GEMMA_MODEL, //!< 使用Gemma rmsnorm的公式
914
+ };
915
+ //!
916
+ //! \brief NormParam
917
+ //!
918
+ struct NormParam {
919
+ //! \brief 量化类型。
920
+ //! 当前支持以下类型。
921
+ //! QUANT_UNQUANT, QUANT_INT8
922
+ QuantType quantType = QUANT_UNQUANT;
923
+ //! \brief Epsilon,归一化时加在分母上防止除零。
924
+ float epsilon = 1e-5;
925
+ //! \brief Epsilon,默认为1e-5,暂时不使用。
926
+ double layerNormEps = 1e-5;
927
+ //! \brief 默认为False,设置为true时会使用训练的rmsnormforward算子。仅在Atlas 800I A2推理产品上支持该设置。
928
+ //! 不支持和“precisionMode”,“modelType”同时设置。量化场景下不支持使用“rstd”。
929
+ bool rstd = false;
930
+ //! \brief 默认为HIGH_PRECISION_MODE。
931
+ //! 支持参数如下:
932
+ //! HIGH_PRECISION_MODE:默认值,中间计算使用float类型
933
+ //! HIGH_PERFORMANCE_MODE: 中间计算使用float16类型
934
+ //! 不支持和“rstd”,“modelType”同时设置。输入类型只支持float16。
935
+ //! 量化场景下不支持使用“precisionMode”,该场景下配置该参数将返回报错ERROR_INVALID_PARAM。
936
+ PrecisionMode precisionMode = HIGH_PRECISION_MODE;
937
+ //! \brief 默认为LLAMA_MODEL,设置为GEMMA_MODEL时使用gemma模型的rmsnorm计算公式。
938
+ //! 支持参数如下:
939
+ //! LLAMA_MODEL:默认值, Llama的rms norm计算公式。
940
+ //! GEMMA_MODEL:Gemma的rms norm计算公式。
941
+ //! 不支持和“rstd”,“precisionMode”同时启用。
942
+ //! 量化场景下不支持使用“modelType”,该场景下配置该参数将返回报错ERROR_INVALID_PARAM。
943
+ ModelType modelType = LLAMA_MODEL;
944
+ //! \brief 动态量化类型。默认为DYNAMIC_QUANT_UNDEFINED非动态量化。当前版本暂不支持非对称动态量化。
945
+ DynamicQuantType dynamicQuantType = DYNAMIC_QUANT_UNDEFINED;
946
+ //!
947
+ //! \brief 预留参数
948
+ //!
949
+ uint8_t rsv[32] = {0};
950
+ };
951
+ //!
952
+ //! \brief PreNormParam
953
+ //!
954
+ struct PreNormParam {
955
+ //! \brief 量化类型。
956
+ //! 当前支持以下类型。
957
+ //! QUANT_UNQUANT
958
+ //! QUANT_INT8
959
+ QuantType quantType = QUANT_UNQUANT;
960
+ //! \brief Epsilon,归一化时加在分母上防止除零。
961
+ float epsilon = 1e-5;
962
+ //! \brief 是否叠加偏置。默认为False,当需要输入beta时设置为True。量化场景下不支持使用“hasBias”,该场景下配置该参数将返回报错ERROR_INVALID_PARAM。
963
+ bool hasBias = false;
964
+ //!
965
+ //! \brief 预留参数
966
+ //!
967
+ uint8_t rsv[23] = {0};
968
+ };
969
+ //!
970
+ //! \brief PostNormParam
971
+ //!
972
+ struct PostNormParam {
973
+ //! \brief 量化类型。
974
+ //! 当前仅支持QUANT_UNQUANT。
975
+ QuantType quantType = QUANT_UNQUANT;
976
+ //! \brief Epsilon,归一化时加在分母上防止除零。
977
+ float epsilon = 1e-5;
978
+ //! \brief 是否叠加偏置。默认为False,当需要输入beta时设置为True。
979
+ bool hasBias = false;
980
+ //!
981
+ //! \brief 预留参数
982
+ //!
983
+ uint8_t rsv[23] = {0};
984
+ };
985
+ //! \brief 归一化类型,参数如下:
986
+ //! RMS_NORM_UNDEFINED:默认值,未定义。
987
+ //! RMS_NORM_NORM:NORM参数。
988
+ //! RMS_NORM_PRENORM:PRENORM参数。
989
+ //! RMS_NORM_POSTNORM:POSTNORM参数。
990
+ RmsNormType layerType = RMS_NORM_UNDEFINED;
991
+ //! \brief NORM参数。
992
+ NormParam normParam;
993
+ //! \brief PRENORM参数。
994
+ PreNormParam preNormParam;
995
+ //! \brief POSTNORM参数。
996
+ PostNormParam postNormParam;
997
+ //!
998
+ //! \brief 预留参数
999
+ //!
1000
+ uint8_t rsv[8] = {0};
1001
+ };
1002
+
1003
+ //!
1004
+ //! \struct FillParam
1005
+ //!
1006
+ //! \brief 将指定位置设置为value值或者生成一个指定Shape的Tensor并填充为value。
1007
+ //!
1008
+ //! \warning 输入x不可以被broadcast。输入mask的元素只能是0或者1,且可以被broadcast。
1009
+ //!
1010
+ struct FillParam {
1011
+ //! \brief 是否Masked Fill。
1012
+ bool withMask = true;
1013
+ //! \brief 填充的元素,value是一个只含有一个元素的SVector。
1014
+ SVector<float> value;
1015
+ //! \brief withMask = false时,表示输出Tensor的Shape。
1016
+ SVector<int64_t> outDim;
1017
+ //!
1018
+ //! \brief 预留参数
1019
+ //!
1020
+ uint8_t rsv[8] = {0};
1021
+ };
1022
+
1023
+ //!
1024
+ //! \struct AllGatherParam
1025
+ //!
1026
+ //! \brief 将多个通信卡上的数据按所属rank号的顺序在第一维进行聚合,然后发送到每张卡上.
1027
+ //!
1028
+ //! rank、rankSize、rankRoot需满足以下条件:
1029
+ //! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
1030
+ //!
1031
+ //! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
1032
+ //! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
1033
+ //!
1034
+ //! \code
1035
+ //! rm -rf /dev/shm/sem.lccl*
1036
+ //! rm -rf /dev/shm/sem.hccl*
1037
+ //! ipcrm -a
1038
+ //! \endcode
1039
+ //!
1040
+ struct AllGatherParam {
1041
+ //! \brief 当前卡所属通信编号
1042
+ int rank = 0;
1043
+ //! \brief 通信的卡的数量
1044
+ int rankSize = 0;
1045
+ //! \brief 主通信编号
1046
+ int rankRoot = 0;
1047
+ //! \brief 通信后端指示,仅支持"hccl"和"lccl",Atlas 推理系列产品仅支持backend为"hccl"。
1048
+ //!
1049
+ //! 当backend为"lccl"时,且若机器拓扑为Atlas 800I A2推理产品单机16卡机器的拓扑时,只支持16卡全量拓扑通信或单节点内任意卡通信。
652
1050
  //!
653
1051
  std::string backend = "hccl";
654
- //! \brief HCCL通信域指针.
1052
+ //! \brief HCCL通信域指针
1053
+ //! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
655
1054
  HcclComm hcclComm = nullptr;
656
- //! \brief 通信模式,CommMode类型枚举值.hccl多线程只支持外部传入通信域方式
1055
+ //! \brief 通信模式,CommMode类型枚举值。hccl多线程只支持外部传入通信域方式
657
1056
  CommMode commMode = COMM_MULTI_PROCESS;
658
1057
  //!
659
1058
  //! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
660
1059
  //!
661
- //! ranktable配置参考
662
- //! https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC1alpha002/devguide/moddevg/tfmigr1/tfmigr1_000029.html
663
- //!
664
1060
  std::string rankTableFile;
665
- //! \brief 通信device组用通信域名标识,多通信域时使用,当前仅支持hccl
1061
+ //! \brief 通信device组用通信域名标识,多通信域时使用。
1062
+ //! 当backend为"lccl"时,commMode为多进程时,commDomain需要设置为0-63的数字。
1063
+ //! commMode为多线程时,不支持确定性计算,"LCCL_DETERMINISTIC"需要为0或者false。
1064
+ //! LCCL在多进程/多线程多通信域并发场景下,"LCCL_PARALLEL"需要设置为1或者true。
1065
+ //! 多通信域并行功能使用结束后,"LCCL_PARALLEL"需要设置为0或者false,否则会导致基础场景性能下降。
666
1066
  std::string commDomain;
1067
+ //!
1068
+ //! \brief 预留参数
1069
+ //!
1070
+ uint8_t rsv[64] = {0};
667
1071
  };
668
1072
 
669
1073
  //!
670
- //! \struct BroadcastParam
1074
+ //! \struct AllGatherVParam
671
1075
  //!
672
- //! \brief 将通信主卡上的数据广播到其他每张卡上, 该算子不支持推理系列产品(配置Atlas 300I DUO)。
1076
+ //! \brief 将多个通信卡上的数据按所属rank号的顺序在第一维进行聚合,然后发送到每张卡上.支持每张卡的数据不等长
673
1077
  //!
674
1078
  //! rank、rankSize、rankRoot需满足以下条件:
675
1079
  //! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
@@ -683,19 +1087,23 @@ struct AllReduceParam {
683
1087
  //! ipcrm -a
684
1088
  //! \endcode
685
1089
  //!
686
- struct BroadcastParam {
687
- //! \brief 每张卡所属通信编号.
688
- int rank = 0;
689
- //! \brief 通信的卡的数量.
1090
+ struct AllGatherVParam {
1091
+ //! \brief 当前卡所属通信编号, 默认值为-1, 代表没传rank参数
1092
+ int rank = -1;
1093
+ //! \brief 通信的卡的数量
690
1094
  int rankSize = 0;
691
- //! \brief 主通信编号.
1095
+ //! \brief 主通信编号
692
1096
  int rankRoot = 0;
693
- //! \brief HCCL通信域指针.
1097
+ //! \brief 通信后端指示,仅支持"hccl"和"lccl",Atlas 推理系列产品(Ascend 310P AI处理器)仅支持backend为"hccl"。
1098
+ //!
1099
+ //! 当backend为"lccl"时,且若机器拓扑为Atlas 800I A2推理产品单机16卡机器的拓扑时,只支持16卡全量拓扑通信或单节点内任意卡通信。
1100
+ //!
1101
+ std::string backend = "hccl";
1102
+ //! \brief HCCL通信域指针
1103
+ //! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
694
1104
  HcclComm hcclComm = nullptr;
695
- //! \brief 通信模式,CommMode类型枚举值.hccl多线程只支持外部传入通信域方式
1105
+ //! \brief 通信模式,CommMode类型枚举值。hccl多线程只支持外部传入通信域方式
696
1106
  CommMode commMode = COMM_MULTI_PROCESS;
697
- //! \brief 通信后端指示,仅支持"hccl"和"lccl"。
698
- std::string backend = "hccl";
699
1107
  //!
700
1108
  //! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
701
1109
  //!
@@ -705,178 +1113,1338 @@ struct BroadcastParam {
705
1113
  std::string rankTableFile;
706
1114
  //! \brief 通信device组用通信域名标识,多通信域时使用,当前仅支持hccl
707
1115
  std::string commDomain;
1116
+ //!
1117
+ //! \brief 预留参数
1118
+ //!
1119
+ uint8_t rsv[64] = {0};
708
1120
  };
709
1121
 
710
1122
  //!
711
- //! \struct LinearParam
712
- //!
713
- //! \brief 将A、B两个矩阵进行矩阵乘运算,同时可以选择对矩阵乘的运算结果添加偏置或进行反量化操作。
714
- //!
715
- //! 算子本质上是接收x和weight两个输入tensor作为A矩阵和B矩阵进行矩阵乘运算,可通过参数transposeA与transposeB控制做矩阵乘前是否需要对A矩阵和B矩阵进行行列转置,
716
- //! 根据参数转置后的A矩阵和B矩阵需满足矩阵乘维度关系,即A矩阵最后一维与B矩阵第0维相等。该算子分为浮点和量化两类,可通过输出数据类型进行选择。
1123
+ //! \brief 判断参数是否相同
717
1124
  //!
718
- //! \warning 在Atlas 推理系列产品(配置Atlas 300I DUO)中,不支持BF16数据类型的计算,即输入和输出张量的数据类型均不支持BF16。
1125
+ //! \param left
1126
+ //! \param right
1127
+ //! \return bool
719
1128
  //!
720
- struct LinearParam {
721
- //! \brief 是否转置A矩阵,默认不转置。
722
- //!
723
- //! 当输入x的维度为3时,transposeA必须为false。
724
- //! 在Atlas 推理系列产品(配置Atlas 300I DUO)中,量化情况下,transposeA必须为false。
725
- bool transposeA = false;
726
- //! \brief 是否转置B矩阵,默认转置。
727
- //!
728
- //! 在Atlas 推理系列产品(配置Atlas 300I DUO)中,量化情况下,transposeB必须为true。
729
- bool transposeB = true;
730
- //! \brief 是否叠加偏置。
731
- //!
732
- //! 在Atlas 推理系列产品(配置Atlas 300I DUO)中,量化情况下,hasBias必须为true。
733
- bool hasBias = true;
734
- //! \brief 输出数据类型.
735
- //!
736
- //! 若为浮点linear,参数outDataType配置为ACL_DT_UNDEFINED,表示输出tensor的数据类型与输入tensor一致;
737
- //! 若为量化linear,输出tensor的数据类型与输入tensor不一致,则参数outDataType配置为用户预期输出tensor的数据类型,
738
- //! 目前仅支持ACL_FLOAT16/ACL_BF16,在Atlas 推理系列产品(配置Atlas 300I DUO)中,不支持ACL_BF16。
739
- aclDataType outDataType = ACL_DT_UNDEFINED;
740
- };
1129
+ inline bool operator==(const AllGatherVParam &left, const AllGatherVParam &right)
1130
+ {
1131
+ return left.rank == right.rank && left.rankSize == right.rankSize && left.rankRoot == right.rankRoot &&
1132
+ left.hcclComm == right.hcclComm && left.commMode == right.commMode && left.backend == right.backend &&
1133
+ left.rankTableFile == right.rankTableFile && left.commDomain == right.commDomain;
1134
+ }
741
1135
 
742
1136
  //!
743
- //! \struct LinearParallelParam
1137
+ //! \struct AllReduceParam
744
1138
  //!
745
- //! \brief 通信计算并行算子,该算子功能为linear和通信算子组合
1139
+ //! \brief 将多个通信卡上的数据进行计算,支持相加、取最大、最小、相乘四种计算,然后发送到每张卡上.
746
1140
  //!
747
- //! 通信和计算是并行处理,和串行相比存在大幅度性能提升.
1141
+ //! rank、rankSize、rankRoot需满足以下条件:
1142
+ //! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
748
1143
  //!
749
- //! \see LinearParam,AllReduceParam,AllGatherParam
1144
+ //! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
1145
+ //! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
750
1146
  //!
751
- struct LinearParallelParam {
752
- //!
753
- //! \enum ParallelType
754
- //!
755
- //! \brief 通信类型
756
- //!
757
- enum ParallelType : int {
758
- UNDEFINED = -1, //!< 默认值
759
- LINEAR_ALL_REDUCE = 0, //!< linear+AllReduce
760
- LINEAR_REDUCE_SCATTER = 1, //!< linear+reduce_scatter
761
- ALL_GATHER_LINEAR = 2, //!< AllGather+linear
762
- PURE_LINEAR = 3, //!< linear
763
- MAX = 4, //!< 枚举类型最大值
764
- };
765
- //!
766
- //! \enum QuantType
767
- //!
768
- //! \brief QuantType类型
769
- //!
1147
+ //! \code
1148
+ //! rm -rf /dev/shm/sem.lccl*
1149
+ //! rm -rf /dev/shm/sem.hccl*
1150
+ //! ipcrm -a
1151
+ //! \endcode
1152
+ //!
1153
+ struct AllReduceParam {
1154
+ //! \brief 量化类型
770
1155
  enum QuantType : int {
771
- QUANT_TYPE_UNDEFINED = -1, //!< 默认值
772
- QUANT_TYPE_PER_TENSOR = 0, //!< 对整个张量进行量化
773
- QUANT_TYPE_PER_CHANNEL = 1, //!< 对张量中每个channel分别进行量化
774
- QUANT_TYPE_PER_GROUP = 2, //!< 将张量按quantGroupSize划分后,分别进行量化
1156
+ QUANT_TYPE_UNQUANT = 0, //!< 默认值
1157
+ QUANT_TYPE_UNDEFINED = 0, //!< 默认值
1158
+ QUANT_TYPE_PER_TENSOR = 1, //!< 对整个张量进行量化
1159
+ QUANT_TYPE_PER_CHANNEL = 2, //!< 对张量中每个channel分别进行量化
775
1160
  QUANT_TYPE_MAX = 3, //!< 枚举类型最大值
776
1161
  };
777
- //! \brief 权重是否需要转置,默认为true。
778
- bool transWeight = true;
779
- //! \brief 每张卡所属通信编号.
1162
+
1163
+ //! \brief 当前卡所属通信编号.
780
1164
  int rank = 0;
781
- //! \brief 通信的卡的数量
1165
+ //! \brief 通信的卡的数量.
782
1166
  int rankSize = 0;
783
- //! \brief 主通信编号
1167
+ //! \brief 主通信编号.
784
1168
  int rankRoot = 0;
785
- //! \brief 是否叠加残差。配置为false时不叠加残差,为true时叠加残差。默认不叠加残差。
786
- bool hasResidual = false;
787
- //! \brief 通信后端指示。支持"hccl","lccl","lcoc"。
1169
+ //! \brief 通信计算类型,支持"sum","prod","max"和"min".
1170
+ std::string allReduceType = "sum";
1171
+ //!
1172
+ //! \brief 通信后端指示,仅支持"hccl"和"lccl"。Atlas 推理系列产品仅支持backend为"hccl"。
1173
+ //!
1174
+ //! backend为"hccl"时,支持"sum","prod","max"和"min"; backend为"lccl"时,支持"sum","max"和"min".
1175
+ //! 当backend为"hccl"时,allReduceType为"prod"时,不支持数据类型为int16和bf16。
1176
+ //! 当backend为"hccl"时,Atlas 推理系列产品不支持int64,bf16,int16只有allReduceType为"sum"时支持
1177
+ //! 当backend为"lccl"时,不支持数据类型int64,且若机器拓扑为Atlas 800I A2推理产品单机16卡机器的拓扑时,只支持16卡全量拓扑通信或单节点内任意卡通信。
1178
+ //!
788
1179
  std::string backend = "hccl";
789
- //! \brief HCCL通信域接口获取的地址指针,仅当"hcclComm"不为nullptr时可用。
1180
+ //! \brief HCCL通信域指针.
1181
+ //! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
790
1182
  HcclComm hcclComm = nullptr;
791
- //! \brief 通信模式,CommMode类型枚举值
1183
+ //! \brief 通信模式,CommMode类型枚举值.hccl多线程只支持外部传入通信域方式
792
1184
  CommMode commMode = COMM_MULTI_PROCESS;
793
- //! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景。
794
- std::string rankTableFile;
795
- //! \brief 权重并行类型。
796
- ParallelType type = LINEAR_ALL_REDUCE;
797
- //! \brief 是否返回中间结果,仅在使用ALL_GATHER_LINEAR时生效。
798
- bool keepIntermediate = false;
799
- //! \brief 量化类型。
800
- QuantType quantType = QUANT_TYPE_UNDEFINED;
801
- //! \brief 量化类型为QUANT_TYPE_PER_GROUP时生效。
802
- int32_t quantGroupSize = 0;
803
1185
  //!
804
- //! 若为浮点linear,参数outDataType配置为ACL_DT_UNDEFINED,表示输出tensor的数据类型与输入tensor一致,
805
- //! 若为量化linear,输出tensor的数据类型与输入tensor不一致,则参数outDataType配置为用户预期输出tensor的数据类型,
806
- //! 如ACL_FLOAT16/ACL_BF16
807
- aclDataType outDataType = ACL_DT_UNDEFINED;
808
- //! \brief 通信device组用通信域名标识,多通信域时使用,当前仅支持hccl
1186
+ //! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
1187
+ //!
1188
+ std::string rankTableFile;
1189
+ //! \brief 通信device组用通信域名标识,多通信域时使用。
1190
+ //! 当backend为"lccl"时,commMode为多进程时,commDomain需要设置为0-63的数字。
1191
+ //! commMode为多线程时,不支持确定性计算,"LCCL_DETERMINISTIC"需要为0或者false。
1192
+ //! LCCL在多进程/多线程多通信域并发场景下,"LCCL_PARALLEL"需要设置为1或者true。
1193
+ //! 多通信域并行功能使用结束后,"LCCL_PARALLEL"需要设置为0或者false,否则会导致基础场景性能下降。
809
1194
  std::string commDomain;
1195
+ //! \brief 量化类型
1196
+ QuantType quantType = QUANT_TYPE_UNQUANT;
1197
+ //! 若为浮点AllReduce,参数outDataType配置为ACL_DT_UNDEFINED,表示输出tensor的数据类型与输入tensor一致;
1198
+ //! 若为量化AllReduce,输出tensor的数据类型与输入tensor不一致,则参数outDataType配置为用户预期输出tensor的数据类型,
1199
+ //! 量化只支持配置ACL_FLOAT16
1200
+ aclDataType outDataType = ACL_DT_UNDEFINED;
1201
+ //!
1202
+ //! \brief 预留参数
1203
+ //!
1204
+ uint8_t rsv[64] = {0};
810
1205
  };
811
1206
 
812
1207
  //!
813
- //! \struct LinearSparseParam
814
- //!
815
- //! \brief 稀疏量化linear
1208
+ //! \struct BlockCopyParam
816
1209
  //!
817
- //! 该算子实现功能与量化linear类似。不同点在于稀疏量化算子会使用压缩工具提前对weight输入进行压缩,
818
- //! 以此提升算子性能。参数tilingK和tilingN由压缩算法决定,目前均只支持取值为8.
819
- //! 目前该算子仅支持在Atlas 推理系列产品(配置Atlas 300I DUO)中进行运算。
1210
+ //! \brief 将KVCache里通过src indices指定的block数据copy到dst indices指定的block位置上。
820
1211
  //!
821
- struct LinearSparseParam {
822
- //! \brief 是否转置A矩阵,默认不转置。当前仅支持transposeA = false。
823
- bool transposeA = false;
824
- //! \brief 是否转置B矩阵,默认转置。当前仅支持transposeB = true。
825
- bool transposeB = true;
826
- //! \brief 压缩参数,由外部压缩算法决定,默认为1,目前仅支持取值为8。
827
- uint32_t tilingK = 1;
828
- //! \brief 压缩参数,由外部压缩算法决定,默认为1,目前仅支持取值为8。
829
- uint32_t tilingN = 1;
1212
+ struct BlockCopyParam {
1213
+ //!
1214
+ //! \brief 预留参数
1215
+ //!
1216
+ uint8_t rsv[16] = {0};
830
1217
  };
831
1218
 
832
1219
  //!
833
- //! \struct FfnParam
834
- //!
835
- //! \brief 暂不支持
1220
+ //! \struct BroadcastParam
836
1221
  //!
837
- struct FfnParam {
838
- //! \brief 暂不支持
839
- bool firstTransposeA = false;
840
- //! \brief 暂不支持
841
- bool firstTransposeB = false;
842
- //! \brief 暂不支持
843
- bool firstHasBias = true;
844
- //! \brief 暂不支持
845
- ActivationType activationType = ACTIVATION_FAST_GELU;
846
- //! \brief 暂不支持
847
- bool secondTransposeA = false;
848
- //! \brief 暂不支持
849
- bool secondTransposeB = false;
850
- //! \brief 暂不支持
851
- bool secondHasBias = true;
852
- };
853
-
1222
+ //! \brief 将通信主卡上的数据广播到其他每张卡上, 该算子不支持Atlas 推理系列产品。
854
1223
  //!
855
- //! \struct FfnQuantParam
1224
+ //! rank、rankSize、rankRoot需满足以下条件:
1225
+ //! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
856
1226
  //!
857
- //! \brief 暂不支持
1227
+ //! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
1228
+ //! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
858
1229
  //!
859
- struct FfnQuantParam {
860
- //! \brief 暂不支持
861
- LinearParam firstLinearParam;
862
- //! \brief 暂不支持
863
- ActivationType activationFuncType = ACTIVATION_FAST_GELU;
864
- //! \brief 暂不支持
865
- LinearParam secondLinearParam;
866
- //! \brief 暂不支持
867
- float inputScale = 1;
868
- //! \brief 暂不支持
869
- int inputOffset = 0;
870
- };
871
-
1230
+ //! \code
1231
+ //! rm -rf /dev/shm/sem.lccl*
1232
+ //! rm -rf /dev/shm/sem.hccl*
1233
+ //! ipcrm -a
1234
+ //! \endcode
1235
+ //!
1236
+
1237
+ struct BroadcastParam {
1238
+ //! \brief 当前卡所属通信编号.
1239
+ int rank = 0;
1240
+ //! \brief 通信的卡的数量.
1241
+ int rankSize = 0;
1242
+ //! \brief 主通信编号.
1243
+ int rankRoot = 0;
1244
+ //! \brief HCCL通信域指针.
1245
+ //! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
1246
+ HcclComm hcclComm = nullptr;
1247
+ //! \brief 通信模式,CommMode类型枚举值.hccl多线程只支持外部传入通信域方式
1248
+ CommMode commMode = COMM_MULTI_PROCESS;
1249
+ //! \brief 通信后端指示,仅支持"hccl"和"lccl"。
1250
+ std::string backend = "hccl";
1251
+ //!
1252
+ //! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
1253
+ //!
1254
+ std::string rankTableFile;
1255
+ //! \brief 通信device组用通信域名标识,多通信域时使用。
1256
+ //! 当backend为"lccl"时,commMode为多进程时,commDomain需要设置为0-63的数字。
1257
+ //! commMode为多线程时,不支持确定性计算,"LCCL_DETERMINISTIC"需要为0或者false。
1258
+ //! LCCL在多进程/多线程多通信域并发场景下,"LCCL_PARALLEL"需要设置为1或者true。
1259
+ //! 多通信域并行功能使用结束后,"LCCL_PARALLEL"需要设置为0或者false,否则会导致基础场景性能下降。
1260
+ std::string commDomain;
1261
+ //!
1262
+ //! \brief 预留参数
1263
+ //!
1264
+ uint8_t rsv[64] = {0};
1265
+ };
1266
+
1267
+ //!
1268
+ //! \struct ReduceScatterParam
1269
+ //!
1270
+ //!
1271
+ //! rank、rankSize、rankRoot需满足以下条件:
1272
+ //! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
1273
+ //!
1274
+ //! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
1275
+ //! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
1276
+ //!
1277
+ //! \code
1278
+ //! rm -rf /dev/shm/sem.lccl*
1279
+ //! rm -rf /dev/shm/sem.hccl*
1280
+ //! ipcrm -a
1281
+ //! \endcode
1282
+ //!
1283
+ struct ReduceScatterParam {
1284
+ //! \brief 当前卡所属通信编号.
1285
+ int rank = 0;
1286
+ //! \brief 通信的卡的数量.
1287
+ int rankSize = 0;
1288
+ //! \brief 主通信编号.
1289
+ int rankRoot = 0;
1290
+ //! \brief 当前通信计算类型仅支持"sum","max"和"min",不支持"prod"。
1291
+ std::string reduceType = "sum";
1292
+ //! \brief HCCL通信域指针。
1293
+ //! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子。
1294
+ HcclComm hcclComm = nullptr;
1295
+ //! \brief 通信模式,CommMode类型枚举值。
1296
+ CommMode commMode = COMM_MULTI_PROCESS;
1297
+ //! \brief 通信后端指示,当"backend"为lccl且机器拓扑为Atlas 800I A2推理产品单机16卡机器的拓扑时,只支持16卡全量拓扑通信或单节点内任意卡通信。
1298
+ std::string backend = "lccl";
1299
+ //! \brief 集群信息的配置文件路径。
1300
+ std::string rankTableFile;
1301
+ //! \brief 通信device组用通信域名标识,多通信域时使用。
1302
+ //! 当backend为"lccl"时,commMode为多进程时,commDomain需要设置为0-63的数字。
1303
+ //! commMode为多线程时,不支持确定性计算,"LCCL_DETERMINISTIC"需要为0或者false。
1304
+ //! LCCL在多进程/多线程多通信域并发场景下,"LCCL_PARALLEL"需要设置为1或者true。
1305
+ //! 多通信域并行功能使用结束后,"LCCL_PARALLEL"需要设置为0或者false,否则会导致基础场景性能下降。
1306
+ std::string commDomain;
1307
+ //!
1308
+ //! \brief 预留参数
1309
+ //!
1310
+ uint8_t rsv[64] = {0};
1311
+ };
1312
+
1313
+ //!
1314
+ //! \struct ReduceScatterVParam
1315
+ //!
1316
+ //!
1317
+ //! rank、rankSize、rankRoot需满足以下条件:
1318
+ //! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize
1319
+ //!
1320
+ //! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
1321
+ //! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
1322
+ //!
1323
+ //! \code
1324
+ //! rm -rf /dev/shm/sem.lccl*
1325
+ //! rm -rf /dev/shm/sem.hccl*
1326
+ //! ipcrm -a
1327
+ //! \endcode
1328
+ //!
1329
+ struct ReduceScatterVParam {
1330
+ //! \brief 当前卡所属通信编号.
1331
+ int rank = 0;
1332
+ //! \brief 通信的卡的数量.
1333
+ int rankSize = 0;
1334
+ //! \brief 主通信编号.
1335
+ int rankRoot = 0;
1336
+ //! \brief 表示发送数据量的数组.
1337
+ //! 例如,若发送的数据类型为float32,sendCounts[i] = n 表示本rank发给rank i n个float32数据。
1338
+ std::vector<int64_t> sendCounts;
1339
+ //! \brief 表示发送偏移量的数组.
1340
+ //! sdispls[i] = n表示本rank从相对于输入起始位置的偏移量为n的位置开始发送数据给rank i
1341
+ std::vector<int64_t> sdispls;
1342
+ //! \brief 表示接收数据量.
1343
+ std::int64_t recvCount = 0;
1344
+ //!
1345
+ //! \brief 当前通信计算类型仅支持"sum","max"和"min",不支持"prod"。
1346
+ std::string reduceType = "sum";
1347
+ //! \brief HCCL通信域指针。 当前算子仅支持lccl,此参数为预留参数。
1348
+ //! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子。
1349
+ HcclComm hcclComm = nullptr;
1350
+ //! \brief 通信模式,CommMode类型枚举值。
1351
+ CommMode commMode = COMM_MULTI_PROCESS;
1352
+ //! \brief 通信后端指示,当前算子仅支持"hccl"
1353
+ std::string backend = "hccl";
1354
+ //! \brief 集群信息的配置文件路径。
1355
+ std::string rankTableFile;
1356
+ //! \brief 通信device组用通信域名标识。
1357
+ std::string commDomain;
1358
+ //!
1359
+ //! \brief 预留参数
1360
+ //!
1361
+ uint8_t rsv[64] = {0};
1362
+ };
1363
+
1364
+ //!
1365
+ //! \brief 判断参数是否相同
1366
+ //!
1367
+ //! \param left
1368
+ //! \param right
1369
+ //! \return bool
1370
+ //!
1371
+ inline bool operator==(const ReduceScatterVParam &left, const ReduceScatterVParam &right)
1372
+ {
1373
+ return left.rank == right.rank && left.rankSize == right.rankSize && left.rankRoot == right.rankRoot &&
1374
+ left.sendCounts == right.sendCounts && left.sdispls == right.sdispls && left.recvCount == right.recvCount &&
1375
+ left.reduceType == right.reduceType && left.hcclComm == right.hcclComm && left.commMode == right.commMode &&
1376
+ left.backend == right.backend && left.rankTableFile == right.rankTableFile &&
1377
+ left.commDomain == right.commDomain;
1378
+ }
1379
+
1380
+ //!
1381
+ //! \struct LinearParam
1382
+ //!
1383
+ //! \brief 将A、B两个矩阵进行矩阵乘运算,同时可以选择对矩阵乘的运算结果进行叠加偏置、InplaceAdd融合或反量化操作。
1384
+ //!
1385
+ //! \note 算子本质上是接收x和weight两个输入tensor作为A矩阵和B矩阵进行矩阵乘运算,可通过参数transposeA与transposeB控制做矩
1386
+ //! 阵乘前是否需要对A矩阵和B矩阵进行行列转置,根据参数转置后的A矩阵和B矩阵需满足矩阵乘维度关系。例如,当transposeA为false,
1387
+ //! transposeB为true时,x和weight的shape可以分别为[m, k]和[n, k]。
1388
+ //!
1389
+ //! \note 该算子支持浮点和量化场景,当参数outDataType值为ACL_DT_UNDEFINED时为浮点场景,否则为量化场景。
1390
+ //!
1391
+ struct LinearParam {
1392
+ //! \brief Matmul所有计算类型。
1393
+ enum MatmulType : uint8_t {
1394
+ MATMUL_UNDEFINED = 0,
1395
+ MATMUL_EIN_SUM
1396
+ };
1397
+ //!
1398
+ //! \brief 是否转置A矩阵。
1399
+ //!
1400
+ //! \note 默认值为false,不转置。
1401
+ //!
1402
+ //! \warning 在量化场景下,非Atlas 800I A2推理产品仅支持配置为false。
1403
+ //!
1404
+ bool transposeA = false;
1405
+ //!
1406
+ //! \brief 是否转置B矩阵。
1407
+ //!
1408
+ //! \note 默认值为true,转置。
1409
+ //!
1410
+ //! \warning 在量化场景下,非Atlas 800I A2推理产品仅支持配置为true。
1411
+ //!
1412
+ bool transposeB = true;
1413
+ //!
1414
+ //! \brief 是否叠加偏置。
1415
+ //!
1416
+ //! \note 默认值为true,叠加偏置。
1417
+ //!
1418
+ //! \warning 在量化场景下,非Atlas 800I A2推理产品仅支持配置为true。
1419
+ //!
1420
+ //! \warning enAccum为true时,仅支持配置为false。
1421
+ //!
1422
+ bool hasBias = true;
1423
+ //!
1424
+ //! \brief 输出数据类型。
1425
+ //!
1426
+ //! \note 默认值为ACL_DT_UNDEFINED。
1427
+ //!
1428
+ //! \warning 浮点场景下:支持配置为ACL_DT_UNDEFINED。
1429
+ //!
1430
+ //! \warning 量化场景下:Atlas 800I A2推理产品支持配置为ACL_FLOAT16/ACL_BF16,否则,仅支持配置为ACL_FLOAT16。
1431
+ //!
1432
+ aclDataType outDataType = ACL_DT_UNDEFINED;
1433
+ //!
1434
+ //! \brief 是否使能累加。
1435
+ //!
1436
+ //! \note 默认值为false,不使能累加。
1437
+ //!
1438
+ //! \warning 仅在Atlas 800I A2推理产品支持配置为true。
1439
+ //!
1440
+ //! \warning hasBias为true时,仅支持配置为false。
1441
+ //!
1442
+ //! \warning 量化场景下,仅支持配置为false。
1443
+ //!
1444
+ bool enAccum = false;
1445
+ //!
1446
+ //! \brief matmul类型
1447
+ //!
1448
+ //! \note 默认值为MATMUL_UNDEFINED,非爱因斯坦乘场景。
1449
+ //!
1450
+ //! \warning 取值范围为MATMUL_UNDEFINED/MATMUL_EIN_SUM。
1451
+ //!
1452
+ MatmulType matmulType = MATMUL_UNDEFINED;
1453
+ //!
1454
+ //! \brief 预留参数
1455
+ //!
1456
+ uint8_t rsv[22] = {0};
1457
+ };
1458
+
1459
+ //!
1460
+ //! \struct LinearParallelParam
1461
+ //!
1462
+ //! \brief 通信计算并行算子,该算子功能为linear和通信算子组合
1463
+ //!
1464
+ //! 通信和计算是并行处理,和串行相比存在大幅度性能提升.
1465
+ //!
1466
+ //! \see LinearParam,AllReduceParam,AllGatherParam
1467
+ //!
1468
+ struct LinearParallelParam {
1469
+ //!
1470
+ //! \enum ParallelType
1471
+ //!
1472
+ //! \brief 通信类型
1473
+ //!
1474
+ enum ParallelType : int {
1475
+ UNDEFINED = -1, //!< 默认值
1476
+ LINEAR_ALL_REDUCE = 0, //!< linear+AllReduce
1477
+ LINEAR_REDUCE_SCATTER = 1, //!< linear+reduce_scatter
1478
+ ALL_GATHER_LINEAR = 2, //!< AllGather+linear
1479
+ PURE_LINEAR = 3, //!< linear
1480
+ ALL_GATHER_LINEAR_REDUCE_SCATTER = 4, //!< AllGather+linear+reduce_scatter
1481
+ MAX = 5, //!< 枚举类型最大值
1482
+ };
1483
+ //!
1484
+ //! \enum QuantType
1485
+ //!
1486
+ //! \brief QuantType类型
1487
+ //!
1488
+ enum QuantType : int {
1489
+ QUANT_TYPE_UNDEFINED = -1, //!< 默认值
1490
+ QUANT_TYPE_UNQUANT = -1, //!< 默认值
1491
+ QUANT_TYPE_PER_TENSOR = 0, //!< 对整个张量进行量化
1492
+ QUANT_TYPE_PER_CHANNEL = 1, //!< 对张量中每个channel分别进行量化
1493
+ QUANT_TYPE_PER_GROUP = 2, //!< 将张量按quantGroupSize划分后,分别进行量化
1494
+ QUANT_TYPE_MAX = 3, //!< 枚举类型最大值
1495
+ };
1496
+ //! \brief 权重是否需要转置,默认为true。
1497
+ bool transWeight = true;
1498
+ //! \brief 当前卡所属通信编号.
1499
+ int rank = 0;
1500
+ //! \brief 通信的卡的数量
1501
+ int rankSize = 0;
1502
+ //! \brief 主通信编号
1503
+ int rankRoot = 0;
1504
+ //! \brief 是否叠加残差。配置为false时不叠加残差,为true时叠加残差。默认不叠加残差。
1505
+ bool hasResidual = false;
1506
+ //! \brief 通信后端指示。支持"hccl","lccl","lcoc"。
1507
+ std::string backend = "hccl";
1508
+ //! \brief HCCL通信域接口获取的地址指针,仅当"hcclComm"不为nullptr时可用。
1509
+ HcclComm hcclComm = nullptr;
1510
+ //! \brief 通信模式,CommMode类型枚举值
1511
+ CommMode commMode = COMM_MULTI_PROCESS;
1512
+ //! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景。
1513
+ std::string rankTableFile;
1514
+ //! \brief 权重并行类型。
1515
+ ParallelType type = LINEAR_ALL_REDUCE;
1516
+ //! \brief 是否返回中间结果,仅在使用ALL_GATHER_LINEAR时生效。
1517
+ bool keepIntermediate = false;
1518
+ //! \brief 量化类型。
1519
+ QuantType quantType = QUANT_TYPE_UNQUANT;
1520
+ //! \brief 量化类型为QUANT_TYPE_PER_GROUP时生效。
1521
+ int32_t quantGroupSize = 0;
1522
+ //!
1523
+ //! 若为浮点linear,参数outDataType配置为ACL_DT_UNDEFINED,表示输出tensor的数据类型与输入tensor一致,
1524
+ //! 若为量化linear,输出tensor的数据类型与输入tensor不一致,则参数outDataType配置为用户预期输出tensor的数据类型,
1525
+ //! 如ACL_FLOAT16/ACL_BF16
1526
+ aclDataType outDataType = ACL_DT_UNDEFINED;
1527
+ //! \brief 通信device组用通信域名标识,多通信域时使用。
1528
+ //! 当backend为"lccl"时,commMode为多进程时,commDomain需要设置为0-63的数字。
1529
+ //! commMode为多线程时,不支持确定性计算,"LCCL_DETERMINISTIC"需要为0或者false。
1530
+ //! LCCL在多进程/多线程多通信域并发场景下,"LCCL_PARALLEL"需要设置为1或者true。
1531
+ //! 多通信域并行功能使用结束后,"LCCL_PARALLEL"需要设置为0或者false,否则会导致基础场景性能下降。
1532
+ std::string commDomain;
1533
+ //! \brief AllGather_Matmul_ReduceScatter算子参数结构体
1534
+ struct TwoDimTPInfo {
1535
+ //! \brief 表示ag轴卡数,规定x轴方向是非连续卡号
1536
+ uint16_t agDim = 0;
1537
+ //! \brief 表示rs轴卡数,规定y轴方向是连续卡号
1538
+ uint16_t rsDim = 0;
1539
+ //! \brief 是否沿着内轴进行allgather通信
1540
+ uint8_t innerDimIsAg = 1;
1541
+ //! \brief 填充满8字节
1542
+ uint8_t rsv[3] = {0};
1543
+ };
1544
+ //! \brief AllGather_Matmul_ReduceScatter算子参数
1545
+ TwoDimTPInfo twoDimTPInfo;
1546
+ //!
1547
+ //! \brief 预留参数
1548
+ //!
1549
+ uint8_t rsv[56] = {0};
1550
+ };
1551
+
1552
+ //!
1553
+ //! \struct LinearSparseParam
1554
+ //!
1555
+ //! \brief 稀疏量化linear
1556
+ //!
1557
+ //! 该算子实现功能与量化linear类似。不同点在于稀疏量化算子会使用压缩工具提前对weight输入进行压缩,
1558
+ //! 以此提升算子性能。参数tilingK和tilingN由压缩算法决定,目前均只支持取值为8.
1559
+ //! 目前该算子仅支持在Atlas 推理系列产品中进行运算。
1560
+ //!
1561
+ struct LinearSparseParam {
1562
+ //! \brief 是否转置A矩阵,默认不转置。当前仅支持transposeA = false。
1563
+ bool transposeA = false;
1564
+ //! \brief 是否转置B矩阵,默认转置。当前仅支持transposeB = true。
1565
+ bool transposeB = true;
1566
+ //! \brief 压缩参数,由外部压缩算法决定,默认为8,目前仅支持取值为8。
1567
+ uint32_t tilingK = 8;
1568
+ //! \brief 压缩参数,由外部压缩算法决定,默认为8,目前仅支持取值为8。
1569
+ uint32_t tilingN = 8;
1570
+ //!
1571
+ //! \brief 预留参数
1572
+ //!
1573
+ uint8_t rsv[12] = {0};
1574
+ };
1575
+
1576
+ //!
1577
+ //! \brief 旋转位置编码。hiddenSizeQ必须是hiddenSizeK的整数倍且满足hiddenSizeQ = headDim * headNum。
1578
+ //!
1579
+ struct RopeParam {
1580
+ //! \brief rope,旋转系数,对半旋转是2,支持配置2、4或headDim / 2。
1581
+ int32_t rotaryCoeff = 4;
1582
+ //! \brief 训练用参数,支持配置0或1
1583
+ int32_t cosFormat = 0;
1584
+ //!
1585
+ //! \brief 预留参数
1586
+ //!
1587
+ uint8_t rsv[8] = {0};
1588
+ };
1589
+
1590
+ //!
1591
+ //! \brief 判断参数是否相同
1592
+ //!
1593
+ //! \param left
1594
+ //! \param right
1595
+ //! \return bool
1596
+ //!
1597
+ inline bool operator==(const RopeParam &left, const RopeParam &right)
1598
+ {
1599
+ return left.rotaryCoeff == right.rotaryCoeff && left.cosFormat == right.cosFormat;
1600
+ }
1601
+
1602
+ //!
1603
+ //! \brief 旋转位置编码后进行concat操作。hiddenSizeQ必须是hiddenSizeK的整数倍且满足hiddenSizeQ = headDim * headNum。
1604
+ //!
1605
+ struct RopeQConcatParam {
1606
+ //!
1607
+ //! \brief 预留参数
1608
+ //!
1609
+ uint8_t rsv[16] = {0};
1610
+ };
1611
+
1612
+ //!
1613
+ //! \brief 判断参数是否相同
1614
+ //!
1615
+ //! \param left
1616
+ //! \param right
1617
+ //! \return bool
1618
+ //!
1619
+ inline bool operator==(const RopeQConcatParam &left, const RopeQConcatParam &right)
1620
+ {
1621
+ (void)left;
1622
+ (void)right;
1623
+ return true;
1624
+ }
1625
+
1626
+ //!
1627
+ //! \struct RelayAttentionParam
1628
+ //!
1629
+ //! \brief 通过减少共享组的kv搬运来优化模型吞吐量
1630
+ //!
1631
+ //!
1632
+ struct RelayAttentionParam {
1633
+ //!
1634
+ //! \brief head数量
1635
+ //!
1636
+ //! \note 默认值为0
1637
+ //!
1638
+ int32_t headNum = 0;
1639
+ //!
1640
+ //! \brief 算子tor值
1641
+ //!
1642
+ //! \note 默认值为1.0
1643
+ //!
1644
+ float qkScale = 1;
1645
+ //!
1646
+ //! \brief kv头数量
1647
+ //! \warning 取值范围为[0,8]
1648
+ //! \note 默认值为0
1649
+ //!
1650
+ int32_t kvHeadNum = 0;
1651
+ //!
1652
+ //! \enum MaskType
1653
+ //!
1654
+ //! \brief mask类型
1655
+ //!
1656
+ enum MaskType : int {
1657
+ MASK_TYPE_UNDEFINED = 0, //!< 默认值,全0mask
1658
+ MASK_TYPE_NORM, //!< 倒三角mask
1659
+ };
1660
+ //!
1661
+ //! \brief mask类型
1662
+ //!
1663
+ //! \note 默认值为MASK_TYPE_UNDEFINED
1664
+ //!
1665
+ MaskType maskType = MASK_TYPE_UNDEFINED;
1666
+ //!
1667
+ //! \brief 预留参数
1668
+ //!
1669
+ uint8_t rsv[32] = {0};
1670
+ };
1671
+
1672
+ //!
1673
+ //! \brief KVCache+KVCache+Muls+FlashAttention.
1674
+ //!
1675
+ struct SelfAttentionParam {
1676
+ //!
1677
+ //! \enum CalcType
1678
+ //!
1679
+ //! \brief 计算类型
1680
+ //!
1681
+ enum CalcType : int {
1682
+ UNDEFINED = 0, //!< decoder&encoder for flashAttention
1683
+ ENCODER, //!< encoder for flashAttention
1684
+ DECODER, //!< decoder for flashAttention
1685
+ PA_ENCODER, //!< encoder for pagedAttention
1686
+ PREFIX_ENCODER, //!< prefix encoder for flashAttention
1687
+ };
1688
+ //!
1689
+ //! \enum KernelType
1690
+ //!
1691
+ //! \brief 算子内核精度类型
1692
+ //!
1693
+ enum KernelType : int {
1694
+ KERNELTYPE_DEFAULT = 0, //!< i:float16, bmm:float16, o:float16
1695
+ KERNELTYPE_HIGH_PRECISION //!< i:float16, bmm:float, o:float16
1696
+ };
1697
+ //!
1698
+ //! \enum ClampType
1699
+ //!
1700
+ //! \brief clamp类型
1701
+ //!
1702
+ enum ClampType : int {
1703
+ CLAMP_TYPE_UNDEFINED = 0, //!< 不做clamp
1704
+ CLAMP_TYPE_MIN_MAX //!< 做clamp,同时指定最大最小值
1705
+ };
1706
+ //!
1707
+ //! \enum MaskType
1708
+ //!
1709
+ //! \brief mask类型
1710
+ //!
1711
+ enum MaskType : int {
1712
+ MASK_TYPE_UNDEFINED = 0, //!< 默认值,全0mask
1713
+ MASK_TYPE_NORM, //!< 倒三角mask
1714
+ MASK_TYPE_ALIBI, //!< alibi mask
1715
+ MASK_TYPE_NORM_COMPRESS, //!< 倒三角压缩mask
1716
+ MASK_TYPE_ALIBI_COMPRESS, //!< alibi压缩mask
1717
+ MASK_TYPE_ALIBI_COMPRESS_SQRT, //!< alibi压缩开平方mask
1718
+ MASK_TYPE_ALIBI_COMPRESS_LEFT_ALIGN, //!< alibi压缩mask左对齐,只支持Atlas 800I A2推理产品
1719
+ MASK_TYPE_SLIDING_WINDOW_NORM, //!< sliding window attention mask
1720
+ MASK_TYPE_SLIDING_WINDOW_COMPRESS //!< sliding window attention压缩mask
1721
+ };
1722
+ //!
1723
+ //! \enum KvCacheCfg
1724
+ //!
1725
+ //! \brief KvCache配置,不支持calcType为PA_ENCODER
1726
+ //!
1727
+ enum KvCacheCfg : int {
1728
+ K_CACHE_V_CACHE = 0, //!< 默认值,进行kvcache处理
1729
+ K_BYPASS_V_BYPASS, //!< 直接传入kvcache
1730
+ };
1731
+ //!
1732
+ //! \enum ScaleType
1733
+ //!
1734
+ //! \brief The type values of ScaleType.
1735
+ //!
1736
+ enum ScaleType : int {
1737
+ SCALE_TYPE_TOR = 0, //!< 默认值,不开启LogN缩放
1738
+ SCALE_TYPE_LOGN, //!< 注意力使用LogN缩放,quantType只能是0
1739
+ SCALE_TYPE_MAX //!< 边界值,仅用于判断是否出界
1740
+ };
1741
+
1742
+ //! \enum QuantType
1743
+ //!
1744
+ //! \brief quant类型
1745
+ //!
1746
+ enum QuantType : int {
1747
+ TYPE_QUANT_UNDEFINED = 0, //!< 默认值,不与量化融合,此时q,k,v为bf16/float16
1748
+ TYPE_QUANT_UNQUANT = 0, //!< 默认值,不与量化融合,此时q,k,v为bf16/float16
1749
+ TYPE_DEQUANT_FUSION = 1, //!< 与反量化融合, 预留类型,当前不能够取此值。
1750
+ TYPE_QUANT_QKV_OFFLINE = 2, //!< 离线INT8量化, 只支持Atlas 800I A2推理产品
1751
+ TYPE_QUANT_QKV_ONLINE = 3 //!< 在线INT8量化, 只支持Atlas 800I A2推理产品
1752
+ };
1753
+ //!
1754
+ //! \enum CacheType
1755
+ //!
1756
+ //! \brief cache内部排布类型, 为CACHE_TYPE_SWA开启SWA KVCache优化,只储存后windowSize个token的KVCache,
1757
+ //! 控制KVCache的长度不超过windowSize, 以此减少显存占用
1758
+ //!
1759
+ enum CacheType : int8_t {
1760
+ CACHE_TYPE_NORM = 0, //!< 正常cache
1761
+ CACHE_TYPE_SWA = 1 //!< 固定长度cache
1762
+ };
1763
+ //!
1764
+ //! 量化类型(只支持PA_ENCODER):
1765
+ //! 当值为TYPE_QUANT_QKV_OFFLINE或TYPE_QUANT_QKV_ONLINE时q,k,v为int8。key,value的headsize等长,范围为(0, 256],
1766
+ //! 且32对齐。outdatatype需要配置,只能是ACL_FLOAT16或ACL_BF16。inputLayout只支持TYPE_BSND,calcType只能为PA_ENCODER。
1767
+ QuantType quantType = TYPE_QUANT_UNQUANT;
1768
+
1769
+ //! output数据类型:只支持PA_ENCODER,且QuantType不为TYPE_QUANT_UNQUANT(格式为aclDataType)
1770
+ aclDataType outDataType = ACL_DT_UNDEFINED;
1771
+
1772
+ //! query头大小, 需大于0
1773
+ int32_t headNum = 0;
1774
+ //! kv头数量, 该值需要用户根据使用的模型实际情况传入
1775
+ //! kvHeadNum = 0时,keyCache的k_head_num,valueCache的v_head_num与query的num_heads一致,均为num_heads的数值
1776
+ //! kvHeadNum != 0时,keyCache的k_head_num, valueCache的v_head_num与kvHeadNum值相同
1777
+ int32_t kvHeadNum = 0;
1778
+ //! query缩放系数
1779
+ float qScale = 1;
1780
+ //! 算子tor值, 在Q*K^T后乘
1781
+ float qkScale = 1;
1782
+ //! 是否开启动态batch
1783
+ bool batchRunStatusEnable = false;
1784
+ //! 是否开启倒三角优化, 只有mask为倒三角的时候才能开启优化
1785
+ uint32_t isTriuMask = 0;
1786
+ //! 计算类型
1787
+ CalcType calcType = UNDEFINED;
1788
+ //! 内核精度类型
1789
+ KernelType kernelType = KERNELTYPE_DEFAULT;
1790
+ //! clamp类型
1791
+ ClampType clampType = CLAMP_TYPE_UNDEFINED;
1792
+ //! clamp功能最小值
1793
+ float clampMin = 0;
1794
+ //! clamp功能最大值
1795
+ float clampMax = 0;
1796
+ //! mask类型
1797
+ MaskType maskType = MASK_TYPE_UNDEFINED;
1798
+ //! kvcache配置
1799
+ KvCacheCfg kvcacheCfg = K_CACHE_V_CACHE;
1800
+ //! scale类型
1801
+ ScaleType scaleType = SCALE_TYPE_TOR;
1802
+ //! 数据排布格式默认为BSND
1803
+ InputLayout inputLayout = TYPE_BSND;
1804
+ //! \brief 大于0时开启MLA合并kvcache功能,表示kv合并传入时v的head_size
1805
+ //! \note 默认值为0
1806
+ //! \warning 取值范围为[0,576]
1807
+ uint32_t mlaVHeadSize = 0;
1808
+ //! \brief cache内部排布,开启SWA特性并设置为CACHE_TYPE_SWA可以开启SWA cache优化
1809
+ //! \note 默认值为CACHE_TYPE_NORM
1810
+ //! \warning 只有开启SWA特性后才可以是CACHE_TYPE_SWA
1811
+ CacheType cacheType = CACHE_TYPE_NORM;
1812
+ //! \brief windowSize大于0时开启SWA特性,开启SWA特性后表示sliding window 大小
1813
+ //! \note 默认值为0
1814
+ //! \warning windowSize大于0时需要将maskType设置为MASK_TYPE_SLIDING_WINDOW_NORM或MASK_TYPE_SLIDING_WINDOW_COMPRESS
1815
+ uint32_t windowSize = 0;
1816
+ //!
1817
+ //! \brief 预留参数
1818
+ //!
1819
+ uint8_t rsv[64] = {0};
1820
+ };
1821
+
1822
+ //!
1823
+ //! \brief PagedAttention.
1824
+ //!
1825
+ //! 一个Q有多个token,一个token对应多个KV的token,以token0为例,block_table代表其对应的KV的block_id,-1代表截止,
1826
+ //! 所以第二行和第四行为其目标block,context_lens则表示KV有多少个token,则代表仅有block_id为(3,4,5,9,10)是需要与Q进行计算的。
1827
+ //!
1828
+ struct PagedAttentionParam {
1829
+ //! query 头大小
1830
+ int32_t headNum = 0;
1831
+ //! 算子tor值, 在Q*K^T后乘
1832
+ float qkScale = 1.0;
1833
+ //! kv头数量
1834
+ int32_t kvHeadNum = 0;
1835
+ //!
1836
+ //! \enum MaskType
1837
+ //!
1838
+ //! \brief The type values of MaskType.
1839
+ //!
1840
+ enum MaskType : int {
1841
+ UNDEFINED = 0, //!< 默认值,全0的mask
1842
+ MASK_TYPE_NORM, //!< 倒三角mask
1843
+ MASK_TYPE_ALIBI, //!< alibi mask
1844
+ MASK_TYPE_SPEC //!< 并行解码mask
1845
+ };
1846
+ //! mask类型
1847
+ MaskType maskType = UNDEFINED;
1848
+ //! 是否开启动态batch
1849
+ bool batchRunStatusEnable = false;
1850
+ //!
1851
+ //! \enum QuantType
1852
+ //!
1853
+ //! \brief quant类型
1854
+ //!
1855
+ enum QuantType : int {
1856
+ TYPE_QUANT_UNDEFINED = 0, //!< 默认值,不与量化融合,此时q,k,v为bf16/float16
1857
+ TYPE_QUANT_UNQUANT = 0, //!< 默认值,不与量化融合,此时q,k,v为bf16/float16
1858
+ TYPE_DEQUANT_FUSION = 1, //!< 与反量化融合, 只支持Atlas 800I A2推理产品
1859
+ TYPE_QUANT_QKV_OFFLINE = 2, //!< 离线INT8量化, 只支持Atlas 800I A2推理产品
1860
+ TYPE_QUANT_QKV_ONLINE = 3 //!< 在线INT8量化, 只支持Atlas 800I A2推理产品
1861
+ };
1862
+ //!
1863
+ //! 量化类型:
1864
+ //! 为TYPE_QUANT_UNQUANT时q,keyCache,valueCache为bf16/float16。
1865
+ //! 为TYPE_DEQUANT_FUSION时q为bf16/float16,keyCache,valueCache为int8。
1866
+ //! 为TYPE_QUANT_QKV_OFFLINE或TYPE_QUANT_QKV_ONLINE时q,keyCache,valueCache为int8。
1867
+ //! keyCache,valueCache的headsize等长,范围为(0, 256],且block_size * head_size ≤ 128 * 128。
1868
+ //! outdatatype需要配置,只能是ACL_FLOAT16或ACL_BF16。inputLayout只支持TYPE_BSND。
1869
+ QuantType quantType = TYPE_QUANT_UNQUANT;
1870
+
1871
+ //! output数据类型(格式为aclDataType)
1872
+ aclDataType outDataType = ACL_DT_UNDEFINED;
1873
+
1874
+ //! 开启量化功能后是否使用offset
1875
+ bool hasQuantOffset = false;
1876
+ //!
1877
+ //! \enum CompressType
1878
+ //!
1879
+ //! \brief 压缩类型
1880
+ //!
1881
+ enum CompressType : int {
1882
+ COMPRESS_TYPE_UNDEFINED = 0, //!< 默认值,不压缩
1883
+ COMPRESS_TYPE_KVHEAD, //!< 压缩key_cache, value_cache的kvHead维度, 只支持Atlas 800I A2推理产品。
1884
+ COMPRESS_TYPE_KVHEAD_ROPE, //!< rope场景压缩key_cache, value_cache的kvHead维度, 只支持Atlas 800I A2推理产品。
1885
+ COMPRESS_TYPE_MAX //!< 压缩类型边界值,仅用于判断是否出界,所有情况不能取该值。
1886
+ };
1887
+ //!
1888
+ //! 压缩方式
1889
+ //! 为COMPRESS_TYPE_KVHEAD时,不支持quanttype为2和3。
1890
+ //! 为COMPRESS_TYPE_KVHEAD_ROPE时, maskType需传0。不支持quanttype为2和3。
1891
+ CompressType compressType = COMPRESS_TYPE_UNDEFINED;
1892
+ //!
1893
+ //! \enum CalcType
1894
+ //!
1895
+ //! \brief The type values of CalcType.
1896
+ //!
1897
+ enum CalcType : int {
1898
+ CALC_TYPE_UNDEFINED = 0, //!< 默认值,不开启并行解码
1899
+ CALC_TYPE_SPEC //!< 此计算模式支持传入长度大于1的qseqlen,启用并行解码功能
1900
+ };
1901
+ //! 计算类型
1902
+ CalcType calcType = CALC_TYPE_UNDEFINED;
1903
+
1904
+ //!
1905
+ //! \enum ScaleType
1906
+ //!
1907
+ //! \brief The type values of ScaleType.
1908
+ //!
1909
+ enum ScaleType : int {
1910
+ SCALE_TYPE_TOR = 0, //!< 默认值,不开启LogN缩放
1911
+ SCALE_TYPE_LOGN, //!< 注意力使用LogN缩放
1912
+ SCALE_TYPE_MAX //!< 边界值,仅用于判断是否出界
1913
+ };
1914
+ //! scale类型
1915
+ //! 为SCALE_TYPE_LOGN时,不支持quanttype为2和3。
1916
+ ScaleType scaleType = SCALE_TYPE_TOR;
1917
+
1918
+ //! 数据排布格式默认为BSND
1919
+ InputLayout inputLayout = TYPE_BSND;
1920
+ //! \brief 大于0时开启MLA合并kvcache功能,表示kv合并传入时v的head_size
1921
+ //! \note 默认值为0
1922
+ //! \warning 取值范围为[0,576]
1923
+ uint32_t mlaVHeadSize = 0;
1924
+ //!
1925
+ //! \brief 预留参数
1926
+ //!
1927
+ uint8_t rsv[68] = {0};
1928
+ };
1929
+
1930
+ //!
1931
+ //! \brief 数据格式转换处理。
1932
+ //!
1933
+ //! 使用的NZ的dims约定表示方式:{b, n1, m1m0, n0},对应的ND的dims是{b, m, n},
1934
+ //! 其中:b表示batch,如果batch为1,该维度为1,不可省略。如果batch有多个,该维度为所有batch维度合轴的结果。
1935
+ //! m0/n0表示对齐位,float16时,n0与m0都为16, int8时,n0为32,m0为16,m1m0表示原始ND的m维度经过对齐位向上对齐,
1936
+ //! n1表示原始ND的n维度经过对齐位向上对齐后,除以n0的商。例如原始ND的dims为{8, 100, 30},则其对应的NZ的dims为{8, 2, 112, 16}。
1937
+ //!
1938
+ //! \warning outCrops的长度要求是2,其值须满足以下要求:
1939
+ //! - 如果m1m0落在区间(k1 × 16, (k1 + 1) × 16](其中k1为正整数)内,那么该区间即为outCrops[0]的取值范围要求。
1940
+ //! - 如果n0*n1落在区间(k2 × 16, (k2 + 1) × 16](其中k2为正整数)内,那么该区间即为outCrops[1]的取值范围要求。
1941
+ //!
1942
+ struct TransdataParam {
1943
+ //!
1944
+ //! \enum TransdataType
1945
+ //!
1946
+ //! \brief TransdataType类型值
1947
+ //!
1948
+ enum TransdataType : int {
1949
+ UNDEFINED = 0, //!< 默认
1950
+ FRACTAL_NZ_TO_ND, //!< FRACTAL_NZ转ND
1951
+ ND_TO_FRACTAL_NZ //!< ND转FRACTAL_NZ
1952
+ };
1953
+ //! \brief 数据格式转换类型,支持FRACTAL_NZ和ND互相转换。
1954
+ TransdataType transdataType = UNDEFINED;
1955
+ //! \brief 仅当FRACTAL_NZ转ND时使用,表示原ND数据格式Shape的最后两维。
1956
+ SVector<int64_t> outCrops = {0, 0};
1957
+ //!
1958
+ //! \brief 预留参数
1959
+ //!
1960
+ uint8_t rsv[8] = {0};
1961
+ };
1962
+
1963
+ //!
1964
+ //! \brief 三目运算。
1965
+ //!
1966
+ //! 输入张量为cond,x,y, 输出张量 z = cond ? x : y;
1967
+ //! 输入cond的元素只能是0或者1
1968
+ //! 输出z的维度为输入x与y广播后的结果。要求cond, x, y必须是可广播的。
1969
+ //!
1970
+ struct WhereParam {
1971
+ //!
1972
+ //! \brief 预留参数
1973
+ //!
1974
+ uint8_t rsv[8] = {0};
1975
+ };
1976
+
1977
+ //!
1978
+ //! \brief 将输入Tensor的Shape,按指定轴扩展指定的倍数。
1979
+ //!
1980
+ //! \warning 输出y的维度和multiples维度一致,每个维度大小为输入x广播到multiples维度后和multiples对应维度的乘积。
1981
+ //!
1982
+ struct RepeatParam {
1983
+ //!
1984
+ //! \brief 每一维度上扩展的倍数。
1985
+ //!
1986
+ //! \warning
1987
+ //! - 支持在不超过两个维度上进行扩展
1988
+ //! - multiples的维度小于等于8且需大于或等于输入x的维度,每一个元素要求大于0。
1989
+ //!
1990
+ SVector<int64_t> multiples;
1991
+ //!
1992
+ //! \brief 预留参数
1993
+ //!
1994
+ uint8_t rsv[8] = {0};
1995
+ };
1996
+
1997
+ //!
1998
+ //! \struct SetValueParam
1999
+ //!
2000
+ //! \brief 将输入源张量中的内容拷贝到输入目标张量指定位置中.
2001
+ //!
2002
+ //! 该拷贝为原地拷贝,最终结果修改在输入目标张量中.<br>
2003
+ //! 输入目标张量 dst: [a,b,c], 输入源张量src: [d,e,f].
2004
+ //! dst[starts[0]: ends[0], starts[1]: ends[1], starts[2]: ends[2]] = src.<br>
2005
+ //! 其中 ends[0]-starts[0]需为src第0维的维度大小,ends[1]-starts[1]需为src第1维的维度大小,ends[2]-starts[2]需为src第2维的维度大小。
2006
+ //!
2007
+ //! \warning 输入src和输入dst的维数须相同.<br>
2008
+ //! 输入src的各维度大小要求小于或等于输入dst对应维度大小.<br>
2009
+ //! 输入src和输入dst的各维度要求有一个或两个维度不相同,且需要满足:
2010
+ //! - 如果有一个维度不相同,则这个维度不能是最高维(第0维)。
2011
+ //! - 如果有两个维度不相同,则其中一个不同的维度必须是最高维(第0维)。
2012
+ //!
2013
+ struct SetValueParam {
2014
+ //! \brief 每一维拷贝起始位置
2015
+ SVector<int64_t> starts;
2016
+ //! \brief 每一维拷贝结束位置后一个位置,拷贝到该位置前一个位置为止
2017
+ SVector<int64_t> ends;
2018
+ //! \brief 每一维拷贝步长,当前仅支持strides为全1.
2019
+ SVector<int64_t> strides;
2020
+ //!
2021
+ //! \brief 预留参数
2022
+ //!
2023
+ uint8_t rsv[8] = {0};
2024
+ };
2025
+
2026
+ //!
2027
+ //! \brief 在指定维度上求和、取最大值或最小值,并消除这个维度。
2028
+ //!
2029
+ struct ReduceParam {
2030
+ //!
2031
+ //! \enum ReduceType
2032
+ //!
2033
+ //! \brief ReduceType支持的值
2034
+ //!
2035
+ enum ReduceType {
2036
+ REDUCE_UNDEFINED = 0, //!< 未定义。
2037
+ REDUCE_MAX, //!< 求最大值。
2038
+ REDUCE_MIN, //!< 求最小值。
2039
+ REDUCE_SUM, //!< 求和。
2040
+ };
2041
+ //! \brief reduceType
2042
+ ReduceType reduceType = REDUCE_UNDEFINED;
2043
+ //!
2044
+ //! \brief 指定轴(维度)。
2045
+ //!
2046
+ //! \warning axis不能为空且长度要求小于等于输入x的维度。<br>
2047
+ //! axis可以支持多个轴上进行处理,各元素要求小于x的维度且大于等于0
2048
+ //!
2049
+ SVector<int64_t> axis;
2050
+ //!
2051
+ //! \brief 预留参数
2052
+ //!
2053
+ uint8_t rsv[8] = {0};
2054
+ };
2055
+
2056
+ //!
2057
+ //! \brief 依据给定的词表概率以及top-p,设置随机种子及top-k保留词数,选择最合适的词及对应概率作为输出。
2058
+ //! 支持batch级别随机种子、top-k取样,支持exponential取样
2059
+ //! \warning probs必须是两维张量。
2060
+ //!
2061
+ struct TopkToppSamplingParam {
2062
+ //! \brief 取样处理类型
2063
+ enum TopkToppSamplingType {
2064
+ SAMPLING_UNDEFINED = -1, //!< 未定义
2065
+ SINGLE_TOPK_SAMPLING, //!< 非batch级别随机种子、Topk的取样
2066
+ BATCH_TOPK_MULTINOMIAL_SAMPLING, //!< batch级别随机种子、Topk的multinomial取样
2067
+ BATCH_TOPK_EXPONENTIAL_SAMPLING, //!< batch级别随机种子、Topk的exponential取样
2068
+ BATCH_TOPK_MULTINOMIAL_LOGPROBS_SAMPLING, //!< batch级别随机种子、Topk的multinomial 增加log_Probs取样
2069
+ BATCH_TOPK_EXPONENTIAL_LOGPROBS_SAMPLING, //!< batch级别随机种子、Topk的exponential 增加log_Probs取样
2070
+ SAMPLING_MAX, //!< 枚举最大值
2071
+ };
2072
+ //! \brief 采样类型,默认为非batch级别随机种子、Topk的取样
2073
+ TopkToppSamplingType topkToppSamplingType = SINGLE_TOPK_SAMPLING;
2074
+ //! \brief 当 topkToppSamplingType为BATCH_TOPK_MULTINOMIAL_SAMPLING时使用
2075
+ //! \brief 每个batch下top-p阶段随机抽样使用的随机数种子。
2076
+ //! \brief 维度与batch大小一致。
2077
+ std::vector<uint32_t> randSeeds;
2078
+ //! \brief 当 topkToppSamplingType为SINGLE_TOPK_SAMPLING时使用
2079
+ //! \brief top-p阶段随机抽样使用的随机数种子。
2080
+ uint32_t randSeed = 0;
2081
+ //! \brief 当 topkToppSamplingType为SINGLE_TOPK_SAMPLING时使用
2082
+ //! \brief top-k阶段保留的词的个数,需要小于词表的词数。
2083
+ //! \brief top-k必须大于0且小于或等于输入probs最后一维的大小。
2084
+ uint32_t topk = 100;
2085
+ //!
2086
+ //! \brief logProb logprobSwitch=true时有效
2087
+ //!
2088
+ int32_t logProbsSize = 0;
2089
+ //!
2090
+ //! \brief 预留参数
2091
+ //!
2092
+ uint8_t rsv[12] = {0};
2093
+ };
2094
+
2095
+
2096
+ //!
2097
+ //! \struct PadParam
2098
+ //!
2099
+ //! \brief 对于输入input_ids,取出每个batch最后一个有效token的embedding向量
2100
+ //!
2101
+ struct PadParam {
2102
+ //!
2103
+ //! \brief 预留参数
2104
+ //!
2105
+ uint8_t rsv[8] = {0};
2106
+ };
2107
+
2108
+ //!
2109
+ //! \struct UnpadParam
2110
+ //!
2111
+ //! \brief 对于输入input_ids,把所有有效的token拼接在一起,并在最后补0
2112
+ //!
2113
+ struct UnpadParam {
2114
+ //!
2115
+ //! \brief 预留参数
2116
+ //!
2117
+ uint8_t rsv[8] = {0};
2118
+ };
2119
+
2120
+ //!
2121
+ //! \struct SortParam
2122
+ //!
2123
+ //! \brief 后处理计算功能。实现输入tensor在最后一维上降序排列,并保留最大的num个元素,输出排序后的tensor及各元素对应的索引。
2124
+ //!
2125
+ struct SortParam {
2126
+ //!
2127
+ //! \brief 排序后保留的最大的元素的数量。
2128
+ //!
2129
+ //! \warning num是一个仅含有一个值的SVector,该值需大于0且小于等于输入x最后一维的大小。
2130
+ //!
2131
+ SVector<int32_t> num;
2132
+ //!
2133
+ //! \brief 预留参数
2134
+ //!
2135
+ uint8_t rsv[8] = {0};
2136
+ };
2137
+
2138
+ //!
2139
+ //! \struct NonzeroParam
2140
+ //!
2141
+ //! \brief 输出非零值索引。
2142
+ //!
2143
+ //! \warning 仅在Atlas 800I A2推理产品上支持
2144
+ //!
2145
+ struct NonzeroParam {
2146
+ //!
2147
+ //! \brief 预留参数
2148
+ //!
2149
+ uint8_t rsv[8] = {0};
2150
+ };
2151
+
2152
+ //!
2153
+ //! \struct SwigluQuantParam
2154
+ //!
2155
+ //! \brief SwiGLU激活与量化融合(据结构体名推断;原描述“输出非零值索引。”与NonzeroParam重复,疑为复制遗留,待确认)。
2156
+ //!
2157
+ //! \warning 仅在Atlas 800I A2推理产品上支持
2158
+ //!
2159
+ struct SwigluQuantParam {
2160
+ //!
2161
+ //! \enum QuantType
2162
+ //!
2163
+ //! \brief 量化支持的类型
2164
+ //!
2165
+ enum QuantType : int {
2166
+ QUANT_TYPE_PER_TOKEN = 0, //!< PER_TOKEN量化
2167
+ };
2168
+
2169
+ //! \brief 量化类型。默认为QUANT_TYPE_PER_TOKEN量化。
2170
+ QuantType quantType = QUANT_TYPE_PER_TOKEN;
2171
+
2172
+ //!
2173
+ //! \brief 预留参数
2174
+ //!
2175
+ uint8_t rsv[8] = {0};
2176
+ };
2177
+
2178
+
2179
+ //!
2180
+ //! \struct OnehotParam
2181
+ //!
2182
+ //! \brief onehot编码。
2183
+ //!
2184
+ struct OnehotParam {
2185
+ //! \brief depth所在下标。可为负数。
2186
+ int64_t axis = 0;
2187
+ //! \brief 类别数。
2188
+ int64_t depth = 0;
2189
+ //!
2190
+ //! \brief 预留参数
2191
+ //!
2192
+ uint8_t rsv[8] = {0};
2193
+ };
2194
+
2195
+ //!
2196
+ //! \struct IndexAddParam
2197
+ //!
2198
+ //! \brief 固定维度的指定下标加上某个特定值。
2199
+ //!
2200
+ struct IndexAddParam {
2201
+ //!
2202
+ //! \enum IndexType
2203
+ //!
2204
+ //! \brief 指定下标需要执行的操作类型。
2205
+ //!
2206
+ enum IndexType {
2207
+ INDEX_UNDEFINED = 0, //!< 默认值。不支持。
2208
+ INDEX_ADD, //!< 加
2209
+ INDEX_ADD_VALID, //!< 有效长度内加。不支持Atlas 推理系列产品。
2210
+ };
2211
+ //!
2212
+ //! \brief 指定下标需要执行的操作类型。
2213
+ //!
2214
+ //! \note 默认值为INDEX_UNDEFINED。
2215
+ //!
2216
+ //! \warning 目前支持取值为INDEX_ADD/INDEX_ADD_VALID。
2217
+ //!
2218
+ IndexType indexType = INDEX_UNDEFINED;
2219
+ //!
2220
+ //! \brief 输入Tensor需加上updates更新值的轴。
2221
+ //!
2222
+ //! \note 默认值为0。
2223
+ //!
2224
+ //! \warning 当indexType为INDEX_ADD时,可为负数,取值范围为[-varDimNum, varDimNum - 1]。varDimNum为inTensor0的维度数。
2225
+ //!
2226
+ //! \warning 当indexType为INDEX_ADD_VALID时,仅支持取值为0。
2227
+ //!
2228
+ int64_t axis = 0;
2229
+ //!
2230
+ //! \brief 预留参数
2231
+ //!
2232
+ uint8_t rsv[16] = {0};
2233
+ };
2234
+
2235
+ //!
2236
+ //! \struct SendParam
2237
+ //!
2238
+ //! \brief 将当前通信卡的输入发送至指定通信卡上,当前仅支持Atlas 800I A2推理产品。Send和Recv需要配套使用。
2239
+ //!
2240
+ //! rank、rankSize、rankRoot需满足以下条件:
2241
+ //! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize, 0 ≤ destRank < rankSize
2242
+ //!
2243
+ //! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
2244
+ //! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
2245
+ //!
2246
+ //! \code
2247
+ //! rm -rf /dev/shm/sem.lccl*
2248
+ //! rm -rf /dev/shm/sem.hccl*
2249
+ //! ipcrm -a
2250
+ //! \endcode
2251
+ //!
2252
+ struct SendParam {
2253
+ //! \brief 当前卡所属通信编号
2254
+ int rank = 0;
2255
+ //! \brief 通信的卡的数量
2256
+ int rankSize = 0;
2257
+ //! \brief 主通信编号
2258
+ int rankRoot = 0;
2259
+ //! \brief 通信域内数据接收端的rank编号.
2260
+ uint32_t destRank = 1;
2261
+ //! \brief 通信后端指示,仅支持"hccl".
2262
+ std::string backend = "hccl";
2263
+ //! \brief HCCL通信域指针
2264
+ //! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
2265
+ HcclComm hcclComm = nullptr;
2266
+ //! \brief 通信模式,CommMode类型枚举值。hccl多线程只支持外部传入通信域方式
2267
+ CommMode commMode = COMM_MULTI_PROCESS;
2268
+ //!
2269
+ //! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
2270
+ //!
2271
+ std::string rankTableFile;
2272
+ //! \brief 通信device组用通信域名标识,多通信域时使用,当前仅支持hccl
2273
+ std::string commDomain;
2274
+ //!
2275
+ //! \brief 预留参数
2276
+ //!
2277
+ uint8_t rsv[64] = {0};
2278
+ };
2279
+
2280
+ //!
2281
+ //! \struct RecvParam
2282
+ //!
2283
+ //! \brief 从当前通信卡接收来自指定通信卡的数据,当前仅支持Atlas 800I A2推理产品。Send和Recv需要配套使用。
2284
+ //!
2285
+ //! rank、rankSize、rankRoot需满足以下条件:
2286
+ //! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize, 0 ≤ srcRank < rankSize
2287
+ //!
2288
+ //! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
2289
+ //! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
2290
+ //!
2291
+ //! \code
2292
+ //! rm -rf /dev/shm/sem.lccl*
2293
+ //! rm -rf /dev/shm/sem.hccl*
2294
+ //! ipcrm -a
2295
+ //! \endcode
2296
+ //!
2297
+ struct RecvParam {
2298
+ //! \brief 当前卡所属通信编号
2299
+ int rank = 0;
2300
+ //! \brief 通信的卡的数量
2301
+ int rankSize = 0;
2302
+ //! \brief 主通信编号
2303
+ int rankRoot = 0;
2304
+ //! \brief 通信域内数据发送端的rank编号.
2305
+ uint32_t srcRank = 1;
2306
+ //! \brief 通信后端指示,仅支持"hccl".
2307
+ std::string backend = "hccl";
2308
+ //! \brief HCCL通信域指针
2309
+ //! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
2310
+ HcclComm hcclComm = nullptr;
2311
+ //! \brief 通信模式,CommMode类型枚举值。hccl多线程只支持外部传入通信域方式
2312
+ CommMode commMode = COMM_MULTI_PROCESS;
2313
+ //!
2314
+ //! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
2315
+ //!
2316
+ std::string rankTableFile;
2317
+ //! \brief 通信device组用通信域名标识,多通信域时使用,当前仅支持hccl
2318
+ std::string commDomain;
2319
+ //!
2320
+ //! \brief 预留参数
2321
+ //!
2322
+ uint8_t rsv[64] = {0};
2323
+ };
2324
+
2325
+ //!
2326
+ //! \struct AllToAllParam
2327
+ //!
2328
+ //! \brief 向通信域内所有通信卡发送相同数据量(输入切分成rankSize份)的数据,并从所有通信卡接收相同数据量的数据,当前仅支持Atlas 800I A2推理产品。
2329
+ //!
2330
+ struct AllToAllParam {
2331
+ //! \brief 当前卡所属通信编号.
2332
+ int rank = 0;
2333
+ //! \brief 通信的卡的数量.
2334
+ int rankSize = 0;
2335
+ //! \brief 主通信编号.
2336
+ int rankRoot = 0;
2337
+ //!
2338
+ //! \brief 通信计算类型。仅Atlas 800 A3推理产品支持配置为"lccl"。
2339
+ //!
2340
+ std::string backend = "hccl";
2341
+ //! \brief HCCL通信域指针.
2342
+ //! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
2343
+ HcclComm hcclComm = nullptr;
2344
+ //! \brief 通信模式,CommMode类型枚举值.hccl多线程只支持外部传入通信域方式
2345
+ CommMode commMode = COMM_MULTI_PROCESS;
2346
+ //!
2347
+ //! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
2348
+ //!
2349
+ std::string rankTableFile;
2350
+ //! \brief 通信device组用通信域名标识,多通信域时使用。
2351
+ //! 当backend为"lccl"时,commMode为多进程时,commDomain需要设置为0-63的数字。
2352
+ //! commMode为多线程时,不支持确定性计算,"LCCL_DETERMINISTIC"需要为0或者false。
2353
+ //! LCCL在多进程/多线程多通信域并发场景下,"LCCL_PARALLEL"需要设置为1或者true。
2354
+ //! 多通信域并行功能使用结束后,"LCCL_PARALLEL"需要设置为0或者false,否则会导致基础场景性能下降。
2355
+ std::string commDomain;
2356
+ //! \brief 通信结果对输入进行转置。
2357
+ //! 仅当backend为"lccl"时生效
2358
+ bool transpose = false;
2359
+ //!
2360
+ //! \brief 预留参数
2361
+ //!
2362
+ uint8_t rsv[62] = {0};
2363
+ };
2364
+
2365
+ //!
2366
+ //! \struct AllToAllVParam
2367
+ //!
2368
+ //! \brief 向通信域内所有通信卡发送数据(数据量可以定制),并从所有通信卡接收数据,当前仅支持Atlas 800I A2推理产品。
2369
+ //!
2370
+ struct AllToAllVParam {
2371
+ //! \brief 当前卡所属通信编号.
2372
+ int rank = 0;
2373
+ //! \brief 通信的卡的数量.
2374
+ int rankSize = 0;
2375
+ //! \brief 主通信编号.
2376
+ int rankRoot = 0;
2377
+ //! \brief 表示发送数据量的数组.
2378
+ //! 例如,若发送的数据类型为float32,sendCounts[i] = n 表示本rank发给rank i n个float32数据。
2379
+ std::vector<int64_t> sendCounts;
2380
+ //! \brief 表示发送偏移量的数组.
2381
+ //! sdispls[i] = n表示本rank从相对于输入起始位置的偏移量为n的位置开始发送数据给rank i。
2382
+ std::vector<int64_t> sdispls;
2383
+ //! \brief 表示接收数据量的数组.
2384
+ //! 例如,若发送的数据类型为float32,recvCounts[i] = n 表示本rank从rank i收到n个float32数据。
2385
+ std::vector<int64_t> recvCounts;
2386
+ //! \brief 表示接收偏移量的数组.
2387
+ //! rdispls[i] = n表示本rank从相对于输出起始位置的偏移量为n的位置开始接收rank i的数据。
2388
+ std::vector<int64_t> rdispls;
2389
+ //!
2390
+ //! \brief 通信计算类型,仅支持"hccl".
2391
+ //!
2392
+ std::string backend = "hccl";
2393
+ //! \brief HCCL通信域指针.
2394
+ //! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
2395
+ HcclComm hcclComm = nullptr;
2396
+ //! \brief 通信模式,CommMode类型枚举值.hccl多线程只支持外部传入通信域方式
2397
+ CommMode commMode = COMM_MULTI_PROCESS;
2398
+ //!
2399
+ //! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
2400
+ //!
2401
+ std::string rankTableFile;
2402
+ //! \brief 通信device组用通信域名标识,多通信域时使用。
2403
+ //! 当backend为"lccl"时,commMode为多进程时,commDomain需要设置为0-63的数字。
2404
+ //! commMode为多线程时,不支持确定性计算,"LCCL_DETERMINISTIC"需要为0或者false。
2405
+ //! LCCL在多进程/多线程多通信域并发场景下,"LCCL_PARALLEL"需要设置为1或者true。
2406
+ //! 多通信域并行功能使用结束后,"LCCL_PARALLEL"需要设置为0或者false,否则会导致基础场景性能下降。
2407
+ std::string commDomain;
2408
+ //!
2409
+ //! \brief 预留参数
2410
+ //!
2411
+ uint8_t rsv[64] = {0};
2412
+ };
2413
+
872
2414
  //!
873
- //! \brief 旋转位置编码。hiddenSizeQ必须是hiddenSizeK的整数倍且满足hiddenSizeQ = headDim * headNum。
2415
+ //! \struct AllToAllVV2Param
874
2416
  //!
875
- struct RopeParam {
876
- //! \brief rope,旋转系数,对半旋转是2,支持配置2、4或headDim / 2。
877
- int32_t rotaryCoeff = 4;
878
- //! \brief 训练用参数,支持配置0或1
879
- int32_t cosFormat = 0;
2417
+ //! \brief 向通信域内所有通信卡发送数据(数据量可以定制),并从所有通信卡接收数据,当前仅支持Atlas 800I A2推理产品。
2418
+ //!
2419
+ struct AllToAllVV2Param {
2420
+ //! \brief 当前卡所属通信编号.
2421
+ int rank = -1;
2422
+ //! \brief 通信的卡的数量.
2423
+ int rankSize = 0;
2424
+ //! \brief 主通信编号.
2425
+ int rankRoot = 0;
2426
+ //!
2427
+ //! \brief 通信计算类型,仅支持"hccl".
2428
+ //!
2429
+ std::string backend = "hccl";
2430
+ //! \brief HCCL通信域指针.
2431
+ //! 默认为空,加速库为用户创建;若用户想要自己管理通信域,则需要传入该通信域指针,加速库使用传入的通信域指针来执行通信算子
2432
+ HcclComm hcclComm = nullptr;
2433
+ //! \brief 通信模式,CommMode类型枚举值.hccl多线程只支持外部传入通信域方式
2434
+ CommMode commMode = COMM_MULTI_PROCESS;
2435
+ //!
2436
+ //! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
2437
+ //!
2438
+ //! ranktable配置参考
2439
+ //! https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC1alpha002/devguide/moddevg/tfmigr1/tfmigr1_000029.html
2440
+ //!
2441
+ std::string rankTableFile;
2442
+ //! \brief 通信device组用通信域名标识,多通信域时使用,当前仅支持hccl
2443
+ std::string commDomain;
2444
+ //!
2445
+ //! \brief 预留参数
2446
+ //!
2447
+ uint8_t rsv[64] = {0};
880
2448
  };
881
2449
 
882
2450
  //!
@@ -886,454 +2454,546 @@ struct RopeParam {
886
2454
  //! \param right
887
2455
  //! \return bool
888
2456
  //!
889
- inline bool operator==(const RopeParam &left, const RopeParam &right)
2457
+ inline bool operator==(const AllToAllVV2Param &left, const AllToAllVV2Param &right)
890
2458
  {
891
- return left.rotaryCoeff == right.rotaryCoeff && left.cosFormat == right.cosFormat;
2459
+ return left.rank == right.rank && left.rankSize == right.rankSize && left.rankRoot == right.rankRoot &&
2460
+ left.hcclComm == right.hcclComm && left.commMode == right.commMode && left.backend == right.backend &&
2461
+ left.rankTableFile == right.rankTableFile && left.commDomain == right.commDomain;
892
2462
  }
893
2463
 
894
2464
  //!
895
- //! \brief KVCache+KVCache+Muls+FlashAttention.
2465
+ //! \struct GroupTopkParam
896
2466
  //!
897
- struct SelfAttentionParam {
898
- //!
899
- //! \enum CalcType
900
- //!
901
- //! \brief 计算类型
902
- //!
903
- enum CalcType : int {
904
- UNDEFINED = 0, //!< decoder&encoder for flashAttention
905
- ENCODER, //!< encoder for flashAttention
906
- DECODER, //!< decoder for flashAttention
907
- PA_ENCODER //!< encoder for pagedAttention
908
- };
2467
+ //! \brief GroupTopk算子超参数。将输入inTensor0中维度1(inTensor0有2个维度:维度0和维度1)数据分groupNum个组,每组取最大值,然后选出每组最大值中前k个,最后将非前k个组的数据全部置零。
2468
+ //!
2469
+ //! \note
2470
+ //!
2471
+ //! \warning
2472
+ //!
2473
+ struct GroupTopkParam {
909
2474
  //!
910
- //! \enum KernelType
2475
+ //! \brief 每个token分组数量。注:“专家总数”为inTensor0Desc.shape.dims[1]的值。
911
2476
  //!
912
- //! \brief 算子内核精度类型
2477
+ //! \note 必传,默认值为1,取值范围为[1, 专家总数]。
913
2478
  //!
914
- enum KernelType : int {
915
- KERNELTYPE_DEFAULT = 0, //!< i:fp16, bmm:fp16, o:fp16
916
- KERNELTYPE_HIGH_PRECISION //!< i:fp16, bmm:fp32, o:fp16
917
- };
2479
+ //! \warning groupNum需要保证可以被inTensor0Desc.shape.dims[1]整除。
918
2480
  //!
919
- //! \enum ClampType
2481
+ int32_t groupNum = 1;
920
2482
  //!
921
- //! \brief clamp类型
2483
+ //! \brief 选择top K组数量。
922
2484
  //!
923
- enum ClampType : int {
924
- CLAMP_TYPE_UNDEFINED = 0, //!< 不做clamp
925
- CLAMP_TYPE_MIN_MAX //!< 做clamp,同时指定最大最小值
926
- };
2485
+ //! \note 必传,默认值为0,取值范围为[1, groupNum]。
927
2486
  //!
928
- //! \enum MaskType
2487
+ //! \warning
929
2488
  //!
930
- //! \brief mask类型
2489
+ int32_t k = 0;
931
2490
  //!
932
- enum MaskType : int {
933
- MASK_TYPE_UNDEFINED = 0, //!< 默认值,全0mask
934
- MASK_TYPE_NORM, //!< 倒三角mask
935
- MASK_TYPE_ALIBI, //!< alibi mask
936
- MASK_TYPE_NORM_COMPRESS, //!< 倒三角压缩mask
937
- MASK_TYPE_ALIBI_COMPRESS, //!< alibi压缩mask
938
- MASK_TYPE_ALIBI_COMPRESS_SQRT, //!< alibi压缩开平方mask
939
- MASK_TYPE_ALIBI_COMPRESS_LEFT_ALIGN //!< alibi压缩mask左对齐,只支持Atlas 800I A2
940
- };
2491
+ //! \enum GroupMultiFlag
941
2492
  //!
942
- //! \enum KvCacheCfg
2493
+ //! \brief 指定GroupTopk每组中取值计算的方式。
943
2494
  //!
944
- //! \brief KvCache配置,不支持calcType为PA_ENCODER
2495
+ //! \warning
945
2496
  //!
946
- enum KvCacheCfg :int {
947
- K_CACHE_V_CACHE = 0, //!< 默认值,进行kvcache处理
948
- K_BYPASS_V_BYPASS, //!< 直接传入kvcache
2497
+ enum GroupMultiFlag : uint16_t {
2498
+ UNDEFINED = 0, //!< 默认方式,每组内取最大值。
2499
+ SUM_MULTI_MAX //!< 每组内取n个最大值求和,需要设置参数n
949
2500
  };
950
- //! query头大小, 需大于或等于0
951
- int32_t headNum = 0;
952
- //! kv头数量, 该值需要用户根据使用的模型实际情况传入
953
- //! kvHeadNum = 0时,keyCache的k_head_num,valueCache的v_head_num与query的num_heads一致,均为num_heads的数值
954
- //! kvHeadNum != 0时,keyCache的k_head_num, valueCache的v_head_num与kvHeadNum值相同
955
- int32_t kvHeadNum = 0;
956
- //! query缩放系数
957
- float qScale = 1;
958
- //! 算子tor值, 在Q*K^T后乘
959
- float qkScale = 1;
960
- //! 是否开启动态batch
961
- bool batchRunStatusEnable = false;
962
- //! 是否开启倒三角优化, 只有mask为倒三角的时候才能开启优化
963
- uint32_t isTriuMask = 0;
964
- //! 计算类型
965
- CalcType calcType = UNDEFINED;
966
- //! 内核精度类型
967
- KernelType kernelType = KERNELTYPE_DEFAULT;
968
- //! clamp类型
969
- ClampType clampType = CLAMP_TYPE_UNDEFINED;
970
- //! clamp功能最小值
971
- float clampMin = 0;
972
- //! clamp功能最大值
973
- float clampMax = 0;
974
- //! mask类型
975
- MaskType maskType = MASK_TYPE_UNDEFINED;
976
- //! kvcache配置
977
- KvCacheCfg kvcacheCfg = K_CACHE_V_CACHE;
978
- };
979
-
980
- //!
981
- //! \brief PagedAttention.
982
- //!
983
- //! 一个Q有多个token,一个token对应多个KV的token,以token0为例,block_table代表其对应的KV的block_id,-1代表截止,
984
- //! 所以第二行和第四行为其目标block,context_lens则表示KV有多少个token,则代表仅有block_id为(3,4,5,9,10)是需要与Q进行计算的。
985
- //!
986
- struct PagedAttentionParam {
987
- //! query 头大小
988
- int32_t headNum = 0;
989
- //! 算子tor值, 在Q*K^T后乘
990
- float qkScale = 1.0;
991
- //! kv头数量
992
- int32_t kvHeadNum = 0;
993
- //!
994
- //! \enum MaskType
995
- //!
996
- //! \brief The type values of MaskType.
997
2501
  //!
998
- enum MaskType : int {
999
- UNDEFINED = 0, //!< 默认值,全0的mask
1000
- MASK_TYPE_NORM, //!< 倒三角mask
1001
- MASK_TYPE_ALIBI, //!< alibi mask
1002
- MASK_TYPE_SPEC //!< 并行解码mask
1003
- };
1004
- //! mask类型
1005
- MaskType maskType = UNDEFINED;
1006
- //! 是否开启动态batch
1007
- bool batchRunStatusEnable = false;
2502
+ //! \brief 指定GroupTopk每组中取值计算的方式。
1008
2503
  //!
1009
- //! \enum QuantType
2504
+ //! \note 默认值为UNDEFINED。
1010
2505
  //!
1011
- //! \brief quant类型
2506
+ //! \warning 取值为SUM_MULTI_MAX时需要传入参数n。
1012
2507
  //!
1013
- enum QuantType : int {
1014
- TYPE_QUANT_UNDEFINED = 0, //!< 默认值,不与量化融合
1015
- TYPE_DEQUANT_FUSION //!< 与反量化融合, 只支持Atlas 800I A2
1016
- };
1017
- //! 量化类型
1018
- QuantType quantType = TYPE_QUANT_UNDEFINED;
1019
- //! 开启量化功能后是否使用offset
1020
- bool hasQuantOffset = false;
2508
+ GroupMultiFlag groupMultiFlag = UNDEFINED;
1021
2509
  //!
1022
- //! \enum CompressType
2510
+ //! \brief 每组内取值的个数。
1023
2511
  //!
1024
- //! \brief 压缩类型
2512
+ //! \note 默认值为1,取值范围为[1,expert_num/groupNum]。
1025
2513
  //!
1026
- enum CompressType : int {
1027
- COMPRESS_TYPE_UNDEFINED = 0, //!< 默认值,不压缩
1028
- COMPRESS_TYPE_KVHEAD //!< 压缩key_cache, value_cahe的kvHead维度, 只支持Atlas 800I A2
1029
- };
1030
-
1031
- //! 压缩方式
1032
- CompressType compressType = COMPRESS_TYPE_UNDEFINED;
2514
+ //! \warning 只有当groupMultiFlag为SUM_MULTI_MAX时有效
1033
2515
  //!
1034
- //! \enum CalcType
2516
+ uint16_t n = 1;
1035
2517
  //!
1036
- //! \brief The type values of CalcType.
2518
+ //! \brief 预留参数
1037
2519
  //!
1038
- enum CalcType : int {
1039
- CALC_TYPE_UNDEFINED = 0, //!< 默认值,不开启并行解码
1040
- CALC_TYPE_SPEC //!< 并行解码功能
1041
- };
1042
- //! 计算类型
1043
- CalcType calcType = CALC_TYPE_UNDEFINED;
2520
+ uint8_t rsv[12] = {0};
1044
2521
  };
1045
2522
 
1046
2523
  //!
1047
- //! \brief 数据格式转换处理。
2524
+ //! \struct GroupedMatmulWithRoutingParam
1048
2525
  //!
1049
- //! 使用的NZ的dims约定表示方式:{b, n1, m1m0, n0},对应的ND的dims是{b, m, n},
1050
- //! 其中:b表示batch,如果batch为1,该维度为1,不可省略。如果batch有多个,该维度为所有batch维度合轴的结果。
1051
- //! m0/n0表示对齐位,float16时,n0与m0都为16, int8时,n0为32,m0为16,m1m0表示原始ND的m维度经过对齐位向上对齐,
1052
- //! n1表示原始ND的n维度经过对齐位向上对齐后,除以n0的商。例如原始ND的dims为{8, 100, 30},则其对应的NZ的dims为{8, 2, 112, 16}。
2526
+ //! \brief 实现了GroupedMatmulWithRouting算子的Up和Down方法,将topK个专家权重与token激活值做矩阵乘法计算。
1053
2527
  //!
1054
- //! \warning outCrops的长度要求是2,其值须满足以下要求:
1055
- //! - 如果m0m1落在区间(k1 × 16, (k1 + 1) × 16](其中k1为正整数)内,那么该区间即为outCrops[0]的取值范围要求。
1056
- //! - 如果n0*n1落在区间(k2 × 16, (k2 + 1) × 16](其中k2为正整数)内,那么该区间即为outCrops[1]的取值范围要求。
2528
+ //! \warning 仅Atlas 800I A2推理产品支持该算子
1057
2529
  //!
1058
- struct TransdataParam {
2530
+
2531
+ struct GroupedMatmulWithRoutingParam {
1059
2532
  //!
1060
- //! \enum TransdataType
2533
+ //! \enum GroupedMatmulType
1061
2534
  //!
1062
- //! \brief TransdataType类型值
2535
+ //! \brief 指定GroupedMatmulWithRouting算子需要执行的操作类型。
1063
2536
  //!
1064
- enum TransdataType : int {
1065
- UNDEFINED = 0, //!< 默认
1066
- FRACTAL_NZ_TO_ND, //!< FRACTAL_NZ转ND
1067
- ND_TO_FRACTAL_NZ //!< ND转FRACTAL_NZ
2537
+ enum GroupedMatmulType : int {
2538
+ GROUPED_MATMUL_UP = 0, //!< 默认值。up类型。
2539
+ GROUPED_MATMUL_DOWN //!< down类型。
1068
2540
  };
1069
- //! \brief 数据格式转换类型,支持FRACTAL_NZ和ND互相转换。
1070
- TransdataType transdataType = UNDEFINED;
1071
- //! \brief 仅当FRACTAL_NZ转ND时使用,表示原ND数据格式Shape的最后两维。
1072
- SVector<int64_t> outCrops = { 0, 0 };
1073
- };
1074
-
1075
- //!
1076
- //! \brief 三目运算。
1077
- //!
1078
- //! 输入张量为cond,x,y, 输出张量 z = cond ? x : y;
1079
- //! 输入cond的元素只能是0或者1
1080
- //! 输出z的维度为输入x与y广播后的结果。要求cond, x, y必须是可广播的。
1081
- //!
1082
- struct WhereParam {};
1083
-
1084
- //!
1085
- //! \brief 将输入Tensor的Shape,按指定轴扩展指定的倍数。
1086
- //!
1087
- //! \warning 输出y的维度和multiples维度一致,每个维度大小为输入x广播到multiples维度后和multiples对应维度的乘积。
1088
- //!
1089
- struct RepeatParam {
2541
+ //! \brief 是否转置B矩阵(专家权重)。
2542
+ bool transposeB = true;
2543
+ //! \brief 选取的topK专家个数
2544
+ int32_t topK = 0;
1090
2545
  //!
1091
- //! \brief 每一维度上扩展的倍数。
2546
+ //! \brief 指定GroupedMatmulWithRouting算子需要执行的操作类型。
1092
2547
  //!
1093
- //! \warning
1094
- //! - 支持在不超过两个维度上进行扩展
1095
- //! - multiples的维度小于等于8且需大于或等于输入x的维度,每一个元素要求大于0。
2548
+ //! \note 默认值为GROUPED_MATMUL_UP。
1096
2549
  //!
1097
- SVector<int64_t> multiples;
2550
+ //! \warning 目前支持取值为GROUPED_MATMUL_UP/GROUPED_MATMUL_DOWN。
2551
+ //!
2552
+ GroupedMatmulType groupedMatmulType = GROUPED_MATMUL_UP;
2553
+ //!
2554
+ //! \brief 指定输出值的反量化类型。
2555
+ //!
2556
+ //! \note 默认值为ACL_DT_UNDEFINED。
2557
+ //!
2558
+ //! \warning 非量化场景下:仅支持配置为ACL_DT_UNDEFINED。量化场景下支持ACL_FLOAT16/ACL_BF16
2559
+ //!
2560
+ aclDataType outDataType = ACL_DT_UNDEFINED;
2561
+ //!
2562
+ //! \brief 预留参数
2563
+ //!
2564
+ uint8_t rsv[16] = {0};
1098
2565
  };
1099
2566
 
1100
2567
  //!
1101
- //! \struct SetValueParam
1102
- //!
1103
- //! \brief 将输入源张量中的内容拷贝到输入目标张量指定位置中.
1104
- //!
1105
- //! 该拷贝为原地拷贝,最终结果修改在输入目标张量中.<br>
1106
- //! 输入目标张量 dst: [a,b,c], 输入源张量src: [d,e,f].
1107
- //! dst[starts[0]: ends[0], starts[1]: ends[1], starts[2]: ends[2]] = src.<br>
1108
- //! 其中 ends[0]-starts[0]需为src第0维的维度大小,ends[1]-starts[1]需为为src第1维的维度大小,ends[2]-starts[2]需为src第2维的维度大小。
2568
+ //! \struct GroupedMatmulInplaceAddParam
1109
2569
  //!
1110
- //! \warning 输入src和输入dst的维数须相同.<br>
1111
- //! 输入src的各维度大小要求小于或等于输入dst对应维度大小.<br>
1112
- //! 输入src和输入dst的各维度要求有一个或两个维度不相同,且需要满足:
1113
- //! - 如果有一个维度不相同,则这个维度不能是最高维(第0维)。
1114
- //! - 如果有两个维度不相同,则其中一个不同的维度必须是最高维(第0维)。
1115
- //
1116
- struct SetValueParam {
1117
- //! \brief 每一维拷贝起始位置
1118
- SVector<int64_t> starts;
1119
- //! \brief 每一维拷贝结束位置后一个位置,拷贝到该位置前一个位置为止
1120
- SVector<int64_t> ends;
1121
- //! \brief 每一维拷贝步长,当前仅支持strides为全1.
1122
- SVector<int64_t> strides;
1123
- };
1124
-
2570
+ //! \brief 将A、B两个矩阵按照规则进行分组矩阵乘运算,并累加在矩阵C上作为输出。
1125
2571
  //!
1126
- //! \brief 在指定维度上求和、取最大值或最小值,并消除这个维度。
2572
+ //! \note 算子本质上是接收x和weight两个输入tensor作为A矩阵和B矩阵进行分组矩阵乘运算并累加在矩阵C上,可通过参数transposeA与transposeB控制做矩
2573
+ //! 阵乘前是否需要对A矩阵和B矩阵进行行列转置,根据参数转置后的A矩阵和B矩阵需满足矩阵乘维度关系。例如,当transposeA为false,
2574
+ //! transposeB为true时,x和weight的shape可以分别为[m, k]和[n, k]。
1127
2575
  //!
1128
- struct ReduceParam {
2576
+ struct GroupedMatmulInplaceAddParam {
1129
2577
  //!
1130
- //! \enum ReduceType
2578
+ //! \brief 是否转置A矩阵。
1131
2579
  //!
1132
- //! \brief ReduceType支持的值
2580
+ //! \note 默认值为false,不转置。
1133
2581
  //!
1134
- enum ReduceType {
1135
- REDUCE_UNDEFINED = 0, //!< 未定义。
1136
- REDUCE_MAX, //!< 求最大值。
1137
- REDUCE_MIN, //!< 求最小值。
1138
- REDUCE_SUM, //!< 求和。
1139
- };
1140
- //! \brief reduceType
1141
- ReduceType reduceType = REDUCE_UNDEFINED;
2582
+ bool transposeA = false;
1142
2583
  //!
1143
- //! \brief 指定轴(维度)。
2584
+ //! \brief 是否转置B矩阵。
1144
2585
  //!
1145
- //! \warning axis不能为空且长度要求小于等于输入x的维度。<br>
1146
- //! axis可以支持多个轴上进行处理,各元素要求小于x的维度且大于等于0
2586
+ //! \note 默认值为false,不转置,当前仅支持false。
1147
2587
  //!
1148
- SVector<int64_t> axis;
1149
- };
1150
-
1151
- //!
1152
- //! \brief 依据给定的词表概率以及top-p,设置随机种子及top-k保留词数,选择最合适的词及对应概率作为输出。
1153
- //! 支持btach级别随机种子、top-k取样,支持exponential取样
1154
- //! \warning probs必须是两维张量。
1155
- //!
1156
- struct TopkToppSamplingParam {
1157
- //! \brief 取样处理类型
1158
- enum TopkToppSamplingType {
1159
- SAMPLING_UNDEFINED = -1, //!< 未定义
1160
- SINGLE_TOPK_SAMPLING, //!< 非batch级别随机种子、Topk的取样
1161
- BATCH_TOPK_MULTINOMIAL_SAMPLING, //!< batch级别随机种子、Topk的multinomial取样
1162
- BATCH_TOPK_EXPONENTIAL_SAMPLING, //!< batch级别随机种子、Topk的exponential取样
1163
- SAMPLING_MAX, //!< 枚举最大值
1164
- };
1165
- //! \brief 采样类型,默认为非batch级别随机种子、Topk的取样
1166
- TopkToppSamplingType topkToppSamplingType = SINGLE_TOPK_SAMPLING;
1167
- //! \brief 当 topkToppSamplingType为BATCH_TOPK_MULTINOMIAL_SAMPLING时使用
1168
- //! \brief 每个batch下top-p阶段随机抽样使用的随机数种子。
1169
- //! \brief 维度与batch大小一致。
1170
- std::vector<uint32_t> randSeeds;
1171
- //! \brief 当 topkToppSamplingType为SINGLE_TOPK_SAMPLING时使用
1172
- //! \brief top-p阶段随机抽样使用的随机数种子。
1173
- uint32_t randSeed = 0;
1174
- //! \brief 当 topkToppSamplingType为SINGLE_TOPK_SAMPLING时使用
1175
- //! \brief top-k阶段保留的词的个数,需要小于词表的词数。
1176
- //! \brief top-k必须大于0且小于或等于输入probs最后一维的大小。
1177
- uint32_t topk = 100;
2588
+ bool transposeB = false;
2589
+ //!
2590
+ //! \brief 预留参数
2591
+ //!
2592
+ uint8_t rsv[22] = {0};
1178
2593
  };
1179
2594
 
1180
-
1181
- //!
1182
- //! \struct PadParam
1183
- //!
1184
- //! \brief 对于输入input_ids,取出每个batch最后一个有效token的embedding向量
1185
2595
  //!
1186
- struct PadParam {};
1187
-
2596
+ //! \struct CohereLayerNormParam
1188
2597
  //!
1189
- //! \struct UnpadParam
2598
+ //! \brief CohereLayerNorm可以将网络层输入根据最后一维归一化到[0, 1]之间。
1190
2599
  //!
1191
- //! \brief 对于输入input_ids,把所有有效的token拼接在一起,并在最后补0
2600
+ //! \note 针对Command R Plus模型,对多batch数据用于表示根据最后一维进行归一化操作。
1192
2601
  //!
1193
- struct UnpadParam {};
2602
+ struct CohereLayerNormParam {
2603
+ //!
2604
+ //! \brief epsilon,放在分母上防止除0。
2605
+ //!
2606
+ //! \note 默认值为1e-5。
2607
+ //!
2608
+ //! \warning epsilon的取值要求大于0。
2609
+ float epsilon = 1e-5;
2610
+ //!
2611
+ //! \brief 预留参数
2612
+ //!
2613
+ uint8_t rsv[32] = {0};
2614
+ };
1194
2615
 
1195
2616
  //!
1196
- //! \struct SortParam
2617
+ //! \struct GatherPreRmsNormParam
1197
2618
  //!
1198
- //! \brief 后处理计算功能。实现输入tensor在最后一维上降序排列,并保留最大的num个元素,输出排序后的tensor及各元素对应的索引。
2619
+ //! \brief 首先对ResIn进行Gather索引操作,然后与X相加,最后进行RmsNorm计算。
1199
2620
  //!
1200
- struct SortParam {
2621
+ //! \warning 仅Atlas 800I A2推理产品支持该算子
2622
+ //!
2623
+ struct GatherPreRmsNormParam {
1201
2624
  //!
1202
- //! \brief 排序后保留的最大的元素的数量。
2625
+ //! \brief epsilon,放在分母上防止除0。
1203
2626
  //!
1204
- //! \warning num是一个仅含有一个值的SVector,该值需大于0且小于等于输入x最后一维的大小。
2627
+ //! \note 默认值为1e-5。
1205
2628
  //!
1206
- SVector<int32_t> num;
2629
+ //! \warning epsilon的取值要求大于0。
2630
+ float epsilon = 1e-5;
2631
+ //!
2632
+ //! \brief 预留参数
2633
+ //!
2634
+ uint8_t rsv[28] = {0};
1207
2635
  };
1208
2636
 
1209
2637
  //!
1210
- //! \struct NonzeroParam
2638
+ //! \struct NormRopeReshapeParam
1211
2639
  //!
1212
- //! \brief 输出非零值索引。
2640
+ //! \brief 融合rmsnorm、rope、reshapeAndCache。
1213
2641
  //!
1214
- //! \warning 仅在Atlas 800I A2硬件上支持
2642
+ //! \warning Atlas 800I A2推理产品支持该算子
1215
2643
  //!
1216
- struct NonzeroParam {};
2644
+ struct NormRopeReshapeParam {
2645
+ //! \brief precisionMode,精度模式。
2646
+ uint32_t precisionMode = 0;
2647
+ //! \brief rotaryCoeff,算子内Rope部分计算的旋转系数。
2648
+ uint32_t rotaryCoeff = 2;
2649
+ //! \brief epsilon,归一化时加在分母上防止除零。
2650
+ float epsilon = 1e-5;
2651
+ //!
2652
+ //! \brief 预留参数
2653
+ //!
2654
+ //! \note 默认为全0的数组。
2655
+ //!
2656
+ uint8_t rsv[16] = {0};
2657
+ };
1217
2658
 
1218
2659
  //!
1219
- //! \struct OnehotParam
2660
+ //! \struct FusedAddTopkDivParam
1220
2661
  //!
1221
- //! \brief onehot编码。
2662
+ //! \brief Deepseek融合算子:Sigmoid+Add+GroupTopk+Gather+ReduceSum,RealDiv,Muls。
1222
2663
  //!
1223
- struct OnehotParam {
1224
- //! \brief depth所在下标。可为负数。
1225
- int64_t axis = 0;
1226
- //! \brief 类别数。
1227
- int64_t depth = 0;
2664
+ //! \note OP详细描述。
2665
+ //!
2666
+ //! \warning 当前仅支持Atlas 800I A2 推理产品、Atlas A2 训练系列产品和Atlas A3 训练系列产品。
2667
+ //!
2668
+ struct FusedAddTopkDivParam {
2669
+ //!
2670
+ //! \brief 分组数量。
2671
+ //!
2672
+ //! \note 默认值为1。
2673
+ //!
2674
+ //! \warning 取值大于0。
2675
+ //!
2676
+ uint32_t groupNum = 1;
2677
+ //!
2678
+ //! \brief 选择k个组。
2679
+ //!
2680
+ //! \note 默认值为1。
2681
+ //!
2682
+ //! \warning 取值范围为(0, groupNum]。
2683
+ //!
2684
+ uint32_t groupTopk = 1;
2685
+ //!
2686
+ //! \brief 组内选取n个最大值求和。
2687
+ //!
2688
+ //! \note 默认值为1。
2689
+ //!
2690
+ //! \warning 取值大于0。
2691
+ //!
2692
+ uint32_t n = 1;
2693
+ //!
2694
+ //! \brief topk选择前k个值。
2695
+ //!
2696
+ //! \note 默认值为1。
2697
+ //!
2698
+ //! \warning 取值大于0。
2699
+ //!
2700
+ uint32_t k = 1;
2701
+ //!
2702
+ //! \brief 激活类型。
2703
+ //!
2704
+ //! \note 默认值为ACTIVATION_SIGMOID。
2705
+ //!
2706
+ //! \warning 取值范围为ACTIVATION_SIGMOID。
2707
+ //!
2708
+ ActivationType activationType = ACTIVATION_SIGMOID;
2709
+ //!
2710
+ //! \brief 是否归一化。
2711
+ //!
2712
+ //! \note 默认值为true。
2713
+ //!
2714
+ //! \warning 取值范围为true。
2715
+ //!
2716
+ bool isNorm = true;
2717
+ //!
2718
+ //! \brief 归一化后的乘系数。
2719
+ //!
2720
+ //! \note 默认值为1.0。
2721
+ //!
2722
+ //! \warning 取值范围为任意值。
2723
+ //!
2724
+ float scale = 1.0f;
2725
+ //!
2726
+ //! \brief 是否使能物理专家向逻辑专家的映射。
2727
+ //!
2728
+ //! \note 默认值为false。
2729
+ //!
2730
+ //! \warning 取值范围为false/true。
2731
+ //!
2732
+ bool enableExpertMapping = false;
2733
+ //!
2734
+ //! \brief 预留参数。
2735
+ //!
2736
+ //! \note 默认为全0的数组。
2737
+ //!
2738
+ //! \warning 数组元素必须均为0。
2739
+ //!
2740
+ uint8_t rsv[27] = {0};
1228
2741
  };
1229
2742
 
1230
2743
  //!
1231
- //! \struct IndexAddParam
2744
+ //! \struct MlaPreprocessParam
1232
2745
  //!
1233
- //! \brief 固定维度的指定下标加上某个特定值。
2746
+ //! \brief 融合rmsNormQuant、matmul、rope、reshapeAndCache,用于MLA预处理。
1234
2747
  //!
1235
- struct IndexAddParam {
2748
+ //! \warning 所有参数目前均为未使用的预留参数,需支持泛化后启用,仅Atlas 800I A2推理产品支持该算子
2749
+ //!
2750
+ struct MlaPreprocessParam {
1236
2751
  //!
1237
- //! \enum IndexType
2752
+ //! \brief 经过matmul后拆分的dim大小
1238
2753
  //!
1239
- //! \brief 指定下标需要执行的操作类型。
2754
+ uint32_t wdqDim = 0;
1240
2755
  //!
1241
- enum IndexType {
1242
- INDEX_UNDEFINED = 0, //!< 默认值
1243
- INDEX_ADD, //!<
2756
+ //! \brief q传入rope的dim大小
2757
+ //!
2758
+ uint32_t qRopeDim = 0;
2759
+ //!
2760
+ //! \brief k传入rope的dim大小
2761
+ //!
2762
+ uint32_t kRopeDim = 0;
2763
+ //!
2764
+ //! \brief epsilon,放在分母上防止除0。
2765
+ //!
2766
+ float epsilon = 1e-5;
2767
+ //!
2768
+ //! \brief q旋转系数,对半旋转是2,支持配置2、4或headDim。
2769
+ //!
2770
+ int32_t qRotaryCoeff = 2;
2771
+ //!
2772
+ //! \brief k旋转系数,对半旋转是2,支持配置2、4或headDim。
2773
+ //!
2774
+ int32_t kRotaryCoeff = 2;
2775
+ //!
2776
+ //! \brief wdq是否转置
2777
+ //!
2778
+ bool transposeWdq = true;
2779
+ //!
2780
+ //! \brief wuq是否转置
2781
+ //!
2782
+ bool transposeWuq = true;
2783
+ //!
2784
+ //! \brief wuk是否转置
2785
+ //!
2786
+ bool transposeWuk = true;
2787
+ //!
2788
+ //! \enum CacheMode
2789
+ //!
2790
+ //! \brief 指定cache的类型。
2791
+ //!
2792
+ enum CacheMode : uint8_t {
2793
+ KVCACHE = 0,
2794
+ KROPE_CTKV,
2795
+ INT8_NZCACHE,
2796
+ NZCACHE,
1244
2797
  };
1245
- //! \brief 指定下标需要执行的操作类型。
1246
- IndexType indexType = INDEX_UNDEFINED;
1247
- //! \brief 输入Tensor需加上updates更新值的轴。可为负数。值小于var的维度数。
1248
- int64_t axis = 0;
2798
+ //!
2799
+ //! \brief 指定cache的类型。
2800
+ //!
2801
+ CacheMode cacheMode = KVCACHE;
2802
+ //!
2803
+ //! \enum QuantMode
2804
+ //!
2805
+ //! \brief 指定RmsNorm量化的类型。
2806
+ //!
2807
+ enum QuantMode : uint16_t {
2808
+ PER_TENSOR_QUANT_ASYMM = 0,
2809
+ PER_TOKEN_QUANT_SYMM,
2810
+ PER_TOKEN_QUANT_ASYMM,
2811
+ UNQUANT,
2812
+ };
2813
+ //!
2814
+ //! \brief 指定RmsNorm量化的类型。
2815
+ //!
2816
+ QuantMode quantMode = PER_TENSOR_QUANT_ASYMM;
2817
+ //!
2818
+ //! \brief 预留参数
2819
+ //!
2820
+ uint8_t rsv[34] = {0};
1249
2821
  };
1250
2822
 
1251
2823
  //!
1252
- //! \struct SendParam
2824
+ //! \struct ReshapeAndCacheOmniParam
1253
2825
  //!
1254
- //! \brief 将当前通信卡的输入发送至指定通信卡上.
2826
+ //! \brief omni压缩配套使用的reshapeAndCache
1255
2827
  //!
1256
- //! rank、rankSize、rankRoot需满足以下条件:
1257
- //! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize, 0 ≤ destRank < rankSize
2828
+ //! \warning 仅Atlas 800I A2推理产品支持该算子
1258
2829
  //!
1259
- //! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
1260
- //! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
2830
+ struct ReshapeAndCacheOmniParam {
2831
+ //!
2832
+ //! \brief 预留参数
2833
+ //!
2834
+ uint8_t rsv[8] = {0};
2835
+ };
2836
+
1261
2837
  //!
1262
- //! \code
1263
- //! rm -rf /dev/shm/sem.lccl*
1264
- //! rm -rf /dev/shm/sem.hccl*
1265
- //! ipcrm -a
1266
- //! \endcode
2838
+ //! \brief MultiLatentAttention.
1267
2839
  //!
1268
- struct SendParam {
1269
- //! \brief 每张卡所属通信编号
1270
- int rank = 0;
1271
- //! \brief 通信的卡的数量
1272
- int rankSize = 0;
1273
- //! \brief 主通信编号
1274
- int rankRoot = 0;
1275
- //! \brief 通信域内数据接收端的rank编号.
1276
- uint32_t destRank = 1;
1277
- //! \brief 通信后端指示,仅支持"hccl".
1278
- std::string backend = "hccl";
1279
- //! \brief HCCL通信域指针
1280
- HcclComm hcclComm = nullptr;
1281
- //! \brief 通信模式,CommMode类型枚举值。hccl多线程只支持外部传入通信域方式
1282
- CommMode commMode = COMM_MULTI_PROCESS;
2840
+ struct MultiLatentAttentionParam {
1283
2841
  //!
1284
- //! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
2842
+ //! \brief query头大小
1285
2843
  //!
1286
- //! ranktable配置参考
1287
- //! https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC1alpha002/devguide/moddevg/tfmigr1/tfmigr1_000029.html
2844
+ int32_t headNum = 0;
1288
2845
  //!
1289
- std::string rankTableFile;
1290
- //! \brief 通信device组用通信域名标识,多通信域时使用,当前仅支持hccl
1291
- std::string commDomain;
2846
+ //! \brief 算子tor值, 在Q*K^T后乘
2847
+ //!
2848
+ float qkScale = 1.0;
2849
+ //!
2850
+ //! \brief kv头数量
2851
+ //!
2852
+ int32_t kvHeadNum = 0;
2853
+ //!
2854
+ //! \enum MaskType
2855
+ //!
2856
+ //! \brief The type values of MaskType.
2857
+ //!
2858
+ enum MaskType : int {
2859
+ UNDEFINED = 0, //!< 默认值,全0的mask
2860
+ MASK_TYPE_SPEC, //!< qseqlen > 1时的mask
2861
+ MASK_TYPE_MASK_FREE, //!< mask free
2862
+ };
2863
+ //!
2864
+ //! \brief mask类型
2865
+ //!
2866
+ MaskType maskType = UNDEFINED;
2867
+ //!
2868
+ //! \enum CalcType
2869
+ //!
2870
+ //! \brief The type values of CalcType.
2871
+ //!
2872
+ enum CalcType : int {
2873
+ CALC_TYPE_UNDEFINED = 0, // 默认值
2874
+ CALC_TYPE_SPEC, // 支持传入大于1的qseqlen
2875
+ CALC_TYPE_RING, // ringAttention
2876
+ };
2877
+ //!
2878
+ //! \brief CalcType类型
2879
+ //!
2880
+ CalcType calcType = CALC_TYPE_UNDEFINED;
2881
+ //!
2882
+ //! \enum CacheMode
2883
+ //!
2884
+ //! \brief 指定cache的类型。
2885
+ //!
2886
+ enum CacheMode : uint8_t {
2887
+ KVCACHE = 0, // 拼接cache
2888
+ KROPE_CTKV, // 分离cache,默认值
2889
+ INT8_NZCACHE, // 高性能分离cache
2890
+ NZCACHE, // 非量化NZcache
2891
+ };
2892
+ //!
2893
+ //! \brief 指定cache的类型。
2894
+ //!
2895
+ CacheMode cacheMode = KVCACHE;
2896
+ //!
2897
+ //! \brief 预留参数
2898
+ //!
2899
+ uint8_t rsv[43] = {0};
1292
2900
  };
1293
2901
 
1294
2902
  //!
1295
- //! \struct RecvParam
2903
+ //! \struct RazorFusionAttentionParam
1296
2904
  //!
1297
- //! \brief 从当前通信卡接收来自指定通信卡的数据.
2905
+ //! \brief 多模态场景
1298
2906
  //!
1299
- //! rank、rankSize、rankRoot需满足以下条件:
1300
- //! 0 ≤ rank < rankSize, 0 ≤ rankRoot < rankSize, 0 ≤ srcRank < rankSize
2907
+ struct RazorFusionAttentionParam {
2908
+ //!
2909
+ //! \brief Operator headSize value, described as the query head size. NOTE(review): the member is named headNum - confirm whether a head count is meant
2910
+ //!
2911
+ int32_t headNum = 1;
2912
+ //!
2913
+ //! \brief Operator kvHead value: the number of KV heads
2914
+ //!
2915
+ int32_t kvHeadNum = 1;
2916
+ //!
2917
+ //! \brief Operator tor value, multiplied in after Q*K^T
2918
+ //!
2919
+ float qkScale = 1;
2920
+ //!
2921
+ //! \brief Length of the image
2922
+ //!
2923
+ int32_t razorLen = 0;
2924
+ //!
2925
+ //! \brief Used for sparse computation: how many preceding tokens attention is computed against; a multiple of 128
2926
+ //!
2927
+ int32_t preTokens = 0;
2928
+ //!
2929
+ //! \brief Used for sparse computation; a multiple of 128. NOTE(review): the original comment repeated the preTokens text ("preceding tokens"); presumably this counts the following tokens - confirm
2930
+ //!
2931
+ int32_t nextTokens = 0;
2932
+ //!
2933
+ //! \brief Number of images along the Q direction
2934
+ //!
2935
+ int32_t tileQ = 0;
2936
+ //!
2937
+ //! \brief Number of images along the KV direction
2938
+ //!
2939
+ int32_t tileKv = 0;
2940
+ //!
2941
+ //! \brief Number of text tokens along the Q direction
2942
+ //!
2943
+ int32_t textQLen = 0;
2944
+ //!
2945
+ //! \brief Number of text tokens along the KV direction
2946
+ //!
2947
+ int32_t textKvLen = 0;
2948
+ //!
2949
+ //! \brief Reserved parameters
2950
+ //!
2951
+ uint8_t rsv[64] = {0};
2952
+ };
2953
+
1301
2954
  //!
1302
- //! \note 1、多用户使用时需要使用ATB_SHARE_MEMORY_NAME_SUFFIX环境变量进行共享内存的区分,以进行初始化信息同步.
1303
- //! \note 2、当使用加速库的通信算子异常退出时,需要清空残留数据,避免影响之后的使用,命令参考如下:
2955
+ //! \struct FaUpdateParam
1304
2956
  //!
1305
- //! \code
1306
- //! rm -rf /dev/shm/sem.lccl*
1307
- //! rm -rf /dev/shm/sem.hccl*
1308
- //! ipcrm -a
1309
- //! \endcode
2957
+ //! \brief 主要功能为将flash attention输出的中间结果rowmax, rowsum, attention out三个局部结果更新成全局结果
1310
2958
  //!
1311
- struct RecvParam {
1312
- //! \brief 每张卡所属通信编号
1313
- int rank = 0;
1314
- //! \brief 通信的卡的数量
1315
- int rankSize = 0;
1316
- //! \brief 主通信编号
1317
- int rankRoot = 0;
1318
- //! \brief 通信域内数据发送端的rank编号.
1319
- uint32_t srcRank = 1;
1320
- //! \brief 通信后端指示,仅支持"hccl".
1321
- std::string backend = "hccl";
1322
- //! \brief HCCL通信域指针
1323
- HcclComm hcclComm = nullptr;
1324
- //! \brief 通信模式,CommMode类型枚举值。hccl多线程只支持外部传入通信域方式
1325
- CommMode commMode = COMM_MULTI_PROCESS;
2959
+ struct FaUpdateParam {
1326
2960
  //!
1327
- //! \brief 集群信息的配置文件路径,适用单机以及多机通信场景,当前仅支持hccl后端场景,若单机配置了rankTable,则以ranktable来初始化通信域。
2961
+ //! \enum FaUpdateType
1328
2962
  //!
1329
- //! ranktable配置参考
1330
- //! https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC1alpha002/devguide/moddevg/tfmigr1/tfmigr1_000029.html
2963
+ //! \brief Specifies the type of operation to perform for the given index.
1331
2964
  //!
1332
- std::string rankTableFile;
1333
- //! \brief 通信device组用通信域名标识,多通信域时使用,当前仅支持hccl
1334
- std::string commDomain;
2965
+ enum FaUpdateType {
2966
+ DECODE_UPDATE = 0, //!< Default value. decode_update.
2967
+ };
2968
+ //!
2969
+ //! \brief Specifies the type of operation to perform for the given index.
2970
+ //!
2971
+ //! \warning Currently the only supported value is DECODE_UPDATE.
2972
+ //!
2973
+ FaUpdateType faUpdateType = DECODE_UPDATE;
2974
+ //!
2975
+ //! \brief Degree of sequence parallelism (SP).
2976
+ //!
2977
+ //! \note The default value is 1.
2978
+ //!
2979
+ uint32_t sp = 1;
2980
+ //!
2981
+ //! \brief Reserved parameters
2982
+ //!
2983
+ uint8_t rsv[64] = {0};
1335
2984
  };
1336
2985
 
1337
- } // namespace infer
1338
- } // namespace atb
2986
+ //!
2987
+ //! \struct PagedCacheLoadParam
2988
+ //!
2989
+ //! \brief reshapeandcache反向
2990
+ //!
2991
+ struct PagedCacheLoadParam {
2992
+ //!
2993
+ //! \brief Reserved parameters
2994
+ //!
2995
+ uint8_t rsv[64] = {0};
2996
+ };
2997
+ } // namespace infer
2998
+ } // namespace atb
1339
2999
  #endif