mindspore 2.4.1__cp39-none-any.whl → 2.5.0__cp39-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (847)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Third_Party_Open_Source_Software_Notice +39 -0
  3. mindspore/__init__.py +8 -3
  4. mindspore/_akg/akg/composite/build_module.py +6 -2
  5. mindspore/_akg/akg/utils/kernel_exec.py +2 -2
  6. mindspore/_c_dataengine.cpython-39-aarch64-linux-gnu.so +0 -0
  7. mindspore/_c_expression.cpython-39-aarch64-linux-gnu.so +0 -0
  8. mindspore/_c_mindrecord.cpython-39-aarch64-linux-gnu.so +0 -0
  9. mindspore/_checkparam.py +0 -5
  10. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  11. mindspore/_extends/parse/compile_config.py +64 -0
  12. mindspore/_extends/parse/deprecated/__init__.py +0 -0
  13. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +375 -0
  14. mindspore/_extends/parse/parser.py +23 -5
  15. mindspore/_extends/parse/standard_method.py +123 -27
  16. mindspore/_extends/pijit/pijit_func_white_list.py +1 -1
  17. mindspore/amp.py +7 -1
  18. mindspore/boost/boost_cell_wrapper.py +136 -41
  19. mindspore/common/__init__.py +3 -1
  20. mindspore/common/_register_for_tensor.py +0 -1
  21. mindspore/common/_stub_tensor.py +25 -4
  22. mindspore/common/_tensor_cpp_method.py +17 -0
  23. mindspore/common/_tensor_docs.py +6132 -0
  24. mindspore/common/api.py +99 -25
  25. mindspore/common/dtype.py +34 -34
  26. mindspore/common/dump.py +2 -1
  27. mindspore/common/file_system.py +8 -1
  28. mindspore/common/generator.py +2 -0
  29. mindspore/common/hook_handle.py +3 -1
  30. mindspore/common/initializer.py +3 -4
  31. mindspore/common/lazy_inline.py +8 -2
  32. mindspore/common/mindir_util.py +10 -2
  33. mindspore/common/parameter.py +30 -27
  34. mindspore/common/tensor.py +713 -1337
  35. mindspore/communication/__init__.py +1 -1
  36. mindspore/communication/_comm_helper.py +10 -0
  37. mindspore/communication/comm_func.py +215 -173
  38. mindspore/communication/management.py +23 -20
  39. mindspore/context.py +292 -193
  40. mindspore/dataset/__init__.py +23 -19
  41. mindspore/dataset/callback/ds_callback.py +2 -1
  42. mindspore/dataset/core/config.py +84 -3
  43. mindspore/dataset/engine/cache_admin.py +3 -3
  44. mindspore/dataset/engine/cache_client.py +5 -4
  45. mindspore/dataset/engine/datasets.py +192 -149
  46. mindspore/dataset/engine/datasets_audio.py +14 -0
  47. mindspore/dataset/engine/datasets_standard_format.py +28 -11
  48. mindspore/dataset/engine/datasets_text.py +38 -1
  49. mindspore/dataset/engine/datasets_user_defined.py +125 -65
  50. mindspore/dataset/engine/datasets_vision.py +81 -8
  51. mindspore/dataset/engine/iterators.py +281 -63
  52. mindspore/dataset/engine/obs/util.py +8 -0
  53. mindspore/dataset/engine/queue.py +40 -0
  54. mindspore/dataset/engine/samplers.py +26 -2
  55. mindspore/dataset/engine/serializer_deserializer.py +1 -1
  56. mindspore/dataset/engine/validators.py +43 -11
  57. mindspore/dataset/transforms/py_transforms_util.py +17 -0
  58. mindspore/dataset/transforms/transforms.py +29 -12
  59. mindspore/dataset/vision/validators.py +1 -2
  60. mindspore/device_context/__init__.py +21 -0
  61. mindspore/device_context/ascend/__init__.py +25 -0
  62. mindspore/device_context/ascend/device.py +72 -0
  63. mindspore/device_context/ascend/op_debug.py +94 -0
  64. mindspore/device_context/ascend/op_precision.py +193 -0
  65. mindspore/device_context/ascend/op_tuning.py +127 -0
  66. mindspore/device_context/cpu/__init__.py +25 -0
  67. mindspore/device_context/cpu/device.py +62 -0
  68. mindspore/device_context/cpu/op_tuning.py +43 -0
  69. mindspore/device_context/gpu/__init__.py +21 -0
  70. mindspore/device_context/gpu/device.py +70 -0
  71. mindspore/device_context/gpu/op_precision.py +67 -0
  72. mindspore/device_context/gpu/op_tuning.py +175 -0
  73. mindspore/device_manager.py +134 -0
  74. mindspore/experimental/llm_boost/__init__.py +3 -2
  75. mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
  76. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
  77. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
  78. mindspore/experimental/llm_boost/atb/boost_base.py +239 -64
  79. mindspore/experimental/llm_boost/atb/llama_boost.py +52 -30
  80. mindspore/experimental/llm_boost/atb/qwen_boost.py +47 -24
  81. mindspore/experimental/llm_boost/register.py +1 -0
  82. mindspore/experimental/optim/adadelta.py +26 -22
  83. mindspore/experimental/optim/adam.py +3 -0
  84. mindspore/experimental/optim/lr_scheduler.py +33 -24
  85. mindspore/experimental/optim/radam.py +33 -30
  86. mindspore/hal/device.py +28 -0
  87. mindspore/hal/event.py +17 -0
  88. mindspore/hal/memory.py +94 -3
  89. mindspore/hal/stream.py +91 -6
  90. mindspore/include/api/context.h +1 -2
  91. mindspore/include/dataset/constants.h +2 -2
  92. mindspore/lib/libavcodec.so.59 +0 -0
  93. mindspore/lib/libavdevice.so.59 +0 -0
  94. mindspore/lib/libavfilter.so.8 +0 -0
  95. mindspore/lib/libavformat.so.59 +0 -0
  96. mindspore/lib/libavutil.so.57 +0 -0
  97. mindspore/lib/libdnnl.so.2 +0 -0
  98. mindspore/lib/libmindspore_backend.so +0 -0
  99. mindspore/lib/libmindspore_common.so +0 -0
  100. mindspore/lib/libmindspore_core.so +0 -0
  101. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  102. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  103. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  104. mindspore/lib/libmindspore_ops.so +0 -0
  105. mindspore/lib/libmpi_adapter.so +0 -0
  106. mindspore/lib/libmpi_collective.so +0 -0
  107. mindspore/lib/libnnacl.so +0 -0
  108. mindspore/lib/libopencv_core.so.4.5 +0 -0
  109. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  110. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  111. mindspore/lib/libps_cache.so +0 -0
  112. mindspore/lib/libswresample.so.4 +0 -0
  113. mindspore/lib/libswscale.so.6 +0 -0
  114. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910_93/aic-ascend910_93-ops-info.json +2048 -0
  115. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  116. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  117. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  118. mindspore/lib/plugin/ascend/custom_ascendc_910/framework/npu_supported_ops.json +10 -0
  119. mindspore/lib/plugin/ascend/custom_ascendc_910/op_api/lib/libcust_opapi.so +0 -0
  120. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +182 -0
  121. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/decoder_kv_cache.py +51 -16
  122. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/prompt_kv_cache.py +51 -16
  123. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +158 -0
  124. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  125. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +158 -0
  126. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  127. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +158 -0
  128. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  129. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +158 -0
  130. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  131. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +158 -0
  132. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  133. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +158 -0
  134. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  135. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +158 -0
  136. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  137. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +158 -0
  138. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  139. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +167 -0
  140. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  141. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +167 -0
  142. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  143. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +167 -0
  144. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  145. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +167 -0
  146. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  147. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +167 -0
  148. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  149. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +167 -0
  150. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  151. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +167 -0
  152. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  153. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +167 -0
  154. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  155. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/binary_info_config.json +302 -0
  156. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/decoder_kv_cache.json +892 -0
  157. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/prompt_kv_cache.json +892 -0
  158. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  159. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  160. mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/inc/op_proto.h +33 -0
  161. mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  162. mindspore/lib/plugin/ascend/custom_ascendc_910/version.info +1 -0
  163. mindspore/lib/plugin/ascend/custom_ascendc_910b/framework/npu_supported_ops.json +14 -0
  164. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_decoder_kv_cache.h +59 -0
  165. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_prompt_kv_cache.h +59 -0
  166. mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_api/lib/libcust_opapi.so +0 -0
  167. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl}/dynamic/all_finite.py +51 -16
  168. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.cpp +192 -0
  169. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.py +215 -0
  170. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.cpp +274 -0
  171. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.py +215 -0
  172. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +80 -0
  173. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o +0 -0
  174. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +80 -0
  175. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o +0 -0
  176. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +80 -0
  177. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o +0 -0
  178. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +158 -0
  179. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  180. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +158 -0
  181. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  182. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +158 -0
  183. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  184. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +158 -0
  185. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  186. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +158 -0
  187. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  188. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +158 -0
  189. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  190. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +158 -0
  191. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  192. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +158 -0
  193. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  194. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +167 -0
  195. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  196. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +167 -0
  197. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  198. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +167 -0
  199. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  200. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +167 -0
  201. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  202. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +167 -0
  203. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  204. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +167 -0
  205. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  206. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +167 -0
  207. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  208. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +167 -0
  209. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  210. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +78 -0
  211. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o} +0 -0
  212. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +78 -0
  213. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o} +0 -0
  214. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +78 -0
  215. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o} +0 -0
  216. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +156 -0
  217. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  218. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +156 -0
  219. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  220. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +156 -0
  221. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  222. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +156 -0
  223. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  224. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +156 -0
  225. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  226. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +156 -0
  227. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  228. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +156 -0
  229. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  230. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +156 -0
  231. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  232. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +165 -0
  233. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  234. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +165 -0
  235. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  236. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +165 -0
  237. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  238. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +165 -0
  239. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  240. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +165 -0
  241. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  242. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +165 -0
  243. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  244. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +165 -0
  245. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  246. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +165 -0
  247. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  248. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +78 -0
  249. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o +0 -0
  250. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +78 -0
  251. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o +0 -0
  252. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +78 -0
  253. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o +0 -0
  254. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +156 -0
  255. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  256. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +156 -0
  257. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  258. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +156 -0
  259. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  260. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +156 -0
  261. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  262. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +156 -0
  263. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  264. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +156 -0
  265. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  266. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +156 -0
  267. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  268. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +156 -0
  269. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  270. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +165 -0
  271. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  272. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +165 -0
  273. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  274. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +165 -0
  275. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  276. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +165 -0
  277. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  278. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +165 -0
  279. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  280. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +165 -0
  281. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  282. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +165 -0
  283. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  284. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +165 -0
  285. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  286. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/all_finite.json +139 -0
  287. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/binary_info_config.json +361 -0
  288. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/decoder_kv_cache.json +892 -0
  289. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/prompt_kv_cache.json +892 -0
  290. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910_93/all_finite.json +139 -0
  291. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910_93/binary_info_config.json +361 -0
  292. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910_93/decoder_kv_cache.json +892 -0
  293. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910_93/prompt_kv_cache.json +892 -0
  294. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +139 -0
  295. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +361 -0
  296. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/decoder_kv_cache.json +892 -0
  297. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/prompt_kv_cache.json +892 -0
  298. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  299. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  300. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  301. mindspore/lib/plugin/ascend/custom_ascendc_910b/version.info +1 -0
  302. mindspore/lib/plugin/ascend/custom_compiler/setup.py +1 -1
  303. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  304. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  305. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  306. mindspore/lib/plugin/ascend/liblowlatency_collective.so +0 -0
  307. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  308. mindspore/lib/plugin/ascend/libmindspore_internal_kernels.so +0 -0
  309. mindspore/lib/plugin/ascend/libms_ascend_native_boost.so +0 -0
  310. mindspore/lib/plugin/ascend/libms_atb_boost.so +0 -0
  311. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/PkgInspect +0 -0
  312. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/op_man +0 -0
  313. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/device/ascend910b/bin/ascend910b.bin +957 -955
  314. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
  315. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_host.so +0 -0
  316. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
  317. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
  318. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal_static.a +0 -0
  319. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{acme/include/base_type.h → base_type.h} +25 -20
  320. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{cast/cast_tiling.h → internal.h} +6 -4
  321. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_op.h +114 -0
  322. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/llm/boost_kernel.h +70 -0
  323. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/llm/llama_impl.h +85 -0
  324. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/llm/model_interface.h +52 -0
  325. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/llm/tensor.h +81 -0
  326. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_creator.h +123 -0
  327. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_param.h +155 -110
  328. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{acme/include/tiling_info.h → tiling_info.h} +12 -9
  329. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tiling_utils.h +178 -0
  330. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layer_norm_op.so +0 -0
  331. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_op.so +0 -0
  332. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_quant_op.so +0 -0
  333. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_op.so +0 -0
  334. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_op.so +0 -0
  335. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_op.so +0 -0
  336. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcompare_op.so +0 -0
  337. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_op.so +0 -0
  338. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libllama_op.so +0 -0
  339. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_op.so +0 -0
  340. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
  341. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_optiling.so +0 -0
  342. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmulti_weight_matmul_kernel_op.so +0 -0
  343. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_op.so +0 -0
  344. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_op.so +0 -0
  345. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_op.so +0 -0
  346. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_f16_nz/internal_pp_matmul_f16_nz.o +0 -0
  347. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_f16_nz/internal_pp_matmul_f16_nz_0.o +0 -0
  348. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_i8_nz_compress/internal_pp_matmul_i8_nz_compress.o +0 -0
  349. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_i8_nz_compress/internal_pp_matmul_i8_nz_compress_0.o +0 -0
  350. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_int8_nz/internal_pp_matmul_int8_nz.o +0 -0
  351. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_int8_nz/internal_pp_matmul_int8_nz_0.o +0 -0
  352. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libadd_rms_norm_quant_ascend310p.so +0 -0
  353. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libapply_rotary_pos_emb_310p_impl.so → op_kernels/ascend310p/so_kernels/libapply_rotary_pos_emb_310p_ascend310p.so} +0 -0
  354. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libcast_ascend310p.so +0 -0
  355. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libcompare_ascend310p.so +0 -0
  356. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libgelu_ascend310p.so +0 -0
  357. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libmatmul_ascend310p.so +0 -0
  358. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libreshape_and_cache_nz_ascend310p.so +0 -0
  359. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/hphol_kernels/add_rms_norm_dynamic_quant/AddRmsNormDynamicQuant_4b60f88cdc28b25a36bad2d8b0a88092.json +163 -0
  360. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/hphol_kernels/add_rms_norm_dynamic_quant/AddRmsNormDynamicQuant_4b60f88cdc28b25a36bad2d8b0a88092.o +0 -0
  361. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/hphol_kernels/add_rms_norm_dynamic_quant/AddRmsNormDynamicQuant_cde61da2bd6fededcb1ba310a6ad16ee.json +163 -0
  362. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/hphol_kernels/add_rms_norm_dynamic_quant/AddRmsNormDynamicQuant_cde61da2bd6fededcb1ba310a6ad16ee.o +0 -0
  363. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
  364. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
  365. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bsh_full_mix.o +0 -0
  366. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
  367. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
  368. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
  369. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bsh_full_mix.o +0 -0
  370. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
  371. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_matmul_postfusion_mix/internal_matmul_postfusion_mix.o +0 -0
  372. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_matmul_postfusion_mix/internal_matmul_postfusion_mix_mix_aic_0.o +0 -0
  373. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_matmul_postfusion_mix/internal_matmul_postfusion_mix_mix_aiv_0.o +0 -0
  374. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_multi_weight_matmul_postfusion_mix/internal_multi_weight_matmul_postfusion_mix.o +0 -0
  375. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_multi_weight_matmul_postfusion_mix/internal_multi_weight_matmul_postfusion_mix_mix_aic_0.o +0 -0
  376. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_multi_weight_matmul_postfusion_mix/internal_multi_weight_matmul_postfusion_mix_mix_aiv_0.o +0 -0
  377. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/{matmul_add_rmsnorm → object_kernels/matmul_add_rmsnorm}/matmul_add_rmsnorm_bf16_bf16.o +0 -0
  378. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/{matmul_add_rmsnorm → object_kernels/matmul_add_rmsnorm}/matmul_add_rmsnorm_bf16_fp16.o +0 -0
  379. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/{matmul_add_rmsnorm → object_kernels/matmul_add_rmsnorm}/matmul_add_rmsnorm_bf16_fp32.o +0 -0
  380. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/{matmul_add_rmsnorm → object_kernels/matmul_add_rmsnorm}/matmul_add_rmsnorm_fp16_bf16.o +0 -0
  381. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/{matmul_add_rmsnorm → object_kernels/matmul_add_rmsnorm}/matmul_add_rmsnorm_fp16_fp16.o +0 -0
  382. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/{matmul_add_rmsnorm → object_kernels/matmul_add_rmsnorm}/matmul_add_rmsnorm_fp16_fp32.o +0 -0
  383. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/paged_attention_v2/paged_attention_v2.o +0 -0
  384. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/paged_attention_v2/paged_attention_v2_mix_aic_0.o +0 -0
  385. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/paged_attention_v2/paged_attention_v2_mix_aiv_0.o +0 -0
  386. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libadd_layer_norm_impl.so → op_kernels/ascend910b/so_kernels/libadd_layer_norm_ascend910b.so} +0 -0
  387. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libadd_rms_norm_impl.so → op_kernels/ascend910b/so_kernels/libadd_rms_norm_ascend910b.so} +0 -0
  388. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libadd_rms_norm_quant_ascend910b.so +0 -0
  389. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libapply_rotary_pos_emb_impl.so → op_kernels/ascend910b/so_kernels/libapply_rotary_pos_emb_ascend910b.so} +0 -0
  390. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libcast_impl.so → op_kernels/ascend910b/so_kernels/libcast_ascend910b.so} +0 -0
  391. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libnot_equal_impl.so → op_kernels/ascend910b/so_kernels/libcompare_ascend910b.so} +0 -0
  392. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libgelu_impl.so → op_kernels/ascend910b/so_kernels/libgelu_ascend910b.so} +0 -0
  393. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libllama_ascend910b.so +0 -0
  394. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libmatmul_impl.so → op_kernels/ascend910b/so_kernels/libmatmul_ascend910b.so} +0 -0
  395. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libmulti_weight_matmul_kernel_impl.so → op_kernels/ascend910b/so_kernels/libmulti_weight_matmul_kernel_ascend910b.so} +0 -0
  396. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libreshape_and_cache_impl.so → op_kernels/ascend910b/so_kernels/libreshape_and_cache_ascend910b.so} +0 -0
  397. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/librms_norm_impl.so → op_kernels/ascend910b/so_kernels/librms_norm_ascend910b.so} +0 -0
  398. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblccl_wrapper.so +0 -0
  399. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  400. mindspore/log.py +12 -0
  401. mindspore/mindrecord/__init__.py +1 -1
  402. mindspore/mindrecord/config.py +17 -316
  403. mindspore/mindrecord/filereader.py +1 -9
  404. mindspore/mindrecord/filewriter.py +5 -15
  405. mindspore/mindrecord/mindpage.py +1 -9
  406. mindspore/mint/__init__.py +824 -218
  407. mindspore/mint/distributed/__init__.py +66 -4
  408. mindspore/mint/distributed/distributed.py +2594 -44
  409. mindspore/mint/linalg/__init__.py +6 -0
  410. mindspore/mint/nn/__init__.py +473 -14
  411. mindspore/mint/nn/functional.py +486 -11
  412. mindspore/mint/nn/layer/__init__.py +17 -4
  413. mindspore/mint/nn/layer/_functions.py +330 -0
  414. mindspore/mint/nn/layer/activation.py +169 -1
  415. mindspore/mint/nn/layer/basic.py +123 -0
  416. mindspore/mint/nn/layer/conv.py +727 -0
  417. mindspore/mint/nn/layer/normalization.py +215 -19
  418. mindspore/mint/nn/layer/padding.py +797 -0
  419. mindspore/mint/nn/layer/pooling.py +170 -0
  420. mindspore/mint/optim/__init__.py +2 -1
  421. mindspore/mint/optim/adam.py +223 -0
  422. mindspore/mint/optim/adamw.py +26 -19
  423. mindspore/mint/special/__init__.py +2 -1
  424. mindspore/multiprocessing/__init__.py +5 -0
  425. mindspore/nn/__init__.py +2 -0
  426. mindspore/nn/cell.py +142 -21
  427. mindspore/nn/dynamic_lr.py +2 -1
  428. mindspore/nn/layer/activation.py +6 -6
  429. mindspore/nn/layer/basic.py +35 -25
  430. mindspore/nn/layer/channel_shuffle.py +3 -3
  431. mindspore/nn/layer/conv.py +3 -0
  432. mindspore/nn/layer/embedding.py +3 -3
  433. mindspore/nn/layer/normalization.py +8 -7
  434. mindspore/nn/layer/padding.py +4 -3
  435. mindspore/nn/layer/pooling.py +55 -23
  436. mindspore/nn/layer/rnn_cells.py +1 -1
  437. mindspore/nn/layer/rnns.py +2 -1
  438. mindspore/nn/layer/timedistributed.py +5 -5
  439. mindspore/nn/layer/transformer.py +48 -26
  440. mindspore/nn/learning_rate_schedule.py +5 -3
  441. mindspore/nn/loss/loss.py +31 -36
  442. mindspore/nn/optim/ada_grad.py +1 -0
  443. mindspore/nn/optim/adadelta.py +2 -2
  444. mindspore/nn/optim/adam.py +1 -1
  445. mindspore/nn/optim/lars.py +1 -4
  446. mindspore/nn/optim/optimizer.py +1 -1
  447. mindspore/nn/optim/rprop.py +2 -2
  448. mindspore/nn/optim/thor.py +2 -1
  449. mindspore/nn/utils/__init__.py +22 -0
  450. mindspore/nn/utils/init.py +73 -0
  451. mindspore/nn/wrap/cell_wrapper.py +4 -6
  452. mindspore/nn/wrap/loss_scale.py +3 -4
  453. mindspore/numpy/array_creations.py +60 -62
  454. mindspore/numpy/array_ops.py +148 -143
  455. mindspore/numpy/logic_ops.py +41 -42
  456. mindspore/numpy/math_ops.py +361 -359
  457. mindspore/numpy/utils.py +16 -16
  458. mindspore/numpy/utils_const.py +4 -4
  459. mindspore/ops/__init__.py +2 -1
  460. mindspore/ops/_grad_experimental/grad_comm_ops.py +107 -8
  461. mindspore/ops/_grad_experimental/grad_debug_ops.py +6 -1
  462. mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
  463. mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
  464. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  465. mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
  466. mindspore/ops/_vmap/vmap_array_ops.py +20 -19
  467. mindspore/ops/_vmap/vmap_base.py +0 -2
  468. mindspore/ops/_vmap/vmap_grad_nn_ops.py +19 -13
  469. mindspore/ops/_vmap/vmap_math_ops.py +11 -9
  470. mindspore/ops/_vmap/vmap_nn_ops.py +20 -34
  471. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +149 -12
  472. mindspore/ops/auto_generate/gen_arg_handler.py +0 -61
  473. mindspore/ops/auto_generate/gen_extend_func.py +554 -60
  474. mindspore/ops/auto_generate/gen_ops_def.py +1621 -115
  475. mindspore/ops/auto_generate/gen_ops_prim.py +8027 -3411
  476. mindspore/ops/auto_generate/pyboost_inner_prim.py +183 -79
  477. mindspore/ops/composite/base.py +1 -1
  478. mindspore/ops/composite/multitype_ops/_compile_utils.py +229 -30
  479. mindspore/ops/composite/multitype_ops/pow_impl.py +0 -29
  480. mindspore/ops/function/__init__.py +12 -0
  481. mindspore/ops/function/array_func.py +561 -159
  482. mindspore/ops/function/clip_func.py +64 -0
  483. mindspore/ops/function/debug_func.py +28 -20
  484. mindspore/ops/function/image_func.py +1 -1
  485. mindspore/ops/function/linalg_func.py +5 -4
  486. mindspore/ops/function/math_func.py +1664 -294
  487. mindspore/ops/function/nn_func.py +988 -317
  488. mindspore/ops/function/parameter_func.py +3 -56
  489. mindspore/ops/function/random_func.py +243 -33
  490. mindspore/ops/function/sparse_unary_func.py +1 -1
  491. mindspore/ops/functional.py +18 -5
  492. mindspore/ops/functional_overload.py +897 -0
  493. mindspore/ops/operations/__init__.py +3 -2
  494. mindspore/ops/operations/_embedding_cache_ops.py +4 -4
  495. mindspore/ops/operations/_grad_ops.py +2 -34
  496. mindspore/ops/operations/_infer_ops.py +2 -1
  497. mindspore/ops/operations/_inner_ops.py +38 -8
  498. mindspore/ops/operations/array_ops.py +45 -303
  499. mindspore/ops/operations/comm_ops.py +23 -17
  500. mindspore/ops/operations/custom_ops.py +7 -49
  501. mindspore/ops/operations/debug_ops.py +42 -47
  502. mindspore/ops/operations/inner_ops.py +6 -4
  503. mindspore/ops/operations/linalg_ops.py +3 -2
  504. mindspore/ops/operations/manually_defined/ops_def.py +185 -104
  505. mindspore/ops/operations/math_ops.py +11 -216
  506. mindspore/ops/operations/nn_ops.py +153 -310
  507. mindspore/ops/primitive.py +23 -21
  508. mindspore/ops/tensor_method.py +1669 -0
  509. mindspore/ops_generate/aclnn_kernel_register_auto_cc_generator.py +110 -0
  510. mindspore/ops_generate/add_tensor_docs_generator.py +54 -0
  511. mindspore/ops_generate/arg_handler.py +0 -61
  512. mindspore/ops_generate/auto_grad_impl_cc_generator.py +135 -0
  513. mindspore/ops_generate/auto_grad_reg_cc_generator.py +93 -0
  514. mindspore/ops_generate/base_generator.py +11 -0
  515. mindspore/ops_generate/cpp_create_prim_instance_helper_generator.py +108 -0
  516. mindspore/ops_generate/functional_map_cpp_generator.py +491 -0
  517. mindspore/ops_generate/functional_overload_py_generator.py +110 -0
  518. mindspore/ops_generate/functions_cc_generator.py +233 -0
  519. mindspore/ops_generate/gen_aclnn_implement.py +110 -114
  520. mindspore/ops_generate/gen_constants.py +157 -3
  521. mindspore/ops_generate/gen_ops.py +245 -990
  522. mindspore/ops_generate/gen_pyboost_func.py +97 -998
  523. mindspore/ops_generate/gen_utils.py +119 -33
  524. mindspore/ops_generate/lite_ops_cpp_generator.py +155 -0
  525. mindspore/ops_generate/op_api_proto.py +206 -0
  526. mindspore/ops_generate/op_def_py_generator.py +131 -0
  527. mindspore/ops_generate/op_prim_py_generator.py +480 -0
  528. mindspore/ops_generate/op_proto.py +373 -108
  529. mindspore/ops_generate/op_template_parser.py +436 -0
  530. mindspore/ops_generate/ops_def_cc_generator.py +288 -0
  531. mindspore/ops_generate/ops_def_h_generator.py +74 -0
  532. mindspore/ops_generate/ops_name_h_generator.py +68 -0
  533. mindspore/ops_generate/ops_primitive_h_generator.py +81 -0
  534. mindspore/ops_generate/pyboost_functions_cpp_generator.py +370 -0
  535. mindspore/ops_generate/pyboost_functions_h_generator.py +68 -0
  536. mindspore/ops_generate/pyboost_functions_py_generator.py +148 -0
  537. mindspore/ops_generate/pyboost_grad_function_cpp_generator.py +154 -0
  538. mindspore/ops_generate/pyboost_inner_prim_generator.py +131 -0
  539. mindspore/ops_generate/pyboost_native_grad_functions_generator.py +268 -0
  540. mindspore/ops_generate/pyboost_op_cpp_code_generator.py +851 -0
  541. mindspore/ops_generate/pyboost_overload_functions_cpp_generator.py +344 -0
  542. mindspore/ops_generate/pyboost_utils.py +92 -33
  543. mindspore/ops_generate/template.py +294 -44
  544. mindspore/ops_generate/tensor_func_reg_cpp_generator.py +422 -0
  545. mindspore/parallel/__init__.py +3 -3
  546. mindspore/parallel/_auto_parallel_context.py +44 -34
  547. mindspore/parallel/_cell_wrapper.py +22 -3
  548. mindspore/parallel/_parallel_serialization.py +13 -2
  549. mindspore/parallel/_utils.py +4 -2
  550. mindspore/parallel/algo_parameter_config.py +1 -1
  551. mindspore/parallel/checkpoint_transform.py +44 -0
  552. mindspore/parallel/cluster/process_entity/_api.py +131 -37
  553. mindspore/parallel/cluster/process_entity/_utils.py +41 -6
  554. mindspore/parallel/cluster/run.py +20 -3
  555. mindspore/parallel/parameter_broadcast.py +1 -1
  556. mindspore/parallel/shard.py +3 -0
  557. mindspore/parallel/transform_safetensors.py +119 -253
  558. mindspore/profiler/__init__.py +17 -4
  559. mindspore/profiler/analysis/__init__.py +0 -0
  560. mindspore/profiler/analysis/parser/__init__.py +0 -0
  561. mindspore/profiler/analysis/parser/ascend_cann_parser.py +166 -0
  562. mindspore/profiler/analysis/parser/base_parser.py +158 -0
  563. mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
  564. mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
  565. mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
  566. mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
  567. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +261 -0
  568. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
  569. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +84 -0
  570. mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
  571. mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
  572. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
  573. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
  574. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
  575. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
  576. mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
  577. mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
  578. mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
  579. mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
  580. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +260 -0
  581. mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
  582. mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
  583. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
  584. mindspore/profiler/analysis/task_manager.py +131 -0
  585. mindspore/profiler/analysis/time_converter.py +84 -0
  586. mindspore/profiler/analysis/viewer/__init__.py +0 -0
  587. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +333 -0
  588. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
  589. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +252 -0
  590. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +313 -0
  591. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +322 -0
  592. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +265 -0
  593. mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
  594. mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
  595. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +97 -0
  596. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
  597. mindspore/profiler/analysis/work_flow.py +73 -0
  598. mindspore/profiler/common/ascend_msprof_exporter.py +138 -0
  599. mindspore/profiler/common/command_executor.py +90 -0
  600. mindspore/profiler/common/constant.py +174 -3
  601. mindspore/profiler/common/file_manager.py +208 -0
  602. mindspore/profiler/common/log.py +130 -0
  603. mindspore/profiler/common/msprof_cmd_tool.py +202 -0
  604. mindspore/profiler/common/path_manager.py +371 -0
  605. mindspore/profiler/common/process_bar.py +168 -0
  606. mindspore/profiler/common/process_pool.py +9 -3
  607. mindspore/profiler/common/profiler_context.py +476 -0
  608. mindspore/profiler/common/profiler_info.py +304 -0
  609. mindspore/profiler/common/profiler_output_path.py +284 -0
  610. mindspore/profiler/common/profiler_parameters.py +210 -0
  611. mindspore/profiler/common/profiler_path_manager.py +120 -0
  612. mindspore/profiler/common/record_function.py +76 -0
  613. mindspore/profiler/common/tlv_decoder.py +76 -0
  614. mindspore/profiler/common/util.py +75 -2
  615. mindspore/profiler/dynamic_profiler.py +270 -37
  616. mindspore/profiler/envprofiler.py +138 -0
  617. mindspore/profiler/mstx.py +199 -0
  618. mindspore/profiler/platform/__init__.py +21 -0
  619. mindspore/profiler/platform/base_profiler.py +40 -0
  620. mindspore/profiler/platform/cpu_profiler.py +124 -0
  621. mindspore/profiler/platform/gpu_profiler.py +74 -0
  622. mindspore/profiler/platform/npu_profiler.py +309 -0
  623. mindspore/profiler/profiler.py +580 -93
  624. mindspore/profiler/profiler_action_controller.py +187 -0
  625. mindspore/profiler/profiler_interface.py +114 -0
  626. mindspore/profiler/schedule.py +208 -0
  627. mindspore/rewrite/api/symbol_tree.py +1 -2
  628. mindspore/run_check/_check_version.py +18 -13
  629. mindspore/runtime/__init__.py +37 -0
  630. mindspore/runtime/device.py +27 -0
  631. mindspore/runtime/event.py +209 -0
  632. mindspore/runtime/executor.py +148 -0
  633. mindspore/runtime/memory.py +392 -0
  634. mindspore/runtime/stream.py +460 -0
  635. mindspore/runtime/thread_bind_core.py +401 -0
  636. mindspore/train/__init__.py +2 -2
  637. mindspore/train/_utils.py +53 -18
  638. mindspore/train/amp.py +8 -4
  639. mindspore/train/callback/_checkpoint.py +32 -18
  640. mindspore/train/callback/_early_stop.py +1 -1
  641. mindspore/train/callback/_flops_collector.py +105 -69
  642. mindspore/train/callback/_history.py +1 -1
  643. mindspore/train/callback/_summary_collector.py +44 -6
  644. mindspore/train/callback/_tft_register.py +37 -15
  645. mindspore/train/dataset_helper.py +11 -11
  646. mindspore/train/metrics/precision.py +4 -5
  647. mindspore/train/mind_ir_pb2.py +167 -46
  648. mindspore/train/model.py +13 -14
  649. mindspore/train/serialization.py +461 -72
  650. mindspore/train/summary/summary_record.py +1 -2
  651. mindspore/train/train_thor/model_thor.py +1 -1
  652. mindspore/utils/__init__.py +4 -2
  653. mindspore/utils/bin/dataset-cache +0 -0
  654. mindspore/utils/bin/dataset-cache-server +0 -0
  655. mindspore/utils/dryrun.py +138 -0
  656. mindspore/utils/runtime_execution_order_check.py +550 -0
  657. mindspore/version.py +1 -1
  658. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/METADATA +3 -4
  659. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/RECORD +672 -479
  660. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/entry_points.txt +1 -1
  661. mindspore/_data_dump.cpython-39-aarch64-linux-gnu.so +0 -0
  662. mindspore/bin/cache_admin +0 -0
  663. mindspore/bin/cache_server +0 -0
  664. mindspore/common/_tensor_overload.py +0 -139
  665. mindspore/lib/libmindspore_np_dtype.so +0 -0
  666. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.json +0 -58
  667. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.json +0 -58
  668. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.json +0 -58
  669. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +0 -109
  670. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +0 -38
  671. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  672. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  673. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  674. mindspore/lib/plugin/ascend/custom_ascendc_ops/version.info +0 -1
  675. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme.h +0 -24
  676. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h +0 -82
  677. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_creator.h +0 -113
  678. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_param.h +0 -193
  679. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/dtype_registry.h +0 -90
  680. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/kernel_register.h +0 -46
  681. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/platform_configs.h +0 -89
  682. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/rt_funcs.h +0 -135
  683. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/add_layer_norm_op.h +0 -60
  684. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/add_rms_norm_op.h +0 -50
  685. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/add_rms_norm_quant_op.h +0 -50
  686. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/apply_rotary_pos_emb_nz_op.h +0 -42
  687. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/apply_rotary_pos_emb_op.h +0 -55
  688. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_elewise_op.h +0 -34
  689. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_only_ops.h +0 -94
  690. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_op_base.h +0 -97
  691. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/cast_op.h +0 -52
  692. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/flash_attention_score_op.h +0 -92
  693. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/gelu_op.h +0 -44
  694. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/matmul_add_rmsnorm_op.h +0 -73
  695. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/matmul_op.h +0 -108
  696. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/multi_impls_op.h +0 -64
  697. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/multi_weight_matmul_op.h +0 -91
  698. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/paged_attention_op.h +0 -99
  699. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/reshape_and_cache_nz_op.h +0 -44
  700. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/reshape_and_cache_op.h +0 -44
  701. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/rms_norm_op.h +0 -64
  702. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/asd_utils.h +0 -179
  703. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/comm_utils.h +0 -69
  704. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/profiling_util.h +0 -366
  705. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/add/add_impl.h +0 -56
  706. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/add/kernel/add.h +0 -21
  707. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/add/tiling/add_tiling.h +0 -43
  708. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/apply_rotary_pos_emb_impl.h +0 -46
  709. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb.h +0 -23
  710. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_base.h +0 -456
  711. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_bf16.h +0 -217
  712. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_fp.h +0 -391
  713. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_fp16.h +0 -126
  714. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_fp32.h +0 -230
  715. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_tiling.h +0 -43
  716. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_value.h +0 -27
  717. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb_nz/apply_rotary_pos_emb_nz_impl.h +0 -34
  718. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb_nz/kernel/apply_rotary_pos_emb_nz.h +0 -23
  719. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb_nz/kernel/apply_rotary_pos_emb_nz_base.h +0 -460
  720. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb_nz/kernel/apply_rotary_pos_emb_nz_fp16.h +0 -116
  721. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb_nz/kernel/apply_rotary_pos_emb_nz_fp32.h +0 -230
  722. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb_nz/kernel/apply_rotary_pos_emb_nz_tiling.h +0 -43
  723. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb_nz/kernel/apply_rotary_pos_emb_nz_value.h +0 -27
  724. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/asdop/asd_op_impl.h +0 -74
  725. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/backend_param.h +0 -74
  726. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/cast/cast_impl.h +0 -48
  727. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/cast/kernel/cast_kernel.h +0 -21
  728. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/compare/compare_impl.h +0 -55
  729. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/compare/compare_tiling.h +0 -27
  730. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/compare/kernel/compare_kernel.h +0 -23
  731. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/and_impl.h +0 -29
  732. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/div_impl.h +0 -29
  733. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_impl.h +0 -48
  734. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_tiling.h +0 -25
  735. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/and_kernel.h +0 -46
  736. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/div_kernel.h +0 -46
  737. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_base.h +0 -260
  738. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_kernel.h +0 -35
  739. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/max_kernel.h +0 -66
  740. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/min_kernel.h +0 -66
  741. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/mul_kernel.h +0 -66
  742. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/or_kernel.h +0 -46
  743. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/max_impl.h +0 -29
  744. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/min_impl.h +0 -29
  745. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/mul_impl.h +0 -29
  746. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/or_impl.h +0 -29
  747. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/abs_impl.h +0 -29
  748. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_impl.h +0 -47
  749. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_tiling.h +0 -24
  750. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/exp_impl.h +0 -29
  751. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/abs_kernel.h +0 -45
  752. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_base.h +0 -148
  753. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_kernel.h +0 -31
  754. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/exp_kernel.h +0 -45
  755. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/ln_kernel.h +0 -45
  756. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/not_kernel.h +0 -45
  757. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/reciprocal_kernel.h +0 -45
  758. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/relu_kernel.h +0 -55
  759. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/rsqrt_kernel.h +0 -45
  760. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/sqrt_kernel.h +0 -45
  761. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/ln_impl.h +0 -29
  762. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/not_impl.h +0 -29
  763. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/reciprocal_impl.h +0 -29
  764. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/relu_impl.h +0 -29
  765. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/rsqrt_impl.h +0 -29
  766. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/sqrt_impl.h +0 -29
  767. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_impl.h +0 -68
  768. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_kernel.h +0 -99
  769. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_rtbackend.h +0 -21
  770. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/lccl/lccl_wrapper.h +0 -58
  771. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/ms_int_types.h +0 -91
  772. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/ms_int_utils.h +0 -108
  773. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_impl.h +0 -64
  774. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/add_param.h +0 -68
  775. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/attention_param.h +0 -40
  776. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/cast_param.h +0 -30
  777. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/compare_param.h +0 -31
  778. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/elewise_param.h +0 -41
  779. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/grouped_matmul_param.h +0 -40
  780. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_ext_param.h +0 -38
  781. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_qkv_param.h +0 -42
  782. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/sub_param.h +0 -33
  783. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/profiling_util.h +0 -377
  784. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/reshape_and_cache_nz/kernel/reshape_and_cache_nz.h +0 -24
  785. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/reshape_and_cache_nz/reshape_and_cache_nz_impl.h +0 -42
  786. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/reshape_and_cache_nz/reshape_and_cache_nz_tiling.h +0 -27
  787. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/rms_norm_impl.h +0 -46
  788. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/kernel/sub_kernel.h +0 -20
  789. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/sub_impl.h +0 -48
  790. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/sub_tiling.h +0 -25
  791. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tune_repo/matmul_table.h +0 -399
  792. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tune_repo/utils.h +0 -41
  793. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/backend.h +0 -45
  794. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/elewise_tiling.h +0 -29
  795. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/elewise_utils.h +0 -30
  796. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log.h +0 -69
  797. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_core.h +0 -43
  798. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_entity.h +0 -38
  799. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_sink.h +0 -69
  800. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_stream.h +0 -41
  801. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_tiling.h +0 -71
  802. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_utils.h +0 -165
  803. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/math.h +0 -20
  804. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_creator.h +0 -39
  805. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_registry.h +0 -121
  806. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/utils.h +0 -106
  807. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libAdd_impl.so +0 -0
  808. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libSub_impl.so +0 -0
  809. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_quant_acme_impl.so +0 -0
  810. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_old_impl.so +0 -0
  811. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_old_impl.so +0 -0
  812. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_impl.so +0 -0
  813. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_old_impl.so +0 -0
  814. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMatMulPostFusionMixTactic/acme_matmul_postfusion_mix.json +0 -19
  815. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMatMulPostFusionMixTactic/acme_matmul_postfusion_mix.o +0 -0
  816. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMatMulPostFusionMixTactic/acme_matmul_postfusion_mix_mix_aic_0.o +0 -0
  817. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMatMulPostFusionMixTactic/acme_matmul_postfusion_mix_mix_aiv_0.o +0 -0
  818. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMultiWeightMatMulPostFusionMixTactic/acme_multi_weight_matmul_postfusion_mix.json +0 -19
  819. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMultiWeightMatMulPostFusionMixTactic/acme_multi_weight_matmul_postfusion_mix.o +0 -0
  820. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMultiWeightMatMulPostFusionMixTactic/acme_multi_weight_matmul_postfusion_mix_mix_aic_0.o +0 -0
  821. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMultiWeightMatMulPostFusionMixTactic/acme_multi_weight_matmul_postfusion_mix_mix_aiv_0.o +0 -0
  822. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
  823. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
  824. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bsh_full_mix.o +0 -0
  825. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
  826. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
  827. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
  828. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_full_mix.o +0 -0
  829. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
  830. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bnsd_mix.o +0 -0
  831. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bsh_mix.o +0 -0
  832. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bnsd_mix.o +0 -0
  833. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bsh_mix.o +0 -0
  834. mindspore/profiler/envprofiling.py +0 -254
  835. mindspore/profiler/profiling.py +0 -1926
  836. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_api/include/aclnn_decoder_kv_cache.h +0 -0
  837. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_api/include/aclnn_prompt_kv_cache.h +0 -0
  838. /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/decoder_kv_cache.cpp +0 -0
  839. /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/prompt_kv_cache.cpp +0 -0
  840. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_api/include/aclnn_all_finite.h +0 -0
  841. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -0
  842. /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json → custom_ascendc_910b/op_impl/ai_core/tbe/config/ascend910_93/aic-ascend910_93-ops-info.json} +0 -0
  843. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +0 -0
  844. /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl}/dynamic/all_finite.cpp +0 -0
  845. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_proto/inc/op_proto.h +0 -0
  846. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/WHEEL +0 -0
  847. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/top_level.txt +0 -0
@@ -1,460 +0,0 @@
1
-
2
- /**
3
- * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved.
4
- *
5
- * Licensed under the Apache License, Version 2.0 (the "License");
6
- * you may not use this file except in compliance with the License.
7
- * You may obtain a copy of the License at
8
- *
9
- * http://www.apache.org/licenses/LICENSE-2.0
10
- *
11
- * Unless required by applicable law or agreed to in writing, software
12
- * distributed under the License is distributed on an "AS IS" BASIS,
13
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- * See the License for the specific language governing permissions and
15
- * limitations under the License.
16
- */
17
- #ifndef ROTARY_POS_EMB_NZ_BASE
18
- #define ROTARY_POS_EMB_NZ_BASE
19
-
20
- #include "apply_rotary_pos_emb_nz_tiling.h"
21
- #include "apply_rotary_pos_emb_nz_value.h"
22
- #include "kernel_operator.h"
23
-
24
- template <typename QkDtype, typename CosDtype, bool IF_COS_BROADCAST>
25
- class RopeBase {
26
- public:
27
- // QkDtype: data type of the input qk and the output qk
28
- // CosDtype: data type of the input cos/sin
29
- // IF_COS_BROADCAST: whether cos/sin have already been expanded
30
- // Constructor: stores the tiling data pointer and derives the per-core work sizes and strides from it.
31
- __aicore__ inline RopeBase(RopeTilingNzData *tilingData) {
32
- setCtrl_ = get_ctrl();  // remember the value returned by get_ctrl() before any set_ctrl() below
33
- #if __CCE_AICORE__ == 220
34
- set_ctrl(sbitset0(get_ctrl(), REG_910B));
35
- #elif __CCE_AICORE__ == 200
36
- set_ctrl(sbitset1(get_ctrl(), REG_310P));
37
- #endif
38
- this->tilingData_ = tilingData;
39
- batchSize_ = (tilingData_->cosFormat == 0)  // 0 when cosFormat == 0, otherwise batch rounded up to a multiple of DEFAULT_REPEAT_STRIDE
40
- ? 0
41
- : ((tilingData_->batch + DEFAULT_REPEAT_STRIDE - 1) / DEFAULT_REPEAT_STRIDE) * DEFAULT_REPEAT_STRIDE;
42
- hiddenSize_ =  // the larger of hiddenSizeK and hiddenSizeQ
43
- tilingData_->hiddenSizeK > tilingData_->hiddenSizeQ ? tilingData_->hiddenSizeK : tilingData_->hiddenSizeQ;
44
- nlCoreRun_ = (tilingData_->ntokens + tilingData_->realCore - 1) / tilingData_->realCore;  // ceil(ntokens / realCore)
45
- lCoreRun_ = tilingData_->ntokens - (tilingData_->realCore - 1) * nlCoreRun_;  // what is left of ntokens after (realCore - 1) shares of nlCoreRun_
46
- headNum_ = tilingData_->headNumK > tilingData_->headNumQ ? tilingData_->headNumK : tilingData_->headNumQ;  // the larger of headNumK and headNumQ
47
- rotateStride_ = tilingData_->headDim / tilingData_->rotaryCoeff;
48
- dynamicRound_ = (block_idx == tilingData_->realCore - 1) ? lCoreRun_ : nlCoreRun_;  // block realCore - 1 uses lCoreRun_, every other block uses nlCoreRun_
49
- rotaryStrideOffset = (tilingData_->headDim == tilingData_->rotaryCoeff) ? 1 : rotateStride_;
50
- alignRotary_ = rotateStride_ % ELE_NUM_FP16;  // 0 exactly when rotateStride_ is a multiple of ELE_NUM_FP16
51
- pipe_.InitBuffer(seqLenQueue_, 1, (batchSize_ * sizeof(int32_t)));  // one buffer of batchSize_ int32_t values; size is 0 when cosFormat == 0
52
- }
53
-
54
- // Initialize the Gm pointers: cast each raw uint8_t address to its typed pointer and store it in the matching member.
55
- __aicore__ inline void RopeInitGm(__gm__ uint8_t *q, __gm__ uint8_t *k, __gm__ uint8_t *cos, __gm__ uint8_t *sin,
56
- __gm__ uint8_t *seqLen, __gm__ uint8_t *outQ, __gm__ uint8_t *outK) {
57
- qGm_ = (__gm__ QkDtype *)q;
58
- kGm_ = (__gm__ QkDtype *)k;
59
- cosGm_ = (__gm__ CosDtype *)cos;
60
- sinGm_ = (__gm__ CosDtype *)sin;
61
- outQGm_ = (__gm__ QkDtype *)outQ;
62
- outKGm_ = (__gm__ QkDtype *)outK;
63
- seqLenGm_ = (__gm__ uint32_t *)seqLen;  // NOTE(review): seqLenQueue_ is used with int32_t elsewhere in this class - confirm uint32_t is intended here
64
- }
65
-
66
- template <typename T>  // Copy2Ub: copies copyLen elements of T from src to dst via copy_gm_to_ubuf, then issues pipe_barrier(PIPE_ALL).
67
- __aicore__ inline void Copy2Ub(__gm__ T *src, __ubuf__ T *dst, uint32_t copyLen) {
68
- #if defined(__CCE_KT_TEST__) || (__CCE_AICORE__ == 220)
69
- if (g_coreType == AscendC::AIC) return;  // no copy is issued when g_coreType is AIC
70
- #endif
71
- uint32_t blkSizeReal = BLK_SIZE / sizeof(T);  // elements of T per block (presumably BLK_SIZE is a byte count - TODO confirm)
72
- if (copyLen % blkSizeReal != 0) {
73
- copy_gm_to_ubuf(dst, src, 0, 1, (copyLen + blkSizeReal - 1) / blkSizeReal, 0, 0);  // length in units of blkSizeReal elements, rounded up
74
- pipe_barrier((PIPE_ALL));
75
- } else {
76
- copy_gm_to_ubuf(dst, src, 0, 1, copyLen / blkSizeReal, 0, 0);  // exact multiple: same value the rounded-up form would give
77
- pipe_barrier((PIPE_ALL));
78
- }
79
- }
80
-
81
- template <typename T>  // Copy2Gm: copies hiddenSizeLen elements of T from src to dst via copy_ubuf_to_gm; no barrier is issued here.
82
- __aicore__ inline void Copy2Gm(__ubuf__ T *src, __gm__ T *dst, uint32_t hiddenSizeLen) {
83
- #if defined(__CCE_KT_TEST__) || (__CCE_AICORE__ == 220)
84
- if (g_coreType == AscendC::AIC) return;  // no copy is issued when g_coreType is AIC
85
- #endif
86
- uint32_t blkSizeReal = BLK_SIZE / sizeof(T);  // elements of T per block (presumably BLK_SIZE is a byte count - TODO confirm)
87
- if (hiddenSizeLen % blkSizeReal != 0) {
88
- copy_ubuf_to_gm(dst, src, 0, 1, (hiddenSizeLen + blkSizeReal - 1) / blkSizeReal, 0, 0);  // length in units of blkSizeReal elements, rounded up
89
- } else {
90
- copy_ubuf_to_gm(dst, src, 0, 1, hiddenSizeLen / blkSizeReal, 0, 0);
91
- }
92
- }
93
-
94
- // This function reuses cos and sin in the unpad case: for each batch, rows starting at the beginning of src are copied through tempBuf into extraGm, one batch after another.
95
- // Example: cos[0~7]: cos[0~3] is used for the first batch, cos[0~4] is used for the second batch
96
- __aicore__ inline void ExpandCosSin(__ubuf__ CosDtype *tempBuf, __gm__ CosDtype *src, __gm__ CosDtype *extraGm) {
97
- #if defined(__CCE_KT_TEST__) || (__CCE_AICORE__ == 220)
98
- if (g_coreType == AscendC::AIC) return;
99
- #endif
100
- // cos or sin, [maxseqlen,headsize]-->[sumseqlen,hiddensize]
101
- AscendC::LocalTensor<int32_t> seqLenLocal = seqLenQueue_.AllocTensor<int32_t>();
102
- // copy_gm_to_ubuf((__ubuf__ int32_t *)seqLenLocal.GetPhyAddr(), seqLenGm_, 0, 1,
103
- // batchSize_ * sizeof(int32_t) / 32, 0, 0);
104
- int32_t seqLenTmp = this->tilingData_->ntokens / this->tilingData_->batch;  // the load from seqLenGm_ above is disabled; every batch gets ntokens / batch
105
- for (uint32_t i = 0; i < this->tilingData_->batch; i++) {
106
- seqLenLocal.SetValue(i, seqLenTmp);
107
- }
108
- pipe_barrier((PIPE_ALL));
109
- int32_t rowsPerLoop = (maxProcessNum_ - batchSize_ * NUM_TWO) / tilingData_->headDim;  // rows of headDim elements moved per copy round; NOTE(review): used as a divisor below, assumes > 0
110
- int32_t cosoffset = 0;  // running row offset into extraGm, advanced by each batch's length
111
- for (uint32_t perBatch = 0; perBatch < tilingData_->batch; perBatch++) {
112
- int32_t rowsRepeat = seqLenLocal.GetValue(perBatch) / rowsPerLoop;  // number of full rounds of rowsPerLoop rows
113
- int32_t rowsRemain = seqLenLocal.GetValue(perBatch) % rowsPerLoop;  // leftover rows after the full rounds
114
- for (int32_t j = 0; j < rowsRepeat; j++) {
115
- Copy2Ub(src + (j * rowsPerLoop) * tilingData_->headDim, tempBuf, rowsPerLoop * tilingData_->headDim);  // source offset does not include cosoffset
116
- Copy2Gm(tempBuf, (extraGm + (cosoffset + j * rowsPerLoop) * tilingData_->headDim),
117
- rowsPerLoop * tilingData_->headDim);
118
- pipe_barrier((PIPE_ALL));
119
- }
120
- if (rowsRemain > 0) {
121
- Copy2Ub(src + (rowsRepeat * rowsPerLoop) * tilingData_->headDim, tempBuf, rowsRemain * tilingData_->headDim);
122
- Copy2Gm(tempBuf, (extraGm + (cosoffset + rowsRepeat * rowsPerLoop) * tilingData_->headDim),
123
- rowsRemain * tilingData_->headDim);
124
- pipe_barrier((PIPE_ALL));
125
- }
126
- cosoffset += seqLenLocal.GetValue(perBatch);
127
- }
128
- seqLenQueue_.FreeTensor(seqLenLocal);
129
- pipe_barrier((PIPE_ALL));
130
- }
131
-
132
- // Build the tensor -1 -1 -1 0 0 0 (written at tempBuf + negOne_)
133
- // Build the tensor 0 0 0 1 1 1 (vadds with scalar 1 targeting tempBuf + bufPos; not issued in the aligned branch below)
134
- template <typename BUF_TYPE>
135
- __aicore__ inline void ExpandNeg(__ubuf__ BUF_TYPE *tempBuf, uint32_t bufPos, uint32_t headNumTemp,
136
- uint32_t repeatTimeTemp) {
137
- if (tilingData_->headDim != tilingData_->rotaryCoeff) {
138
- if (alignRotary_ == 0) { // aligned: write -1 and 1 directly
139
- for (uint32_t i = 0; i < rotateStride_; ++i) {
140
- *(tempBuf + negOne_ + i) = (BUF_TYPE)-1;
141
- *(tempBuf + negOne_ + i + rotateStride_) = (BUF_TYPE)1;
142
- }
143
- set_flag(PIPE_S, PIPE_V, EVENT_ID1);
144
- wait_flag(PIPE_S, PIPE_V, EVENT_ID1);
145
- for (uint32_t i = 1; i < headNumTemp * tilingData_->rotaryCoeff / NUM_TWO; ++i) {
146
- // halfHeadDim = rotateStride_ * 2
147
- copy_ubuf_to_ubuf(tempBuf + negOne_ + rotateStride_ * NUM_TWO * i, tempBuf + negOne_, 0, 1,
148
- rotateStride_ * sizeof(BUF_TYPE) / ELE_NUM_FP16, 0, 0);
149
- }
150
- } else {
151
- for (uint32_t i = 0; i < rotateStride_; ++i) { // unaligned: write -1 and 0
152
- *(tempBuf + negOne_ + i) = (BUF_TYPE)-1;
153
- *(tempBuf + negOne_ + i + rotateStride_) = (BUF_TYPE)0;
154
- }
155
- set_flag(PIPE_S, PIPE_V, EVENT_ID1);
156
- wait_flag(PIPE_S, PIPE_V, EVENT_ID1);
157
- for (uint32_t i = 0; i < headNumTemp * tilingData_->rotaryCoeff / NUM_TWO; ++i) {
158
- if ((rotateStride_ * NUM_TWO) * sizeof(BUF_TYPE) % BLK_SIZE == 0) {  // pattern size is a multiple of BLK_SIZE: use copy_ubuf_to_ubuf
159
- copy_ubuf_to_ubuf(tempBuf + negOne_ + rotateStride_ * NUM_TWO * i, tempBuf + negOne_, 0, 1,
160
- rotateStride_ * NUM_TWO * sizeof(BUF_TYPE) / ELE_NUM_FP16, 0, 0);
161
- } else {
162
- for (uint32_t j = 0; j < rotateStride_ * NUM_TWO; j++) {  // otherwise replicate the pattern element by element
163
- *(tempBuf + negOne_ + rotateStride_ * NUM_TWO * i + j) = *(tempBuf + negOne_ + j);
164
- }
165
- }
166
- }
167
- set_flag(PIPE_S, PIPE_V, EVENT_ID1);
168
- wait_flag(PIPE_S, PIPE_V, EVENT_ID1);
169
- pipe_barrier(PIPE_V);
170
- vadds(tempBuf + bufPos, tempBuf + negOne_, (BUF_TYPE)1, repeatTimeTemp, 1, 1, DEFAULT_REPEAT_STRIDE,
171
- DEFAULT_REPEAT_STRIDE);
172
- }
173
- } else {
174
- set_vector_mask((uint64_t)-1, (uint64_t)-1);  // all mask bits set
175
- vector_dup(tempBuf + negOne_, (BUF_TYPE)-1.0, repeatTimeTemp, 1, 1, (uint16_t)DEFAULT_REPEAT_STRIDE,
176
- (uint16_t)DEFAULT_REPEAT_STRIDE);
177
- set_vector_mask(0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);  // only odd-numbered mask bits set; presumably restricts the next vector_dup to every second element - TODO confirm
178
- vector_dup(tempBuf + negOne_, (BUF_TYPE)0.0, repeatTimeTemp, 1, 1, (uint16_t)DEFAULT_REPEAT_STRIDE,
179
- (uint16_t)DEFAULT_REPEAT_STRIDE);
180
- set_vector_mask((uint64_t)-1, (uint64_t)-1);  // back to all mask bits set
181
- pipe_barrier((PIPE_V));
182
- vadds(tempBuf + bufPos, tempBuf + negOne_, (BUF_TYPE)1, repeatTimeTemp, 1, 1, DEFAULT_REPEAT_STRIDE,
183
- DEFAULT_REPEAT_STRIDE);
184
- }
185
- }
186
-
187
- // Broadcast cos/sin from (tilingData_->headDim) to (heads * tilingData_->headDim): load one headDim-long cos row and one sin row from GM into tempBuf, then replicate each row calcLen / headDim times.
188
- __aicore__ inline void CosSinCommonBroardcast(__gm__ uint8_t *extraGm, uint32_t z, __ubuf__ CosDtype *tempBuf,  // extraGm: GM workspace used only by the unaligned path; z: presumably the row index within this core
189
- uint32_t calcLen) {  // calcLen: total element count after broadcasting (callers in this file pass hiddenSizeQ)
190
- // Always copy one row first.
191
- uint32_t cosOffset = block_idx * nlCoreRun_ * tilingData_->headDim + z * tilingData_->headDim;  // element offset of row (block_idx * nlCoreRun_ + z)
192
- uint32_t sinOffset = block_idx * nlCoreRun_ * tilingData_->headDim + z * tilingData_->headDim;  // same formula as cosOffset
193
- set_flag(PIPE_S, PIPE_MTE2, EVENT_ID1);
194
- wait_flag(PIPE_S, PIPE_MTE2, EVENT_ID1);
195
- copy_gm_to_ubuf(tempBuf + cosPad_, cosGm_ + cosOffset, 0, 1,
196
- (tilingData_->headDim * sizeof(CosDtype) + BLK_SIZE - 1) / BLK_SIZE, 0, 0);  // length: headDim bytes rounded up to whole BLK_SIZE blocks
197
- copy_gm_to_ubuf(tempBuf + sinPad_, sinGm_ + sinOffset, 0, 1,
198
- (tilingData_->headDim * sizeof(CosDtype) + BLK_SIZE - 1) / BLK_SIZE, 0, 0);
199
- if (tilingData_->cosFormat == 1) {  // NOTE(review): meaning of cosFormat == 1 is not visible in this function; confirm why a full barrier is needed here
200
- pipe_barrier(PIPE_ALL);
201
- }
202
- set_flag(PIPE_MTE2, PIPE_MTE3, EVENT_ID3);  // both EVENT_ID3 flags set here are waited on exactly once in each branch below
203
- set_flag(PIPE_MTE2, PIPE_V, EVENT_ID3);
204
- if ((tilingData_->headDim * sizeof(CosDtype)) % BLK_SIZE != 0) {  // unaligned row: replicate through the GM workspace
205
- wait_flag(PIPE_MTE2, PIPE_MTE3, EVENT_ID3);
206
- // Pad out cos, from (tilingData_->headDim) to (heads * tilingData_->headDim).
207
- // headnum
208
- for (uint32_t i = 0; i < calcLen / tilingData_->headDim; ++i) {
209
- copy_ubuf_to_gm((__gm__ CosDtype *)extraGm + offsetExtraGm_ + tilingData_->headDim * i, tempBuf + cosPad_, 0, 1,
210
- (tilingData_->headDim * sizeof(CosDtype) + BLK_SIZE - 1) / BLK_SIZE, 0, 0);  // rounded-up length spills past the row end; presumably the next iteration overwrites the spill
211
- pipe_barrier((PIPE_ALL));
212
- }
213
- Copy2Ub<CosDtype>((__gm__ CosDtype *)extraGm + offsetExtraGm_, tempBuf + cosPad_, calcLen);  // presumably reads calcLen elements back from the workspace into UB (helper not shown)
214
- // Pad out sin, from (tilingData_->headDim) to (heads * tilingData_->headDim).
215
- for (uint32_t i = 0; i < calcLen / tilingData_->headDim; ++i) {
216
- copy_ubuf_to_gm((__gm__ CosDtype *)extraGm + offsetExtraGm_ + tilingData_->headDim * i, tempBuf + sinPad_, 0, 1,  // reuses the same workspace region that cos just used
217
- (tilingData_->headDim * sizeof(CosDtype) + BLK_SIZE - 1) / BLK_SIZE, 0, 0);
218
- pipe_barrier((PIPE_ALL));
219
- }
220
- Copy2Ub<CosDtype>((__gm__ CosDtype *)extraGm + offsetExtraGm_, tempBuf + sinPad_, calcLen);
221
- wait_flag(PIPE_MTE2, PIPE_V, EVENT_ID3);
222
- } else {  // aligned row: replicate in place inside UB
223
- wait_flag(PIPE_MTE2, PIPE_V, EVENT_ID3);
224
- for (uint32_t i = 1; i < calcLen / tilingData_->headDim; ++i) {  // starts at 1: copy 0 is the row already loaded
225
- copy_ubuf_to_ubuf(tempBuf + cosPad_ + tilingData_->headDim * i, tempBuf + cosPad_, 0, 1,
226
- tilingData_->headDim * sizeof(CosDtype) / BLK_SIZE, 0, 0);  // exact division: this branch is only taken when the row is a multiple of BLK_SIZE
227
- copy_ubuf_to_ubuf(tempBuf + sinPad_ + tilingData_->headDim * i, tempBuf + sinPad_, 0, 1,
228
- tilingData_->headDim * sizeof(CosDtype) / BLK_SIZE, 0, 0);
229
- }
230
- wait_flag(PIPE_MTE2, PIPE_MTE3, EVENT_ID3);
231
- }
232
- }
233
-
234
- // Supports multi-head cos/sin input: when IF_COS_BROADCAST is set, cos and sin are loaded directly from GM at full width; otherwise one row is loaded and replicated by CosSinCommonBroardcast.
235
- template <typename BUF_TYPE>
236
- __aicore__ inline void CosSinBroadcast(__gm__ uint8_t *extraGm, uint32_t z, __ubuf__ BUF_TYPE *tempBuf,  // extraGm is only forwarded to CosSinCommonBroardcast
237
- uint32_t Calclen) {  // Calclen: number of elements to place at cosPad_ and sinPad_
238
- if constexpr (IF_COS_BROADCAST) {
239
- copy_gm_to_ubuf(tempBuf + cosPad_,
240
- cosGm_ + block_idx * nlCoreRun_ * tilingData_->hiddenSizeQ + z * tilingData_->hiddenSizeQ, 0, 1,  // row (block_idx * nlCoreRun_ + z), row stride hiddenSizeQ
241
- Calclen * sizeof(BUF_TYPE) / BLK_SIZE, 0, 0);  // length in BLK_SIZE blocks, rounded down; assumes Calclen bytes are block-aligned - TODO confirm
242
- copy_gm_to_ubuf(tempBuf + sinPad_,
243
- sinGm_ + block_idx * nlCoreRun_ * tilingData_->hiddenSizeQ + z * tilingData_->hiddenSizeQ, 0, 1,
244
- Calclen * sizeof(BUF_TYPE) / BLK_SIZE, 0, 0);
245
- } else {
246
- CosSinCommonBroardcast(extraGm, z, tempBuf, Calclen);
247
- }
248
- }
249
-
250
- // Shared routine for q and k: load hiddenSizeTmp elements from src into tempBuf + oriPos_, then build the shifted copies used by the rotation (in UB when alignRotary_ == 0, via the GM workspace otherwise).
251
- template <typename BUF_TYPE>
252
- __aicore__ inline void QkComm(__gm__ BUF_TYPE *src, __gm__ uint8_t *extraGm1, uint32_t hiddenSizeTmp,  // extraGm1: GM workspace, used only on the unaligned path
253
- __ubuf__ BUF_TYPE *tempBuf, uint32_t headNumTemp) {
254
- uint32_t hiddenSizeBlk = hiddenSizeTmp / ELE_NUM_FP16;  // copy length in blocks; NOTE(review): uses ELE_NUM_FP16 regardless of BUF_TYPE - confirm
255
- set_flag(PIPE_S, PIPE_MTE2, EVENT_ID1);
256
- wait_flag(PIPE_S, PIPE_MTE2, EVENT_ID1);
257
- copy_gm_to_ubuf(tempBuf + oriPos_, // gm -> ub
258
- src, 0, 1, hiddenSizeBlk, 0, 0);
259
- set_flag(PIPE_MTE2, PIPE_V, EVENT_ID1);  // both flags set here are waited on at the top of each branch below
260
- set_flag(PIPE_MTE2, PIPE_MTE3, EVENT_ID2);
261
- if (alignRotary_ == 0) {  // aligned rotation distance: swap adjacent rotaryStrideOffset-sized chunks inside UB
262
- wait_flag(PIPE_MTE2, PIPE_V, EVENT_ID1);
263
- wait_flag(PIPE_MTE2, PIPE_MTE3, EVENT_ID2);
264
- copy_ubuf_to_ubuf(tempBuf + removeBefore_ + rotaryStrideOffset, tempBuf + oriPos_, 0,  // chunk at oriPos_ goes one chunk later in removeBefore_
265
- headNumTemp * tilingData_->rotaryCoeff / 2, rotaryStrideOffset / ELE_NUM_FP16,  // presumably (burst count, burst length in blocks)
266
- rotaryStrideOffset / ELE_NUM_FP16, rotaryStrideOffset / ELE_NUM_FP16);  // presumably (src gap, dst gap): skip one chunk between bursts
267
-
268
- copy_ubuf_to_ubuf(tempBuf + removeBefore_, tempBuf + oriPos_ + rotaryStrideOffset, 0,  // chunk one later in oriPos_ goes to the start of removeBefore_
269
- headNumTemp * tilingData_->rotaryCoeff / 2, rotaryStrideOffset / ELE_NUM_FP16,
270
- rotaryStrideOffset / ELE_NUM_FP16, rotaryStrideOffset / ELE_NUM_FP16);
271
- } else {  // unaligned rotation distance: write the row twice to the workspace and read it back shifted
272
- wait_flag(PIPE_MTE2, PIPE_V, EVENT_ID1);
273
- wait_flag(PIPE_MTE2, PIPE_MTE3, EVENT_ID2);
274
- // ub -> workspace[0 ~ hiddensize]
275
- copy_ubuf_to_gm((__gm__ BUF_TYPE *)extraGm1 + offsetExtraGm_, tempBuf + oriPos_, 0, 1, hiddenSizeBlk, 0, 0);
276
- // ub -> workspace[hiddensize ~ 2 * hiddensize]
277
- copy_ubuf_to_gm((__gm__ BUF_TYPE *)extraGm1 + offsetExtraGm_ + hiddenSizeTmp, tempBuf + oriPos_, 0, 1,
278
- hiddenSizeBlk, 0, 0);
279
- // workspace[rotary ~ hiddensize + rotary] -> ub removeBefore_ region
280
- pipe_barrier((PIPE_ALL));
281
- copy_gm_to_ubuf(tempBuf + removeBefore_, (__gm__ BUF_TYPE *)extraGm1 + offsetExtraGm_ + rotateStride_, 0, 1,
282
- hiddenSizeBlk, 0, 0);
283
- // workspace[hiddensize - rotary ~ 2 * hiddensize - rotary] -> ub padBefore_ region
284
- copy_gm_to_ubuf(tempBuf + padBefore_,
285
- (__gm__ BUF_TYPE *)extraGm1 + offsetExtraGm_ + hiddenSizeTmp - rotateStride_, 0, 1, hiddenSizeBlk,
286
- 0, 0);
287
- }
288
- }
289
-
290
- // Main computation logic (unaligned variant). Reading each call as (dst, src0, src1, ...) as the stride labels suggest, the net effect is: res = ori * cos + pad * pos * sin + remove * sin * negOne.
291
- template <typename BUF_TYPE>
292
- __aicore__ inline void CalcRope(__ubuf__ BUF_TYPE *tempBuf, uint32_t repeatTimes1, uint32_t oriPosTemp,  // all *Temp/res arguments are element offsets into tempBuf
293
- uint32_t removeTemp, uint32_t padTemp, uint32_t posTemp, uint32_t res) {  // the ori, pad and remove regions are overwritten in place
294
- set_vector_mask((uint64_t)-1, (uint64_t)-1);
295
- #if defined(__CCE_KT_TEST__) || (__CCE_AICORE__ == 220)
296
- if (g_coreType == AscendC::AIC) return;  // early exit on AIC cores; note this skips the set_ctrl at the end
297
- #endif
298
-
299
- vmul(tempBuf + oriPosTemp, tempBuf + cosPad_, tempBuf + oriPosTemp,  // ori = cos * ori
300
- repeatTimes1, // repeat times
301
- 1, // dstBlockStride
302
- 1, // src0BlockStride
303
- 1, // src1BlockStride
304
- DEFAULT_REPEAT_STRIDE, // dstRepeatStride
305
- DEFAULT_REPEAT_STRIDE, // src0RepeatStride
306
- DEFAULT_REPEAT_STRIDE // src1RepeatStride
307
- );
308
- vmul(tempBuf + padTemp, tempBuf + posTemp, tempBuf + padTemp,  // pad = pos * pad
309
- repeatTimes1, // repeat times
310
- 1, // dstBlockStride
311
- 1, // src0BlockStride
312
- 1, // src1BlockStride
313
- DEFAULT_REPEAT_STRIDE, // dstRepeatStride
314
- DEFAULT_REPEAT_STRIDE, // src0RepeatStride
315
- DEFAULT_REPEAT_STRIDE // src1RepeatStride
316
- );
317
- pipe_barrier((PIPE_V));
318
- vmul(tempBuf + removeTemp, tempBuf + sinPad_, tempBuf + removeTemp,  // remove = sin * remove
319
- repeatTimes1, // repeat times
320
- 1, // dstBlockStride
321
- 1, // src0BlockStride
322
- 1, // src1BlockStride
323
- DEFAULT_REPEAT_STRIDE, // dstRepeatStride
324
- DEFAULT_REPEAT_STRIDE, // src0RepeatStride
325
- DEFAULT_REPEAT_STRIDE // src1RepeatStride
326
- );
327
- vmul(tempBuf + padTemp, tempBuf + sinPad_, tempBuf + padTemp,  // pad = sin * pad
328
- repeatTimes1, // repeat times
329
- 1, // dstBlockStride
330
- 1, // src0BlockStride
331
- 1, // src1BlockStride
332
- DEFAULT_REPEAT_STRIDE, // dstRepeatStride
333
- DEFAULT_REPEAT_STRIDE, // src0RepeatStride
334
- DEFAULT_REPEAT_STRIDE // src1RepeatStride
335
- );
336
- pipe_barrier((PIPE_V));
337
-
338
- vmul(tempBuf + removeTemp, tempBuf + negOne_, tempBuf + removeTemp,  // remove = negOne * remove
339
- repeatTimes1, // repeat times
340
- 1, // dstBlockStride
341
- 1, // src0BlockStride
342
- 1, // src1BlockStride
343
- DEFAULT_REPEAT_STRIDE, // dstRepeatStride
344
- DEFAULT_REPEAT_STRIDE, // src0RepeatStride
345
- DEFAULT_REPEAT_STRIDE // src1RepeatStride
346
- );
347
- vadd(tempBuf + padTemp, tempBuf + oriPosTemp, tempBuf + padTemp,  // pad = ori + pad
348
- repeatTimes1, // repeat times
349
- 1, // dstBlockStride
350
- 1, // src0BlockStride
351
- 1, // src1BlockStride
352
- DEFAULT_REPEAT_STRIDE, // dstRepeatStride
353
- DEFAULT_REPEAT_STRIDE, // src0RepeatStride
354
- DEFAULT_REPEAT_STRIDE // src1RepeatStride
355
- );
356
- pipe_barrier((PIPE_V));
357
-
358
- vadd(tempBuf + res, tempBuf + removeTemp, tempBuf + padTemp,  // res = remove + pad (final result)
359
- repeatTimes1, // repeat times
360
- 1, // dstBlockStride
361
- 1, // src0BlockStride
362
- 1, // src1BlockStride
363
- DEFAULT_REPEAT_STRIDE, // dstRepeatStride
364
- DEFAULT_REPEAT_STRIDE, // src0RepeatStride
365
- DEFAULT_REPEAT_STRIDE // src1RepeatStride
366
- );
367
- pipe_barrier((PIPE_V));
368
- set_ctrl(setCtrl_);  // presumably restores the control register value saved in setCtrl_ - confirm where it is captured
369
- }
370
-
371
- // Main computation logic (aligned variant). Reading each call as (dst, src0, src1, ...) as the stride labels suggest, the net effect is: pad = ori * cos + remove * negOne * sin.
372
- template <typename BUF_TYPE>
373
- __aicore__ inline void CalcRopeAlign(__ubuf__ BUF_TYPE *tempBuf, uint32_t repeatTimes1, uint32_t oriPosTemp,  // all *Temp arguments are element offsets into tempBuf
374
- uint32_t removeTemp, uint32_t padTemp) {  // result is written at padTemp; the ori and remove regions are overwritten in place
375
- set_vector_mask((uint64_t)-1, (uint64_t)-1);
376
- #if defined(__CCE_KT_TEST__) || (__CCE_AICORE__ == 220)
377
- if (g_coreType == AscendC::AIC) return;  // early exit on AIC cores; note this skips the set_ctrl at the end
378
- #endif
379
- vmul(tempBuf + oriPosTemp, tempBuf + cosPad_, tempBuf + oriPosTemp,  // ori = cos * ori
380
- repeatTimes1, // repeat times
381
- 1, // dstBlockStride
382
- 1, // src0BlockStride
383
- 1, // src1BlockStride
384
- DEFAULT_REPEAT_STRIDE, // dstRepeatStride
385
- DEFAULT_REPEAT_STRIDE, // src0RepeatStride
386
- DEFAULT_REPEAT_STRIDE // src1RepeatStride
387
- );
388
- vmul(tempBuf + removeTemp, tempBuf + negOne_, tempBuf + removeTemp,  // remove = negOne * remove
389
- repeatTimes1, // repeat times
390
- 1, // dstBlockStride
391
- 1, // src0BlockStride
392
- 1, // src1BlockStride
393
- DEFAULT_REPEAT_STRIDE, // dstRepeatStride
394
- DEFAULT_REPEAT_STRIDE, // src0RepeatStride
395
- DEFAULT_REPEAT_STRIDE // src1RepeatStride
396
- );
397
- pipe_barrier((PIPE_V));
398
- vmul(tempBuf + removeTemp, tempBuf + sinPad_, tempBuf + removeTemp,  // remove = sin * remove
399
- repeatTimes1, // repeat times
400
- 1, // dstBlockStride
401
- 1, // src0BlockStride
402
- 1, // src1BlockStride
403
- DEFAULT_REPEAT_STRIDE, // dstRepeatStride
404
- DEFAULT_REPEAT_STRIDE, // src0RepeatStride
405
- DEFAULT_REPEAT_STRIDE // src1RepeatStride
406
- );
407
- pipe_barrier((PIPE_V));
408
- vadd(tempBuf + padTemp, tempBuf + removeTemp, tempBuf + oriPosTemp,  // pad = remove + ori (final result)
409
- repeatTimes1, // repeat times
410
- 1, // dstBlockStride
411
- 1, // src0BlockStride
412
- 1, // src1BlockStride
413
- DEFAULT_REPEAT_STRIDE, // dstRepeatStride
414
- DEFAULT_REPEAT_STRIDE, // src0RepeatStride
415
- DEFAULT_REPEAT_STRIDE // src1RepeatStride
416
- );
417
- pipe_barrier((PIPE_V));
418
- set_ctrl(setCtrl_);  // presumably restores the control register value saved in setCtrl_ - confirm where it is captured
419
- }
420
-
421
- public:
422
- RopeTilingNzData *tilingData_ = nullptr;
423
- __gm__ QkDtype *qGm_{nullptr};
424
- __gm__ QkDtype *kGm_{nullptr};
425
- __gm__ CosDtype *cosGm_{nullptr};
426
- __gm__ CosDtype *sinGm_{nullptr};
427
- __gm__ uint32_t *seqLenGm_{nullptr};
428
- __gm__ QkDtype *outQGm_{nullptr};
429
- __gm__ QkDtype *outKGm_{nullptr};
430
- AscendC::TPipe pipe_;
431
- AscendC::TQue<AscendC::QuePosition::VECIN, 1> seqLenQueue_;
432
-
433
- uint32_t cosPad_{0}; // position of the broadcast cos in UB
434
- uint32_t sinPad_{0}; // position of the broadcast sin in UB
435
- uint32_t negOne_{0}; // position of the -1 -1 -1 0 0 0 pattern in UB
436
- uint32_t oriPos_{0}; // position of q / k in UB
437
- uint32_t padBefore_{0}; // holds qk[-x : hiddensize - x]
438
- uint32_t removeBefore_{0}; // holds qk[x : hiddensize + x]
439
- uint32_t repeatSize_{0}; // number of elements processed per repeat
440
- uint32_t maxProcessNum_{0}; // maximum number of elements that can be processed
441
- uint32_t repeatTimesQ_{0}; // repeat count for q
442
- uint32_t repeatTimesK_{0}; // repeat count for k
443
- uint32_t hiddenSizeAlign_{0}; // hiddensize after alignment
444
- uint32_t repeatTimes_{0}; // repeat count after alignment
445
- uint32_t headNum_{0}; // number of heads
446
- uint32_t hiddenSize_{0}; // maximum of hiddenSizeQ and hiddenSizeK
447
- uint32_t nlCoreRun_{0}; // number of runs for each core other than the last
448
- uint32_t lCoreRun_{0}; // number of runs for the last core
449
- uint32_t batchSize_{0}; // batch, rounded up
450
- uint32_t rotateStride_{0}; // headdim / rotary coefficient
451
- uint32_t offsetExtraGm_{0}; // offset to apply when using the workspace
452
- uint32_t dynamicRound_{0}; // number of rounds each core performs
453
- uint32_t setCtrl_; // value used to reset the control register; NOTE(review): no default initializer, confirm the constructor sets it
454
- uint32_t alignHalfHeadDim_{0}; // whether headDim / rotary coefficient * 2 is aligned
455
- uint32_t rotaryStrideOffset{0}; // length of each rotation
456
- uint32_t alignRotary_; // whether the rotation distance is aligned; NOTE(review): no default initializer, confirm the constructor sets it
457
- uint32_t syncOffset_; // workspace offset used by each core; NOTE(review): no default initializer here
458
- };
459
-
460
- #endif
@@ -1,116 +0,0 @@
1
- /**
2
- * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved.
3
- *
4
- * Licensed under the Apache License, Version 2.0 (the "License");
5
- * you may not use this file except in compliance with the License.
6
- * You may obtain a copy of the License at
7
- *
8
- * http://www.apache.org/licenses/LICENSE-2.0
9
- *
10
- * Unless required by applicable law or agreed to in writing, software
11
- * distributed under the License is distributed on an "AS IS" BASIS,
12
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- * See the License for the specific language governing permissions and
14
- * limitations under the License.
15
- */
16
- #ifndef ROTARY_POS_EMB_NZ_FP16
17
- #define ROTARY_POS_EMB_NZ_FP16
18
- #include "apply_rotary_pos_emb_nz_base.h"
19
- template <typename QK_DTYPE, typename COS_DTYPE, bool IF_COS_BROADCAST>
20
- class RopeFp16 : public RopeBase<QK_DTYPE, COS_DTYPE, IF_COS_BROADCAST> {  // fp16 rotary-position-embedding kernel: sets up the UB layout in the constructor and applies the rotation to q and k in Process()
21
- public:
22
- __aicore__ inline RopeFp16(RopeTilingNzData *tilingData) : RopeBase<QK_DTYPE, COS_DTYPE, IF_COS_BROADCAST>(tilingData) {  // derives repeat counts, UB region offsets and the workspace offset, then allocates the UB buffer
23
- this->repeatSize_ = 128; // 128 = 256B / sizeof(half)
24
- this->maxProcessNum_ = this->tilingData_->maxUbSize / sizeof(uint16_t);
25
- this->repeatTimesQ_ = (this->tilingData_->hiddenSizeQ + this->repeatSize_ - 1) / this->repeatSize_;  // ceil(hiddenSizeQ / repeatSize_)
26
- this->repeatTimesK_ = (this->tilingData_->hiddenSizeK + this->repeatSize_ - 1) / this->repeatSize_;  // ceil(hiddenSizeK / repeatSize_)
27
- headDimAlign_ = ((this->tilingData_->headDim + ELE_NUM_FP16 - 1) / ELE_NUM_FP16) * ELE_NUM_FP16;  // headDim rounded up to a multiple of ELE_NUM_FP16
28
- this->alignHalfHeadDim_ = (this->rotateStride_ * NUM_TWO) % ELE_NUM_FP16;  // 0 when rotateStride_ * 2 is a multiple of ELE_NUM_FP16
29
- this->hiddenSizeAlign_ = ((this->hiddenSize_ + this->repeatSize_ - 1) / this->repeatSize_) * this->repeatSize_;  // hiddenSize_ rounded up to a multiple of repeatSize_
30
-
31
- this->cosPad_ = 0;  // UB regions start here; each following offset is hiddenSizeAlign_ elements after the previous one
32
- this->sinPad_ = this->cosPad_ + this->hiddenSizeAlign_;
33
- this->negOne_ = this->sinPad_ + this->hiddenSizeAlign_;
34
- this->oriPos_ = this->negOne_ + this->hiddenSizeAlign_;
35
- this->padBefore_ = this->oriPos_ + this->hiddenSizeAlign_;
36
- this->removeBefore_ = this->padBefore_ + this->hiddenSizeAlign_;
37
- sinResPos_ = this->removeBefore_ + this->hiddenSizeAlign_;
38
- this->repeatTimes_ = this->hiddenSizeAlign_ / this->repeatSize_;
39
-
40
- this->syncOffset_ =  // per-core workspace stride: hiddenSizeAlign_ when headDim is a multiple of ELE_NUM_FP16, else headNum_ * headDimAlign_
41
- (this->tilingData_->headDim % ELE_NUM_FP16 == 0) ? this->hiddenSizeAlign_ : this->headNum_ * headDimAlign_;
42
- this->offsetExtraGm_ = NUM_TWO * block_idx * this->syncOffset_;  // each core gets 2 * syncOffset_ elements of workspace
43
- this->pipe_.InitBuffer(outQueueCO2_, 1, ((this->maxProcessNum_ - this->batchSize_ * NUM_TWO) * sizeof(QK_DTYPE)));
44
- AscendC::LocalTensor<QK_DTYPE> cache_perloop_ub_ = outQueueCO2_.AllocTensor<QK_DTYPE>();  // NOTE(review): no matching FreeTensor is visible in this class
45
- commonUbuf_ = (__ubuf__ QK_DTYPE *)cache_perloop_ub_.GetPhyAddr();  // raw UB pointer used by every helper call in Process()
46
- }
47
-
48
- __aicore__ inline void Process(__gm__ uint8_t *extraGm) {  // per round: broadcast cos/sin, rotate q and store it, then rotate k and store it; extraGm is the GM workspace
49
- if (this->tilingData_->cosFormat == 1) {  // cos/sin are first expanded into the workspace and cosGm_/sinGm_ are redirected to the expanded copies
50
- pipe_barrier((PIPE_ALL));
51
- this->ExpandCosSin(commonUbuf_, this->cosGm_, (__gm__ COS_DTYPE *)extraGm);
52
- this->cosGm_ = (__gm__ COS_DTYPE *)extraGm;
53
- pipe_barrier((PIPE_ALL));
54
- this->ExpandCosSin(commonUbuf_, this->sinGm_,
55
- (__gm__ COS_DTYPE *)extraGm + this->tilingData_->ntokens * this->tilingData_->headDim);  // sin is placed right after the ntokens * headDim cos elements
56
- this->sinGm_ = (__gm__ COS_DTYPE *)extraGm + this->tilingData_->ntokens * this->tilingData_->headDim;
57
- extraGm =
58
- extraGm + this->tilingData_->ntokens * this->tilingData_->headDim * 4; // byte offset past both arrays: 2 arrays * sizeof(half), with sizeof(half) = sizeof(uint8_t) * 2
59
- pipe_barrier((PIPE_ALL));
60
- }
61
-
62
- this->ExpandNeg(commonUbuf_, sinResPos_, this->headNum_, this->repeatTimes_); // choose the 1 / -1 or the -1 / 0 pattern depending on alignment
63
- for (uint32_t zz = 0; zz < this->dynamicRound_; ++zz) {
64
- this->CosSinBroadcast(extraGm, zz, commonUbuf_, this->tilingData_->hiddenSizeQ); // cos / sin do not depend on q / k
65
-
66
- this->QkComm(this->qGm_ + block_idx * this->nlCoreRun_ * this->tilingData_->hiddenSizeQ +  // q row (block_idx * nlCoreRun_ + zz)
67
- zz * this->tilingData_->hiddenSizeQ,
68
- extraGm, this->tilingData_->hiddenSizeQ, commonUbuf_, this->tilingData_->headNumQ);
69
-
70
- if (this->alignRotary_ == 0) {
71
- pipe_barrier((PIPE_V));
72
- this->CalcRopeAlign(commonUbuf_, this->repeatTimesQ_, this->oriPos_, this->removeBefore_, this->padBefore_);
73
- } else {
74
- set_flag(PIPE_MTE2, PIPE_V, EVENT_ID1);
75
- wait_flag(PIPE_MTE2, PIPE_V, EVENT_ID1);
76
- this->CalcRope(commonUbuf_, this->repeatTimesQ_, this->oriPos_, this->removeBefore_, this->padBefore_,
77
- sinResPos_, this->padBefore_);  // res == padBefore_, so both variants leave the result at padBefore_
78
- }
79
- pipe_barrier((PIPE_ALL)); // required
80
- copy_ubuf_to_gm(this->outQGm_ + block_idx * this->nlCoreRun_ * this->tilingData_->hiddenSizeQ +
81
- zz * this->tilingData_->hiddenSizeQ,
82
- commonUbuf_ + this->padBefore_, 0, 1, this->tilingData_->hiddenSizeQ / ELE_NUM_FP16, 0, 0);  // store rotated q; length in blocks of ELE_NUM_FP16 elements
83
-
84
- set_flag(PIPE_MTE3, PIPE_MTE2, EVENT_ID1);
85
- wait_flag(PIPE_MTE3, PIPE_MTE2, EVENT_ID1);
86
-
87
- this->QkComm(this->kGm_ + block_idx * this->nlCoreRun_ * this->tilingData_->hiddenSizeK +  // k row (block_idx * nlCoreRun_ + zz)
88
- zz * this->tilingData_->hiddenSizeK,
89
- extraGm, this->tilingData_->hiddenSizeK, commonUbuf_, this->tilingData_->headNumK);
90
-
91
- if (this->alignRotary_ == 0) {
92
- pipe_barrier((PIPE_V));
93
- this->CalcRopeAlign(commonUbuf_, this->repeatTimesK_, this->oriPos_, this->removeBefore_, this->padBefore_);
94
- } else {
95
- set_flag(PIPE_MTE2, PIPE_V, EVENT_ID1);
96
- wait_flag(PIPE_MTE2, PIPE_V, EVENT_ID1);
97
- this->CalcRope(commonUbuf_, this->repeatTimesK_, this->oriPos_, this->removeBefore_, this->padBefore_,
98
- sinResPos_, this->padBefore_);
99
- }
100
- pipe_barrier((PIPE_ALL)); // required
101
- copy_ubuf_to_gm(this->outKGm_ + block_idx * this->nlCoreRun_ * this->tilingData_->hiddenSizeK +
102
- zz * this->tilingData_->hiddenSizeK,
103
- commonUbuf_ + this->padBefore_, 0, 1, this->tilingData_->hiddenSizeK / ELE_NUM_FP16, 0, 0);  // store rotated k
104
- set_flag(PIPE_MTE3, PIPE_V, EVENT_ID1);
105
- wait_flag(PIPE_MTE3, PIPE_V, EVENT_ID1);
106
- }
107
- }
108
-
109
- private:
110
- AscendC::TQue<AscendC::QuePosition::VECIN, 1> outQueueCO2_;
111
- __ubuf__ QK_DTYPE *commonUbuf_{nullptr};
112
- uint32_t headDimAlign_; // aligned headDim
113
- uint32_t sinResPos_{0}; // position of the 0 0 0 1 1 1 pattern in the buffer (original comment says "fp32 buf"; NOTE(review): this is the fp16 class)
114
- };
115
-
116
- #endif