mindspore 2.4.0__cp39-none-any.whl → 2.4.10__cp39-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (306):
  1. mindspore/.commit_id +1 -1
  2. mindspore/_c_dataengine.cpython-39-aarch64-linux-gnu.so +0 -0
  3. mindspore/_c_expression.cpython-39-aarch64-linux-gnu.so +0 -0
  4. mindspore/_c_mindrecord.cpython-39-aarch64-linux-gnu.so +0 -0
  5. mindspore/bin/cache_server +0 -0
  6. mindspore/common/api.py +1 -4
  7. mindspore/common/file_system.py +2 -0
  8. mindspore/common/initializer.py +51 -15
  9. mindspore/common/parameter.py +6 -5
  10. mindspore/common/tensor.py +15 -49
  11. mindspore/communication/_comm_helper.py +5 -0
  12. mindspore/communication/comm_func.py +7 -7
  13. mindspore/context.py +16 -2
  14. mindspore/dataset/engine/datasets_standard_format.py +17 -0
  15. mindspore/dataset/engine/datasets_user_defined.py +27 -1
  16. mindspore/experimental/llm_boost/__init__.py +2 -2
  17. mindspore/experimental/llm_boost/atb/boost_base.py +240 -64
  18. mindspore/experimental/llm_boost/atb/llama_boost.py +46 -29
  19. mindspore/experimental/llm_boost/atb/qwen_boost.py +47 -24
  20. mindspore/include/api/context.h +1 -1
  21. mindspore/include/dataset/constants.h +2 -2
  22. mindspore/include/mindapi/base/format.h +13 -0
  23. mindspore/lib/libavcodec.so.59 +0 -0
  24. mindspore/lib/libavdevice.so.59 +0 -0
  25. mindspore/lib/libavfilter.so.8 +0 -0
  26. mindspore/lib/libavformat.so.59 +0 -0
  27. mindspore/lib/libavutil.so.57 +0 -0
  28. mindspore/lib/libdnnl.so.2 +0 -0
  29. mindspore/lib/libmindspore_backend.so +0 -0
  30. mindspore/lib/libmindspore_common.so +0 -0
  31. mindspore/lib/libmindspore_core.so +0 -0
  32. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  33. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  34. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  35. mindspore/lib/libmindspore_ops.so +0 -0
  36. mindspore/lib/libopencv_core.so.4.5 +0 -0
  37. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  38. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  39. mindspore/lib/libswresample.so.4 +0 -0
  40. mindspore/lib/libswscale.so.6 +0 -0
  41. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  42. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  43. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  44. mindspore/lib/plugin/ascend/custom_ascendc_910/framework/npu_supported_ops.json +10 -0
  45. mindspore/lib/plugin/ascend/custom_ascendc_910/op_api/lib/libcust_opapi.so +0 -0
  46. mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +0 -42
  47. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/decoder_kv_cache.py +51 -16
  48. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/prompt_kv_cache.py +51 -16
  49. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +158 -0
  50. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  51. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +158 -0
  52. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  53. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +158 -0
  54. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  55. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +158 -0
  56. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  57. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +158 -0
  58. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  59. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +158 -0
  60. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  61. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +158 -0
  62. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  63. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +158 -0
  64. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  65. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +167 -0
  66. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  67. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +167 -0
  68. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  69. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +167 -0
  70. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  71. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +167 -0
  72. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  73. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +167 -0
  74. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  75. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +167 -0
  76. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  77. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +167 -0
  78. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  79. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +167 -0
  80. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  81. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/binary_info_config.json +302 -0
  82. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/decoder_kv_cache.json +892 -0
  83. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/prompt_kv_cache.json +892 -0
  84. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  85. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  86. mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/inc/op_proto.h +33 -0
  87. mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  88. mindspore/lib/plugin/ascend/custom_ascendc_910/version.info +1 -0
  89. mindspore/lib/plugin/ascend/custom_ascendc_910b/framework/npu_supported_ops.json +14 -0
  90. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_decoder_kv_cache.h +59 -0
  91. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_prompt_kv_cache.h +59 -0
  92. mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_api/lib/libcust_opapi.so +0 -0
  93. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl}/dynamic/all_finite.py +51 -16
  94. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.cpp +192 -0
  95. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.py +215 -0
  96. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.cpp +274 -0
  97. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.py +215 -0
  98. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +80 -0
  99. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o +0 -0
  100. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +80 -0
  101. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o +0 -0
  102. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +80 -0
  103. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o +0 -0
  104. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +158 -0
  105. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  106. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +158 -0
  107. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  108. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +158 -0
  109. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  110. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +158 -0
  111. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  112. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +158 -0
  113. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  114. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +158 -0
  115. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  116. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +158 -0
  117. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  118. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +158 -0
  119. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  120. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +167 -0
  121. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  122. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +167 -0
  123. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  124. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +167 -0
  125. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  126. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +167 -0
  127. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  128. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +167 -0
  129. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  130. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +167 -0
  131. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  132. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +167 -0
  133. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  134. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +167 -0
  135. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  136. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +78 -0
  137. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o} +0 -0
  138. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +78 -0
  139. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o} +0 -0
  140. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +78 -0
  141. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o} +0 -0
  142. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +156 -0
  143. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  144. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +156 -0
  145. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  146. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +156 -0
  147. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  148. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +156 -0
  149. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  150. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +156 -0
  151. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  152. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +156 -0
  153. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  154. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +156 -0
  155. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  156. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +156 -0
  157. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  158. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +165 -0
  159. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  160. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +165 -0
  161. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  162. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +165 -0
  163. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  164. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +165 -0
  165. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  166. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +165 -0
  167. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  168. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +165 -0
  169. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  170. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +165 -0
  171. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  172. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +165 -0
  173. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  174. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/all_finite.json +139 -0
  175. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/binary_info_config.json +361 -0
  176. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/decoder_kv_cache.json +892 -0
  177. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/prompt_kv_cache.json +892 -0
  178. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +139 -0
  179. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +361 -0
  180. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/decoder_kv_cache.json +892 -0
  181. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/prompt_kv_cache.json +892 -0
  182. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  183. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  184. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  185. mindspore/lib/plugin/ascend/custom_ascendc_910b/version.info +1 -0
  186. mindspore/lib/plugin/ascend/custom_compiler/setup.py +1 -1
  187. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  188. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  189. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  190. mindspore/lib/plugin/ascend/libmindspore_internal_kernels.so +0 -0
  191. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/PkgInspect +0 -0
  192. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/op_man +0 -0
  193. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
  194. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_host.so +0 -0
  195. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/rt/base/types.h +5 -5
  196. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
  197. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
  198. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal.so +0 -0
  199. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal_static.a +0 -0
  200. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h +1 -0
  201. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/flash_attention_score_op.h +6 -1
  202. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/paged_attention_op.h +6 -1
  203. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/rms_norm_op.h +4 -3
  204. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libAdd_impl.so +0 -0
  205. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libSub_impl.so +0 -0
  206. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layer_norm_impl.so +0 -0
  207. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_impl.so +0 -0
  208. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_quant_acme_impl.so +0 -0
  209. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_impl.so +0 -0
  210. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_old_impl.so +0 -0
  211. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_impl.so +0 -0
  212. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_old_impl.so +0 -0
  213. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_impl.so +0 -0
  214. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_impl.so +0 -0
  215. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_impl.so +0 -0
  216. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
  217. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmulti_weight_matmul_kernel_impl.so +0 -0
  218. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libnot_equal_impl.so +0 -0
  219. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_impl.so +0 -0
  220. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_impl.so +0 -0
  221. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_old_impl.so +0 -0
  222. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_impl.so +0 -0
  223. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
  224. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
  225. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bsh_full_mix.o +0 -0
  226. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
  227. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
  228. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_full_mix.o +0 -0
  229. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
  230. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_bf16.o +0 -0
  231. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_fp16.o +0 -0
  232. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_fp32.o +0 -0
  233. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_bf16.o +0 -0
  234. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_fp16.o +0 -0
  235. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_fp32.o +0 -0
  236. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bsh_mix.o +0 -0
  237. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bnsd_mix.o +0 -0
  238. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bsh_mix.o +0 -0
  239. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblcal.so +0 -0
  240. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblccl_wrapper.so +0 -0
  241. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  242. mindspore/mint/__init__.py +490 -2
  243. mindspore/mint/nn/__init__.py +2 -2
  244. mindspore/mint/optim/adamw.py +6 -14
  245. mindspore/nn/__init__.py +2 -0
  246. mindspore/nn/cell.py +16 -4
  247. mindspore/nn/layer/basic.py +24 -7
  248. mindspore/nn/layer/conv.py +3 -0
  249. mindspore/nn/layer/embedding.py +31 -14
  250. mindspore/nn/layer/pooling.py +8 -10
  251. mindspore/nn/optim/tft_wrapper.py +12 -15
  252. mindspore/nn/utils/__init__.py +22 -0
  253. mindspore/nn/utils/init.py +71 -0
  254. mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
  255. mindspore/ops/_grad_experimental/grad_comm_ops.py +45 -8
  256. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +6 -0
  257. mindspore/ops/auto_generate/gen_extend_func.py +33 -0
  258. mindspore/ops/auto_generate/gen_ops_def.py +52 -3
  259. mindspore/ops/auto_generate/gen_ops_prim.py +158 -8
  260. mindspore/ops/function/array_func.py +2 -0
  261. mindspore/ops/function/math_func.py +12 -5
  262. mindspore/ops/function/random_func.py +221 -7
  263. mindspore/ops/operations/__init__.py +1 -1
  264. mindspore/ops/operations/array_ops.py +3 -1
  265. mindspore/ops/operations/comm_ops.py +25 -1
  266. mindspore/ops/operations/custom_ops.py +6 -4
  267. mindspore/ops/operations/manually_defined/ops_def.py +8 -10
  268. mindspore/ops/operations/nn_ops.py +7 -2
  269. mindspore/parallel/_auto_parallel_context.py +26 -5
  270. mindspore/parallel/_cell_wrapper.py +24 -3
  271. mindspore/parallel/_tensor.py +46 -2
  272. mindspore/parallel/_utils.py +39 -21
  273. mindspore/parallel/transform_safetensors.py +196 -43
  274. mindspore/profiler/profiling.py +5 -1
  275. mindspore/run_check/_check_version.py +20 -9
  276. mindspore/train/_utils.py +92 -32
  277. mindspore/train/callback/_checkpoint.py +12 -9
  278. mindspore/train/callback/_on_request_exit.py +12 -1
  279. mindspore/train/callback/_tft_register.py +33 -9
  280. mindspore/train/dataset_helper.py +10 -2
  281. mindspore/train/model.py +21 -0
  282. mindspore/train/serialization.py +12 -19
  283. mindspore/version.py +1 -1
  284. {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/METADATA +9 -7
  285. {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/RECORD +297 -170
  286. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.json +0 -58
  287. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.json +0 -58
  288. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.json +0 -58
  289. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +0 -109
  290. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +0 -38
  291. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  292. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  293. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  294. mindspore/lib/plugin/ascend/custom_ascendc_ops/version.info +0 -1
  295. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_api/include/aclnn_decoder_kv_cache.h +0 -0
  296. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_api/include/aclnn_prompt_kv_cache.h +0 -0
  297. /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/decoder_kv_cache.cpp +0 -0
  298. /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/prompt_kv_cache.cpp +0 -0
  299. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_api/include/aclnn_all_finite.h +0 -0
  300. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -0
  301. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +0 -0
  302. /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl}/dynamic/all_finite.cpp +0 -0
  303. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_proto/inc/op_proto.h +0 -0
  304. {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/WHEEL +0 -0
  305. {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/entry_points.txt +0 -0
  306. {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,33 @@
1
+ #ifndef OP_PROTO_H_  // include guard for the custom operator prototype header
2
+ #define OP_PROTO_H_
3
+
4
+ #include "graph/operator_reg.h"
5
+ #include "register/op_impl_registry.h"
6
+
7
+ namespace ge {
8
+ // DecoderKvCache prototype: seven inputs and one output, each declared with ge::TensorType::ALL().
9
+ REG_OP(DecoderKvCache)
10
+ .INPUT(cache, ge::TensorType::ALL())
11
+ .INPUT(update, ge::TensorType::ALL())
12
+ .INPUT(valid_seq_len, ge::TensorType::ALL())
13
+ .INPUT(batch_index, ge::TensorType::ALL())
14
+ .INPUT(seq_len_axis, ge::TensorType::ALL())
15
+ .INPUT(new_max_seq_len, ge::TensorType::ALL())
16
+ .INPUT(cur_max_seq_len, ge::TensorType::ALL())
17
+ .OUTPUT(out, ge::TensorType::ALL())
18
+ .OP_END_FACTORY_REG(DecoderKvCache);
19
+ // PromptKvCache prototype: declared with the same input/output names and types as DecoderKvCache.
20
+ REG_OP(PromptKvCache)
21
+ .INPUT(cache, ge::TensorType::ALL())
22
+ .INPUT(update, ge::TensorType::ALL())
23
+ .INPUT(valid_seq_len, ge::TensorType::ALL())
24
+ .INPUT(batch_index, ge::TensorType::ALL())
25
+ .INPUT(seq_len_axis, ge::TensorType::ALL())
26
+ .INPUT(new_max_seq_len, ge::TensorType::ALL())
27
+ .INPUT(cur_max_seq_len, ge::TensorType::ALL())
28
+ .OUTPUT(out, ge::TensorType::ALL())
29
+ .OP_END_FACTORY_REG(PromptKvCache);
30
+
31
+ }  // namespace ge
32
+
33
+ #endif  // OP_PROTO_H_
@@ -0,0 +1 @@
1
+ custom_opp_compiler_version=7.6.T8.0.B059
@@ -0,0 +1,14 @@
1
+ {
2
+ "AllFinite": {
3
+ "isGray": false,
4
+ "isHeavy": false
5
+ },
6
+ "DecoderKvCache": {
7
+ "isGray": false,
8
+ "isHeavy": false
9
+ },
10
+ "PromptKvCache": {
11
+ "isGray": false,
12
+ "isHeavy": false
13
+ }
14
+ }
@@ -0,0 +1,59 @@
1
+
2
+ /*
3
+ * caution: this file was generated automatically, do not change it.
4
+ */
5
+
6
+ #ifndef ACLNN_DECODER_KV_CACHE_H_
7
+ #define ACLNN_DECODER_KV_CACHE_H_
8
+
9
+ #include "aclnn/acl_meta.h"
10
+
11
+ #ifdef __cplusplus
12
+ extern "C" {
13
+ #endif
14
+
15
+ /* function: aclnnDecoderKvCacheGetWorkspaceSize
16
+ * parameters :
17
+ * cache : required (input tensor)
18
+ * update : required (input tensor)
19
+ * validSeqLen : required (input tensor)
20
+ * batchIndex : required (input tensor)
21
+ * seqLenAxis : required (input tensor)
22
+ * newMaxSeqLen : required (input tensor)
23
+ * curMaxSeqLen : required (input tensor)
24
+ * out : required (output tensor; note it is declared as a const pointer in this generated signature)
25
+ * workspaceSize : size of workspace (output).
26
+ * executor : executor context (output).
27
+ */
28
+ __attribute__((visibility("default")))  // exported with default symbol visibility
29
+ aclnnStatus aclnnDecoderKvCacheGetWorkspaceSize(
30
+ const aclTensor *cache,
31
+ const aclTensor *update,
32
+ const aclTensor *validSeqLen,
33
+ const aclTensor *batchIndex,
34
+ const aclTensor *seqLenAxis,
35
+ const aclTensor *newMaxSeqLen,
36
+ const aclTensor *curMaxSeqLen,
37
+ const aclTensor *out,
38
+ uint64_t *workspaceSize,
39
+ aclOpExecutor **executor);
40
+
41
+ /* function: aclnnDecoderKvCache
42
+ * parameters :
43
+ * workspace : workspace memory address (input).
44
+ * workspaceSize : size of workspace (input); presumably the value reported by aclnnDecoderKvCacheGetWorkspaceSize - confirm.
45
+ * executor : executor context (input); presumably the one returned by aclnnDecoderKvCacheGetWorkspaceSize - confirm.
46
+ * stream : acl stream.
47
+ */
48
+ __attribute__((visibility("default")))  // exported with default symbol visibility
49
+ aclnnStatus aclnnDecoderKvCache(
50
+ void *workspace,
51
+ uint64_t workspaceSize,
52
+ aclOpExecutor *executor,
53
+ aclrtStream stream);
54
+
55
+ #ifdef __cplusplus
56
+ }
57
+ #endif
58
+
59
+ #endif
@@ -0,0 +1,59 @@
1
+
2
+ /*
3
+ * caution: this file was generated automatically, do not change it.
4
+ */
5
+
6
+ #ifndef ACLNN_PROMPT_KV_CACHE_H_
7
+ #define ACLNN_PROMPT_KV_CACHE_H_
8
+
9
+ #include "aclnn/acl_meta.h"
10
+
11
+ #ifdef __cplusplus
12
+ extern "C" {
13
+ #endif
14
+
15
+ /* function: aclnnPromptKvCacheGetWorkspaceSize
16
+ * parameters :
17
+ * cache : required (input tensor)
18
+ * update : required (input tensor)
19
+ * validSeqLen : required (input tensor)
20
+ * batchIndex : required (input tensor)
21
+ * seqLenAxis : required (input tensor)
22
+ * newMaxSeqLen : required (input tensor)
23
+ * curMaxSeqLen : required (input tensor)
24
+ * out : required (output tensor; note it is declared as a const pointer in this generated signature)
25
+ * workspaceSize : size of workspace (output).
26
+ * executor : executor context (output).
27
+ */
28
+ __attribute__((visibility("default")))  // exported with default symbol visibility
29
+ aclnnStatus aclnnPromptKvCacheGetWorkspaceSize(
30
+ const aclTensor *cache,
31
+ const aclTensor *update,
32
+ const aclTensor *validSeqLen,
33
+ const aclTensor *batchIndex,
34
+ const aclTensor *seqLenAxis,
35
+ const aclTensor *newMaxSeqLen,
36
+ const aclTensor *curMaxSeqLen,
37
+ const aclTensor *out,
38
+ uint64_t *workspaceSize,
39
+ aclOpExecutor **executor);
40
+
41
+ /* function: aclnnPromptKvCache
42
+ * parameters :
43
+ * workspace : workspace memory address (input).
44
+ * workspaceSize : size of workspace (input); presumably the value reported by aclnnPromptKvCacheGetWorkspaceSize - confirm.
45
+ * executor : executor context (input); presumably the one returned by aclnnPromptKvCacheGetWorkspaceSize - confirm.
46
+ * stream : acl stream.
47
+ */
48
+ __attribute__((visibility("default")))  // exported with default symbol visibility
49
+ aclnnStatus aclnnPromptKvCache(
50
+ void *workspace,
51
+ uint64_t workspaceSize,
52
+ aclOpExecutor *executor,
53
+ aclrtStream stream);
54
+
55
+ #ifdef __cplusplus
56
+ }
57
+ #endif
58
+
59
+ #endif
@@ -1,3 +1,8 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: UTF-8 -*-
3
+ """
4
+ Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
5
+ """
1
6
 
2
7
  import os, sys
3
8
  import ctypes
@@ -6,6 +11,7 @@ import shutil
6
11
  from tbe.common.platform import get_soc_spec
7
12
  from tbe.common.utils import para_check
8
13
  from tbe.tikcpp import compile_op, replay_op, check_op_cap, generalize_op_params, get_code_channel, OpInfo
14
+ from tbe.tikcpp.compile_op import CommonUtility, AscendCLogLevel
9
15
  from tbe.common.buildcfg import get_default_build_config
10
16
  from impl.util.platform_adapter import tbe_register
11
17
  from tbe.common.buildcfg import get_current_build_config
@@ -28,6 +34,7 @@ DTYPE_MAP = {"float32": ["DT_FLOAT", "float"],
28
34
  "dual_sub_int8": ["DT_DUAL_SUB_INT8", "unknown"],
29
35
  "dual_sub_uint8": ["DT_DUAL_SUB_UINT8", "unknown"],
30
36
  "string": ["DT_STRING", "unknown"],
37
+ "complex32": ["DT_COMPLEX32", "unknown"],
31
38
  "complex64": ["DT_COMPLEX64", "unknown"],
32
39
  "complex128": ["DT_COMPLEX128", "unknown"],
33
40
  "qint8": ["DT_QINT8", "unknown"],
@@ -49,21 +56,23 @@ def add_dtype_fmt_option_single(x, x_n, is_ref: bool = False):
49
56
  options.append("-DORIG_DTYPE_{n}={ot}".format(n=x_n_in_kernel, ot=DTYPE_MAP.get(x_dtype)[0]))
50
57
  options.append("-DFORMAT_{n}=FORMAT_{f}".format(n=x_n_in_kernel, f=x_fmt))
51
58
  return options
52
-
59
+
53
60
  def get_dtype_fmt_options(__inputs__, __outputs__):
54
61
  options = []
62
+ input_names = ['gradient']
63
+ output_names = ['is_finite']
55
64
  unique_param_name_set = set()
56
- for x in __inputs__:
65
+ for idx, x in enumerate(__inputs__):
57
66
  if x is None:
58
67
  continue
59
- x_n = x.get("param_name")[:-5].upper()
68
+ x_n = input_names[idx].upper()
60
69
  unique_param_name_set.add(x_n)
61
70
  options += add_dtype_fmt_option_single(x, x_n)
62
-
63
- for x in __outputs__:
71
+
72
+ for idx, x in enumerate(__outputs__):
64
73
  if x is None:
65
74
  continue
66
- x_n = x.get("param_name")[:-5].upper()
75
+ x_n = output_names[idx].upper()
67
76
  if x_n in unique_param_name_set:
68
77
  options += add_dtype_fmt_option_single(x, x_n, True)
69
78
  else:
@@ -74,27 +83,40 @@ def load_dso(so_path):
74
83
  try:
75
84
  ctypes.CDLL(so_path)
76
85
  except OSError as error :
77
- print(error)
86
+ CommonUtility.print_compile_log("", error, AscendCLogLevel.LOG_ERROR)
78
87
  raise RuntimeError("cannot open %s" %(so_path))
79
88
  else:
80
- print("load so succ ", so_path)
89
+ msg = "load so succ " + so_path
90
+ CommonUtility.print_compile_log("", msg, AscendCLogLevel.LOG_INFO)
81
91
 
82
92
  def get_shortsoc_compile_option(compile_option_list: list, shortsoc:str):
83
93
  compile_options = []
84
94
  if shortsoc in compile_option_list:
85
- compile_options = compile_option_list[shortsoc]
86
- elif '__ALLSOC__' in compile_option_list:
87
- compile_options = compile_option_list['__ALLSOC__']
95
+ compile_options.extend(compile_option_list[shortsoc])
96
+ if '__ALLSOC__' in compile_option_list:
97
+ compile_options.extend(compile_option_list['__ALLSOC__'])
88
98
  return compile_options
89
99
 
90
100
  def get_kernel_source(src_file, dir_snake, dir_ex):
91
101
  src_ex = os.path.join(PYF_PATH, "..", "ascendc", dir_ex, src_file)
92
102
  if os.path.exists(src_ex):
93
103
  return src_ex
104
+ src = os.environ.get('BUILD_KERNEL_SRC')
105
+ if src and os.path.exists(src):
106
+ return src
94
107
  src = os.path.join(PYF_PATH, "..", "ascendc", dir_snake, src_file)
95
108
  if os.path.exists(src):
96
109
  return src
97
110
  src = os.path.join(PYF_PATH, src_file)
111
+ if os.path.exists(src):
112
+ return src
113
+ src = os.path.join(PYF_PATH, "..", "ascendc", dir_snake, dir_snake + ".cpp")
114
+ if os.path.exists(src):
115
+ return src
116
+ src = os.path.join(PYF_PATH, "..", "ascendc", dir_ex, dir_ex + ".cpp")
117
+ if os.path.exists(src):
118
+ return src
119
+ src = os.path.join(PYF_PATH, "..", "ascendc", os.path.splitext(src_file)[0], src_file)
98
120
  if os.path.exists(src):
99
121
  return src
100
122
  return src_ex
@@ -109,6 +131,8 @@ def _build_args(gradient_in__, is_finite_out_):
109
131
  __inputs__.append(arg[0])
110
132
  else:
111
133
  __inputs__.append(arg)
134
+ else:
135
+ __inputs__.append(arg)
112
136
  __outputs__ = []
113
137
  for arg in [is_finite_out_]:
114
138
  if arg != None:
@@ -118,24 +142,30 @@ def _build_args(gradient_in__, is_finite_out_):
118
142
  __outputs__.append(arg[0])
119
143
  else:
120
144
  __outputs__.append(arg)
145
+ else:
146
+ __outputs__.append(arg)
121
147
  __attrs__ = []
122
148
  return __inputs__, __outputs__, __attrs__
123
149
 
124
150
  @tbe_register.register_operator("AllFinite", trans_bool_to_s8=False)
125
151
  @para_check.check_op_params(para_check.REQUIRED_INPUT, para_check.REQUIRED_OUTPUT, para_check.KERNEL_NAME)
126
152
  def all_finite(gradient_in__, is_finite_out_, kernel_name="all_finite", impl_mode=""):
153
+ # do ascendc build step
127
154
  if get_current_build_config("enable_op_prebuild"):
128
155
  return
129
156
  __inputs__, __outputs__, __attrs__ = _build_args(gradient_in__, is_finite_out_)
130
157
  options = get_dtype_fmt_options(__inputs__, __outputs__)
131
158
  options += ["-x", "cce"]
132
- bisheng = shutil.which("bisheng")
159
+ bisheng = os.environ.get('BISHENG_REAL_PATH')
160
+ if bisheng is None:
161
+ bisheng = shutil.which("bisheng")
133
162
  if bisheng != None:
134
163
  bisheng_path = os.path.dirname(bisheng)
135
164
  tikcpp_path = os.path.realpath(os.path.join(bisheng_path, "..", "..", "tikcpp"))
136
165
  else:
137
166
  tikcpp_path = os.path.realpath("/usr/local/Ascend/latest/compiler/tikcpp")
138
167
  options.append("-I" + tikcpp_path)
168
+ options.append("-I" + os.path.join(tikcpp_path, "..", "..", "include"))
139
169
  options.append("-I" + os.path.join(tikcpp_path, "tikcfw"))
140
170
  options.append("-I" + os.path.join(tikcpp_path, "tikcfw", "impl"))
141
171
  options.append("-I" + os.path.join(tikcpp_path, "tikcfw", "interface"))
@@ -144,8 +174,10 @@ def all_finite(gradient_in__, is_finite_out_, kernel_name="all_finite", impl_mod
144
174
  options.append("-DHIGH_PERFORMANCE=1")
145
175
  elif impl_mode == "high_precision":
146
176
  options.append("-DHIGH_PRECISION=1")
147
- if get_default_build_config("enable_deterministic_mode") == 1:
148
- options.append("-DDETEMINISTIC_MODE=1")
177
+ if get_current_build_config("enable_deterministic_mode") == 1:
178
+ options.append("-DDETERMINISTIC_MODE=1")
179
+ else:
180
+ options.append("-DDETERMINISTIC_MODE=0")
149
181
 
150
182
  custom_compile_options = {},
151
183
  custom_all_compile_options = {},
@@ -162,11 +194,14 @@ def all_finite(gradient_in__, is_finite_out_, kernel_name="all_finite", impl_mod
162
194
  ascendc_src_file = "all_finite.cpp"
163
195
  src = get_kernel_source(ascendc_src_file, ascendc_src_dir, ascendc_src_dir_ex)
164
196
 
165
- print("start compile Ascend C operator AllFinite. kernel name is " + kernel_name)
197
+ msg = "start compile Acend C Operator AllFinite, kernel name is " + kernel_name
198
+ CommonUtility.print_compile_log("", msg, AscendCLogLevel.LOG_INFO)
166
199
  op_type = "AllFinite"
167
200
  code_channel = get_code_channel(src, kernel_name, op_type, options)
168
201
  op_info = OpInfo(kernel_name = kernel_name, op_type = op_type, inputs = __inputs__, outputs = __outputs__,\
169
- attrs = __attrs__, impl_mode = impl_mode, origin_inputs=[gradient_in__], origin_outputs = [is_finite_out_])
202
+ attrs = __attrs__, impl_mode = impl_mode, origin_inputs=[gradient_in__], origin_outputs = [is_finite_out_],\
203
+ param_type_dynamic = False, mc2_ctx = [], param_type_list = ['required', 'required'], init_value_list = [None],\
204
+ output_shape_depend_on_compute = [])
170
205
  compile_op(src, origin_func_name, op_info, options, code_channel, '{}')
171
206
 
172
207
  def op_select_format(gradient_in__, is_finite_out_, impl_mode=""):
@@ -0,0 +1,192 @@
1
+ /**
2
+ * Copyright 2023 Huawei Technologies Co., Ltd
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+
17
+ #include "kernel_operator.h"
18
+ using namespace AscendC;
19
+ namespace {  // file-local constants and helper
20
+ constexpr int32_t kBufferNum = 2;  // buffer count passed to pipe_.InitBuffer for update_queue_
21
+ const int64_t kDivisor = 4;  // granularity for the valid_seq_len element count; presumably 4 x int64_t = 32 bytes to satisfy DataCopy alignment -- TODO confirm
22
+ static __aicore__ inline int64_t CeilRound(int64_t value, int64_t divisor) {  // rounds value up to a multiple of divisor (for non-negative value, positive divisor)
23
+ if (divisor == 0) {  // avoid dividing by zero; the result is 0 in that case
24
+ return 0;
25
+ }
26
+ return (value + divisor - 1) / divisor * divisor;  // integer ceil-divide, then scale back up
27
+ }
28
+ } // namespace
29
+
30
+ template <typename T>  // T: element type used to address the cache and update buffers
31
+ class KernelDecoderKvCache {  // Copies rows of `update` into `cache` at a per-batch position read from `valid_seq_len`; work is split across cores using tiling data.
32
+ public:
33
+ __aicore__ inline KernelDecoderKvCache() {}
34
+ // GetValidSeqLen: views valid_seq_len as int64_t and copies CeilRound(b_, kDivisor) elements of it into valid_seq_len_tensor_.
35
+ __aicore__ inline void GetValidSeqLen(GM_ADDR valid_seq_len) {
36
+ int64_t valid_seq_len_ub_size = CeilRound(b_, kDivisor);  // element count, rounded up to a multiple of kDivisor
37
+ valid_seq_len_gm_.SetGlobalBuffer((__gm__ int64_t *)valid_seq_len, valid_seq_len_ub_size);
38
+ pipe_.InitBuffer(valid_seq_len_queue_, 1, valid_seq_len_ub_size * sizeof(int64_t));
39
+ valid_seq_len_tensor_ = valid_seq_len_queue_.AllocTensor<int64_t>();  // released in Process via FreeTensor
40
+ pipe_barrier((pipe_t)PIPE_ALL);
41
+ DataCopy(valid_seq_len_tensor_, valid_seq_len_gm_, valid_seq_len_ub_size);  // NOTE(review): copies the rounded-up count, which may exceed b_ -- confirm the source buffer is large enough
42
+ pipe_barrier((pipe_t)PIPE_ALL);  // barrier placed after the copy, before the tensor is read with GetValue in Update
43
+ }
44
+ // SplitBh: selects the block-split parameters for this core; the last core (core_num_ - 1) uses the t_* values, all others the f_* values.
45
+ __aicore__ inline void SplitBh() {
46
+ if (core_idx_ != core_num_ - 1) {
47
+ split_bh_ = f_split_bh_;
48
+ former_block_bh_ = f_f_bh_;
49
+ tail_block_bh_ = f_t_bh_;
50
+ } else {
51
+ split_bh_ = t_split_bh_;
52
+ former_block_bh_ = t_f_bh_;
53
+ tail_block_bh_ = t_t_bh_;
54
+ }
55
+ }
56
+ // Update: for each of this core's split_bh_ blocks, loads the block from `update` into the local tensor, then writes each row to `cache` at the offset given by valid_seq_len.
57
+ __aicore__ inline void Update(GM_ADDR cache, GM_ADDR update, LocalTensor<T> update_in_local_tensor) {
58
+ for (int64_t i = 0; i < split_bh_; i++) {
59
+ int64_t block_bh;  // number of rows in this block
60
+ if (i != split_bh_ - 1) {
61
+ block_bh = former_block_bh_;
62
+ } else {
63
+ block_bh = tail_block_bh_;  // the last block uses the tail size
64
+ }
65
+ update_gm_.SetGlobalBuffer((__gm__ T *)update + core_idx_ * update_core_stride_ + i * former_block_bh_ * us_ * d_,  // start of block i inside this core's slice of `update`
66
+ block_bh * us_ * d_);
67
+ // NOTE(review): no barrier separates the previous block's last DataCopy to out_gm_ from this load into the same local tensor -- confirm this ordering is safe.
68
+ DataCopy(update_in_local_tensor, update_gm_, block_bh * us_ * d_);  // NOTE(review): the buffer is sized with former_block_bh_ in Process, so tail_block_bh_ must not exceed it -- confirm in the tiling code
69
+ pipe_barrier((pipe_t)PIPE_ALL);
70
+ // NOTE(review): the same tensor is enqueued on every iteration without a fresh AllocTensor -- confirm this matches intended TQue usage.
71
+ update_queue_.EnQue(update_in_local_tensor);
72
+ LocalTensor<T> update_in_local_tensor_out = update_queue_.DeQue<T>();
73
+
74
+ for (int64_t j = 0; j < block_bh; j++) {
75
+ int64_t bh_idx = core_idx_ * former_bh_ + i * former_block_bh_ + j;  // row index across all cores and blocks
76
+ auto b_idx = bh_idx / h_;  // index into valid_seq_len; NOTE(review): h_ comes from tiling and is not checked for zero
77
+ pipe_barrier((pipe_t)PIPE_ALL);
78
+ auto s_idx = valid_seq_len_tensor_.GetValue(b_idx);  // target position within the cache row
79
+ pipe_barrier((pipe_t)PIPE_ALL);
80
+ if (s_idx < 0) {  // negative position: this row is skipped
81
+ continue;
82
+ }
83
+ if (s_idx >= s_) {  // positions at or past s_ wrap around
84
+ s_idx = s_idx % s_;  // NOTE(review): s_ == 0 would divide by zero here
85
+ }
86
+ out_gm_.SetGlobalBuffer((__gm__ T *)cache + bh_idx * s_ * d_ + s_idx * d_, us_ * d_);  // destination: row bh_idx, position s_idx, us_ * d_ elements
87
+ int64_t src_offset = j * us_ * d_;  // offset of row j inside the loaded block
88
+ pipe_barrier((pipe_t)PIPE_ALL);
89
+ DataCopy(out_gm_, update_in_local_tensor_out[src_offset], us_ * d_);  // write the row into `cache`
90
+ }
91
+ }
92
+ }
93
+ // InitParam: copies the tiling fields into members.
94
+ __aicore__ inline void InitParam(GM_ADDR tiling) {
95
+ GET_TILING_DATA(tiling_data, tiling);
96
+ core_num_ = tiling_data.core_num;
97
+ b_ = tiling_data.b;
98
+ h_ = tiling_data.h;
99
+ s_ = tiling_data.s;
100
+ d_ = tiling_data.d;
101
+ us_ = tiling_data.us;
102
+ former_bh_ = tiling_data.former_bh;
103
+ tail_bh_ = tiling_data.tail_bh;  // stored but not read anywhere else in this class
104
+ f_split_bh_ = tiling_data.f_split_bh;
105
+ f_f_bh_ = tiling_data.f_f_bh;
106
+ f_t_bh_ = tiling_data.f_t_bh;
107
+ t_split_bh_ = tiling_data.t_split_bh;
108
+ t_f_bh_ = tiling_data.t_f_bh;
109
+ t_t_bh_ = tiling_data.t_t_bh;
110
+ }
111
+ // Process: entry point of the class; batch_index, new_max_seq_len and cur_max_seq_len are accepted but not referenced in the body.
112
+ __aicore__ inline void Process(GM_ADDR cache, GM_ADDR update, GM_ADDR valid_seq_len, GM_ADDR batch_index,
113
+ GM_ADDR new_max_seq_len, GM_ADDR cur_max_seq_len, GM_ADDR tiling) {
114
+ InitParam(tiling);
115
+ core_idx_ = GetBlockIdx();
116
+ if (core_idx_ >= core_num_) {  // cores at or beyond core_num_ do nothing
117
+ return;
118
+ }
119
+
120
+ GetValidSeqLen(valid_seq_len);
121
+ update_core_stride_ = former_bh_ * us_ * d_;  // element offset between consecutive cores' slices of `update`
122
+ cache_core_stride_ = former_bh_ * s_ * d_;  // assigned here but not read anywhere else in this class
123
+
124
+ SplitBh();
125
+
126
+ pipe_.InitBuffer(update_queue_, kBufferNum, former_block_bh_ * us_ * d_ * sizeof(T));  // each buffer holds one full-size block
127
+ LocalTensor<T> update_in_local_tensor = update_queue_.AllocTensor<T>();
128
+
129
+ Update(cache, update, update_in_local_tensor);
130
+
131
+ valid_seq_len_queue_.FreeTensor(valid_seq_len_tensor_);
132
+ update_queue_.FreeTensor(update_in_local_tensor);
133
+ }
134
+
135
+ private:
136
+ // global-memory tensor views
137
+ GlobalTensor<T> update_gm_;
138
+ GlobalTensor<int64_t> valid_seq_len_gm_;
139
+ GlobalTensor<int64_t> new_max_seq_len_gm_;  // declared but not used by any method of this class
140
+ GlobalTensor<T> out_gm_;  // destination view into `cache`, re-bound for each row in Update
141
+
142
+ // local tensors
143
+ LocalTensor<int64_t> valid_seq_len_tensor_;
144
+
145
+ TPipe pipe_;
146
+ // input queues, each declared with depth 1; note update_queue_ is initialised with kBufferNum buffers in Process
147
+ TQue<QuePosition::VECIN, 1> update_queue_;
148
+ TQue<QuePosition::VECIN, 1> valid_seq_len_queue_;
149
+ TQue<QuePosition::VECIN, 1> new_max_seq_len_queue_;  // declared but not used by any method of this class
150
+ // per-core block split, chosen in SplitBh
151
+ int64_t split_bh_ = 0;  // number of blocks this core processes
152
+ int64_t former_block_bh_ = 0;  // rows in every block except the last
153
+ int64_t tail_block_bh_ = 0;  // rows in the last block
154
+
155
+ int64_t core_idx_ = 0;  // value of GetBlockIdx()
156
+ int64_t cache_core_stride_ = 0;  // written in Process, never read
157
+ int64_t cache_block_length = 0;  // not used by any method of this class
158
+ int64_t update_core_stride_ = 0;  // element offset between cores' slices of `update`
159
+ int64_t update_block_length_ = 0;  // not used by any method of this class
160
+ // values read from tiling data; the dimension meanings below are presumed from usage -- TODO confirm
161
+ int64_t core_num_ = 0;
162
+ int64_t b_ = 0;  // sizes the valid_seq_len copy (presumably batch size)
163
+ int64_t h_ = 0;  // divisor mapping a row index to a valid_seq_len index (presumably head count)
164
+ int64_t s_ = 0;  // cache row length in positions (presumably max sequence length)
165
+ int64_t d_ = 0;  // elements per position
166
+ int64_t us_ = 0;  // positions per update row
167
+
168
+ int64_t former_bh_ = 0;  // rows assigned per core, used to compute row offsets
169
+ int64_t tail_bh_ = 0;  // read from tiling but otherwise unused
170
+ int64_t f_split_bh_ = 0;  // f_*: split parameters for every core except the last
171
+ int64_t f_f_bh_ = 0;
172
+ int64_t f_t_bh_ = 0;
173
+ int64_t t_split_bh_ = 0;  // t_*: split parameters for the last core
174
+ int64_t t_f_bh_ = 0;
175
+ int64_t t_t_bh_ = 0;
176
+ };
177
+
178
+ extern "C" __global__ __aicore__ void decoder_kv_cache(GM_ADDR cache, GM_ADDR update, GM_ADDR valid_seq_len,  // kernel entry: selects a KernelDecoderKvCache<T> instantiation by tiling key and runs Process
179
+ GM_ADDR batch_index, GM_ADDR seq_len_axis,
180
+ GM_ADDR new_max_seq_len, GM_ADDR cur_max_seq_len, GM_ADDR out,  // seq_len_axis, out and workspace are not referenced in the body
181
+ GM_ADDR workspace, GM_ADDR tiling) {
182
+ if (TILING_KEY_IS(1)) {  // keys 1/2/4 select int8_t/int16_t/int32_t; presumably the key is the element byte width -- TODO confirm against the host tiling code
183
+ KernelDecoderKvCache<int8_t> op;
184
+ op.Process(cache, update, valid_seq_len, batch_index, new_max_seq_len, cur_max_seq_len, tiling);
185
+ } else if (TILING_KEY_IS(2)) {
186
+ KernelDecoderKvCache<int16_t> op;
187
+ op.Process(cache, update, valid_seq_len, batch_index, new_max_seq_len, cur_max_seq_len, tiling);
188
+ } else if (TILING_KEY_IS(4)) {
189
+ KernelDecoderKvCache<int32_t> op;
190
+ op.Process(cache, update, valid_seq_len, batch_index, new_max_seq_len, cur_max_seq_len, tiling);
191
+ }  // any other tiling key: no branch matches and nothing is done
192
+ }