mindspore 2.4.1__cp311-cp311-manylinux1_x86_64.whl → 2.4.10__cp311-cp311-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (260)
  1. mindspore/.commit_id +1 -1
  2. mindspore/_c_dataengine.cpython-311-x86_64-linux-gnu.so +0 -0
  3. mindspore/_c_expression.cpython-311-x86_64-linux-gnu.so +0 -0
  4. mindspore/_c_mindrecord.cpython-311-x86_64-linux-gnu.so +0 -0
  5. mindspore/bin/cache_admin +0 -0
  6. mindspore/bin/cache_server +0 -0
  7. mindspore/common/api.py +1 -4
  8. mindspore/common/file_system.py +2 -0
  9. mindspore/common/parameter.py +1 -14
  10. mindspore/communication/_comm_helper.py +5 -0
  11. mindspore/context.py +7 -2
  12. mindspore/dataset/engine/datasets_standard_format.py +17 -0
  13. mindspore/dataset/engine/datasets_user_defined.py +27 -1
  14. mindspore/experimental/llm_boost/__init__.py +2 -2
  15. mindspore/experimental/llm_boost/atb/boost_base.py +240 -64
  16. mindspore/experimental/llm_boost/atb/llama_boost.py +46 -29
  17. mindspore/experimental/llm_boost/atb/qwen_boost.py +47 -24
  18. mindspore/include/api/context.h +1 -1
  19. mindspore/include/dataset/constants.h +2 -2
  20. mindspore/lib/libavcodec.so.59 +0 -0
  21. mindspore/lib/libavdevice.so.59 +0 -0
  22. mindspore/lib/libavfilter.so.8 +0 -0
  23. mindspore/lib/libavformat.so.59 +0 -0
  24. mindspore/lib/libavutil.so.57 +0 -0
  25. mindspore/lib/libdnnl.so.2 +0 -0
  26. mindspore/lib/libicuuc.so.69 +0 -0
  27. mindspore/lib/libmindspore_backend.so +0 -0
  28. mindspore/lib/libmindspore_common.so +0 -0
  29. mindspore/lib/libmindspore_core.so +0 -0
  30. mindspore/lib/libmindspore_glog.so.0 +0 -0
  31. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  32. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  33. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  34. mindspore/lib/libmindspore_ops.so +0 -0
  35. mindspore/lib/libopencv_core.so.4.5 +0 -0
  36. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  37. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  38. mindspore/lib/libswresample.so.4 +0 -0
  39. mindspore/lib/libswscale.so.6 +0 -0
  40. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  41. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  42. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  43. mindspore/lib/plugin/ascend/custom_ascendc_910/framework/npu_supported_ops.json +10 -0
  44. mindspore/lib/plugin/ascend/custom_ascendc_910/op_api/lib/libcust_opapi.so +0 -0
  45. mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +0 -42
  46. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/decoder_kv_cache.py +51 -16
  47. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/prompt_kv_cache.py +51 -16
  48. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +158 -0
  49. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  50. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +158 -0
  51. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  52. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +158 -0
  53. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  54. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +158 -0
  55. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  56. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +158 -0
  57. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  58. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +158 -0
  59. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  60. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +158 -0
  61. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  62. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +158 -0
  63. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  64. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +167 -0
  65. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  66. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +167 -0
  67. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  68. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +167 -0
  69. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  70. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +167 -0
  71. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  72. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +167 -0
  73. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  74. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +167 -0
  75. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  76. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +167 -0
  77. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  78. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +167 -0
  79. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  80. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/binary_info_config.json +302 -0
  81. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/decoder_kv_cache.json +892 -0
  82. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/prompt_kv_cache.json +892 -0
  83. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/lib/linux/x86_64/libcust_opmaster_rt2.0.so +0 -0
  84. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  85. mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/inc/op_proto.h +33 -0
  86. mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/lib/linux/x86_64/libcust_opsproto_rt2.0.so +0 -0
  87. mindspore/lib/plugin/ascend/custom_ascendc_910/version.info +1 -0
  88. mindspore/lib/plugin/ascend/custom_ascendc_910b/framework/npu_supported_ops.json +14 -0
  89. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_decoder_kv_cache.h +59 -0
  90. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_prompt_kv_cache.h +59 -0
  91. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/lib/libcust_opapi.so +0 -0
  92. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl}/dynamic/all_finite.py +51 -16
  93. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.cpp +192 -0
  94. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.py +215 -0
  95. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.cpp +274 -0
  96. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.py +215 -0
  97. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +80 -0
  98. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o +0 -0
  99. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +80 -0
  100. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o +0 -0
  101. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +80 -0
  102. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o +0 -0
  103. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +158 -0
  104. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  105. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +158 -0
  106. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  107. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +158 -0
  108. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  109. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +158 -0
  110. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  111. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +158 -0
  112. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  113. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +158 -0
  114. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  115. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +158 -0
  116. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  117. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +158 -0
  118. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  119. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +167 -0
  120. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  121. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +167 -0
  122. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  123. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +167 -0
  124. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  125. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +167 -0
  126. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  127. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +167 -0
  128. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  129. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +167 -0
  130. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  131. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +167 -0
  132. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  133. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +167 -0
  134. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  135. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +78 -0
  136. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o} +0 -0
  137. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +78 -0
  138. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o} +0 -0
  139. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +78 -0
  140. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o} +0 -0
  141. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +156 -0
  142. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  143. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +156 -0
  144. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  145. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +156 -0
  146. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  147. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +156 -0
  148. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  149. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +156 -0
  150. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  151. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +156 -0
  152. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  153. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +156 -0
  154. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  155. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +156 -0
  156. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  157. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +165 -0
  158. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  159. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +165 -0
  160. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  161. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +165 -0
  162. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  163. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +165 -0
  164. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  165. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +165 -0
  166. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  167. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +165 -0
  168. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  169. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +165 -0
  170. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  171. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +165 -0
  172. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  173. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/all_finite.json +139 -0
  174. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/binary_info_config.json +361 -0
  175. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/decoder_kv_cache.json +892 -0
  176. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/prompt_kv_cache.json +892 -0
  177. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +139 -0
  178. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +361 -0
  179. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/decoder_kv_cache.json +892 -0
  180. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/prompt_kv_cache.json +892 -0
  181. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/lib/linux/x86_64/libcust_opmaster_rt2.0.so +0 -0
  182. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  183. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_proto/lib/linux/x86_64/libcust_opsproto_rt2.0.so +0 -0
  184. mindspore/lib/plugin/ascend/custom_ascendc_910b/version.info +1 -0
  185. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  186. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  187. mindspore/lib/plugin/ascend/libmindspore_internal_kernels.so +0 -0
  188. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/PkgInspect +0 -0
  189. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/op_man +0 -0
  190. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
  191. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_host.so +0 -0
  192. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
  193. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
  194. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/flash_attention_score_op.h +6 -1
  195. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_quant_acme_impl.so +0 -0
  196. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
  197. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
  198. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
  199. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bsh_full_mix.o +0 -0
  200. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
  201. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
  202. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
  203. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_full_mix.o +0 -0
  204. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
  205. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bsh_mix.o +0 -0
  206. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bnsd_mix.o +0 -0
  207. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bsh_mix.o +0 -0
  208. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  209. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  210. mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
  211. mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
  212. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  213. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  214. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  215. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  216. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  217. mindspore/nn/__init__.py +2 -0
  218. mindspore/nn/cell.py +16 -2
  219. mindspore/nn/layer/conv.py +3 -0
  220. mindspore/nn/layer/pooling.py +8 -10
  221. mindspore/nn/utils/__init__.py +22 -0
  222. mindspore/nn/utils/init.py +71 -0
  223. mindspore/ops/_grad_experimental/grad_comm_ops.py +25 -7
  224. mindspore/ops/auto_generate/gen_ops_prim.py +3 -2
  225. mindspore/ops/function/math_func.py +5 -4
  226. mindspore/ops/operations/comm_ops.py +4 -1
  227. mindspore/ops/operations/custom_ops.py +6 -4
  228. mindspore/ops/operations/nn_ops.py +7 -2
  229. mindspore/parallel/_auto_parallel_context.py +23 -4
  230. mindspore/parallel/_cell_wrapper.py +22 -3
  231. mindspore/parallel/_utils.py +0 -1
  232. mindspore/run_check/_check_version.py +17 -8
  233. mindspore/train/callback/_tft_register.py +7 -6
  234. mindspore/train/model.py +1 -0
  235. mindspore/train/serialization.py +4 -1
  236. mindspore/version.py +1 -1
  237. {mindspore-2.4.1.dist-info → mindspore-2.4.10.dist-info}/METADATA +2 -2
  238. {mindspore-2.4.1.dist-info → mindspore-2.4.10.dist-info}/RECORD +250 -123
  239. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/lib/libcust_opapi.so +0 -0
  240. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.json +0 -58
  241. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.json +0 -58
  242. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.json +0 -58
  243. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +0 -109
  244. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +0 -38
  245. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/lib/linux/x86_64/libcust_opmaster_rt2.0.so +0 -0
  246. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  247. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/lib/linux/x86_64/libcust_opsproto_rt2.0.so +0 -0
  248. mindspore/lib/plugin/ascend/custom_ascendc_ops/version.info +0 -1
  249. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_api/include/aclnn_decoder_kv_cache.h +0 -0
  250. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_api/include/aclnn_prompt_kv_cache.h +0 -0
  251. /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/decoder_kv_cache.cpp +0 -0
  252. /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/prompt_kv_cache.cpp +0 -0
  253. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_api/include/aclnn_all_finite.h +0 -0
  254. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -0
  255. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +0 -0
  256. /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl}/dynamic/all_finite.cpp +0 -0
  257. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_proto/inc/op_proto.h +0 -0
  258. {mindspore-2.4.1.dist-info → mindspore-2.4.10.dist-info}/WHEEL +0 -0
  259. {mindspore-2.4.1.dist-info → mindspore-2.4.10.dist-info}/entry_points.txt +0 -0
  260. {mindspore-2.4.1.dist-info → mindspore-2.4.10.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,215 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: UTF-8 -*-
3
+ """
4
+ Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
5
+ """
6
+
7
+ import os, sys
8
+ import ctypes
9
+ import json
10
+ import shutil
11
+ from tbe.common.platform import get_soc_spec
12
+ from tbe.common.utils import para_check
13
+ from tbe.tikcpp import compile_op, replay_op, check_op_cap, generalize_op_params, get_code_channel, OpInfo
14
+ from tbe.tikcpp.compile_op import CommonUtility, AscendCLogLevel
15
+ from tbe.common.buildcfg import get_default_build_config
16
+ from impl.util.platform_adapter import tbe_register
17
+ from tbe.common.buildcfg import get_current_build_config
18
+ PYF_PATH = os.path.dirname(os.path.realpath(__file__))
19
+
20
# Maps a dtype name to a two-element list. add_dtype_fmt_option_single() uses
# element 0 for the -DORIG_DTYPE_<name> define and element 1 for the
# -DDTYPE_<name> define.
DTYPE_MAP = {
    dtype_name: [orig_dtype, kernel_dtype]
    for dtype_name, orig_dtype, kernel_dtype in (
        ("float32", "DT_FLOAT", "float"),
        ("float16", "DT_FLOAT16", "half"),
        ("int8", "DT_INT8", "int8_t"),
        ("int16", "DT_INT16", "int16_t"),
        ("int32", "DT_INT32", "int32_t"),
        ("int64", "DT_INT64", "int64_t"),
        ("uint1", "DT_UINT1", "uint8_t"),
        ("uint8", "DT_UINT8", "uint8_t"),
        ("uint16", "DT_UINT16", "uint16_t"),
        ("uint32", "DT_UINT32", "uint32_t"),
        ("uint64", "DT_UINT64", "uint64_t"),
        ("bool", "DT_BOOL", "bool"),
        ("double", "DT_DOUBLE", "double"),
        ("dual", "DT_DUAL", "unknown"),
        ("dual_sub_int8", "DT_DUAL_SUB_INT8", "unknown"),
        ("dual_sub_uint8", "DT_DUAL_SUB_UINT8", "unknown"),
        ("string", "DT_STRING", "unknown"),
        ("complex32", "DT_COMPLEX32", "unknown"),
        ("complex64", "DT_COMPLEX64", "unknown"),
        ("complex128", "DT_COMPLEX128", "unknown"),
        ("qint8", "DT_QINT8", "unknown"),
        ("qint16", "DT_QINT16", "unknown"),
        ("qint32", "DT_QINT32", "unknown"),
        ("quint8", "DT_QUINT8", "unknown"),
        ("quint16", "DT_QUINT16", "unknown"),
        ("resource", "DT_RESOURCE", "unknown"),
        ("string_ref", "DT_STRING_REF", "unknown"),
        ("int4", "DT_INT4", "int4b_t"),
        ("bfloat16", "DT_BF16", "bfloat16_t"),
    )
}
49
+
50
def add_dtype_fmt_option_single(x, x_n, is_ref: bool = False):
    """Build the compiler defines that describe one tensor parameter.

    Args:
        x: Parameter description; its "format" and "dtype" entries are read
            with ``x.get``.
        x_n: Macro-name stem for the parameter.
        is_ref: When True, ``_REF`` is appended to the stem.

    Returns:
        A list of three options, in this order: ``-DDTYPE_<n>=...``,
        ``-DORIG_DTYPE_<n>=...`` and ``-DFORMAT_<n>=FORMAT_<format>``.

    Raises:
        TypeError: If the dtype is not a key of ``DTYPE_MAP``.
    """
    x_fmt = x.get("format")
    x_dtype = x.get("dtype")
    dtype_names = DTYPE_MAP.get(x_dtype)
    if dtype_names is None:
        # An unknown dtype used to fail with "'NoneType' object is not
        # subscriptable"; keep the exception type but name the culprit.
        raise TypeError("unsupported dtype {!r} for parameter {}".format(x_dtype, x_n))
    x_n_in_kernel = x_n + '_REF' if is_ref else x_n
    return [
        "-DDTYPE_{n}={t}".format(n=x_n_in_kernel, t=dtype_names[1]),
        "-DORIG_DTYPE_{n}={ot}".format(n=x_n_in_kernel, ot=dtype_names[0]),
        "-DFORMAT_{n}=FORMAT_{f}".format(n=x_n_in_kernel, f=x_fmt),
    ]
59
+
60
def get_dtype_fmt_options(__inputs__, __outputs__):
    """Collect the dtype/format defines for every non-None input and output.

    Inputs and outputs are matched to parameter names by position. An output
    whose upper-cased name was already used by a processed input is emitted
    with the ``is_ref`` flag set.

    Returns:
        The concatenated option lists, inputs first, then outputs.
    """
    in_names = ['cache', 'update', 'valid_seq_len', 'batch_index', 'seq_len_axis', 'new_max_seq_len', 'cur_max_seq_len']
    out_names = ['out']
    seen_input_macros = set()
    collected = []

    for position, tensor in enumerate(__inputs__):
        if tensor is not None:
            macro = in_names[position].upper()
            seen_input_macros.add(macro)
            collected.extend(add_dtype_fmt_option_single(tensor, macro))

    for position, tensor in enumerate(__outputs__):
        if tensor is not None:
            macro = out_names[position].upper()
            # Pass the ref flag only when the name collides with an input.
            ref_flag = (True,) if macro in seen_input_macros else ()
            collected.extend(add_dtype_fmt_option_single(tensor, macro, *ref_flag))
    return collected
81
+
82
def load_dso(so_path):
    """Load a shared library with ``ctypes.CDLL`` and log the outcome.

    Args:
        so_path: Path of the shared object to load.

    Raises:
        RuntimeError: If ``ctypes.CDLL`` raises ``OSError``; the original
            error is attached as the cause.
    """
    try:
        ctypes.CDLL(so_path)
    except OSError as error:
        CommonUtility.print_compile_log("", error, AscendCLogLevel.LOG_ERROR)
        # Chain explicitly so the loader's own message stays visible as the
        # direct cause in the traceback.
        raise RuntimeError("cannot open %s" % (so_path)) from error
    else:
        msg = "load so succ " + so_path
        CommonUtility.print_compile_log("", msg, AscendCLogLevel.LOG_INFO)
91
+
92
def get_shortsoc_compile_option(compile_option_list: list, shortsoc:str):
    """Gather the compile options registered for one SoC plus the shared ones.

    ``compile_option_list`` is looked up by key (``shortsoc`` first, then
    ``'__ALLSOC__'``); the values found are concatenated in that order.
    Note that, despite the ``list`` annotation, the callers in this file
    pass a dict.
    """
    selected = []
    for key in (shortsoc, '__ALLSOC__'):
        if key in compile_option_list:
            selected.extend(compile_option_list[key])
    return selected
99
+
100
def get_kernel_source(src_file, dir_snake, dir_ex):
    """Locate the kernel source file by probing a fixed list of locations.

    Probe order:
      1. ``<PYF_PATH>/../ascendc/<dir_ex>/<src_file>``
      2. the path in the ``BUILD_KERNEL_SRC`` environment variable
      3. ``<PYF_PATH>/../ascendc/<dir_snake>/<src_file>``
      4. ``<PYF_PATH>/<src_file>``
      5. ``<PYF_PATH>/../ascendc/<dir_snake>/<dir_snake>.cpp``
      6. ``<PYF_PATH>/../ascendc/<dir_ex>/<dir_ex>.cpp``
      7. ``<PYF_PATH>/../ascendc/<src_file without extension>/<src_file>``

    Returns:
        The first existing path, or path 1 (which may not exist) when none
        of the candidates is found.
    """
    def under_ascendc(*parts):
        return os.path.join(PYF_PATH, "..", "ascendc", *parts)

    def remaining_candidates():
        # Generated lazily so each path is only built when it is probed.
        yield under_ascendc(dir_snake, src_file)
        yield os.path.join(PYF_PATH, src_file)
        yield under_ascendc(dir_snake, dir_snake + ".cpp")
        yield under_ascendc(dir_ex, dir_ex + ".cpp")
        yield under_ascendc(os.path.splitext(src_file)[0], src_file)

    preferred = under_ascendc(dir_ex, src_file)
    if os.path.exists(preferred):
        return preferred

    env_override = os.environ.get('BUILD_KERNEL_SRC')
    if env_override and os.path.exists(env_override):
        return env_override

    for candidate in remaining_candidates():
        if os.path.exists(candidate):
            return candidate
    return preferred
123
+
124
+ def _build_args(cache_in__, update_in__, valid_seq_len_in__, batch_index_in__, seq_len_axis_in__, new_max_seq_len_in__, cur_max_seq_len_in__, out_out_):
125
+ __inputs__ = []
126
+ for arg in [cache_in__, update_in__, valid_seq_len_in__, batch_index_in__, seq_len_axis_in__, new_max_seq_len_in__, cur_max_seq_len_in__]:
127
+ if arg != None:
128
+ if isinstance(arg, (list, tuple)):
129
+ if len(arg) == 0:
130
+ continue
131
+ __inputs__.append(arg[0])
132
+ else:
133
+ __inputs__.append(arg)
134
+ else:
135
+ __inputs__.append(arg)
136
+ __outputs__ = []
137
+ for arg in [out_out_]:
138
+ if arg != None:
139
+ if isinstance(arg, (list, tuple)):
140
+ if len(arg) == 0:
141
+ continue
142
+ __outputs__.append(arg[0])
143
+ else:
144
+ __outputs__.append(arg)
145
+ else:
146
+ __outputs__.append(arg)
147
+ __attrs__ = []
148
+ return __inputs__, __outputs__, __attrs__
149
+
150
@tbe_register.register_operator("DecoderKvCache", trans_bool_to_s8=False)
@para_check.check_op_params(para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT, para_check.REQUIRED_OUTPUT, para_check.KERNEL_NAME)
def decoder_kv_cache(cache_in__, update_in__, valid_seq_len_in__, batch_index_in__, seq_len_axis_in__, new_max_seq_len_in__, cur_max_seq_len_in__, out_out_, kernel_name="decoder_kv_cache", impl_mode=""):
    """Assemble the compile options for the DecoderKvCache kernel and compile it.

    Steps:
      1. Return immediately when the "enable_op_prebuild" build config is set.
      2. Derive dtype/format defines from the normalized inputs and outputs.
      3. Add the include paths. They are derived from the compiler given by
         ``BISHENG_REAL_PATH`` or found on ``PATH``, with a fixed install
         location as the last resort.
      4. Add the impl-mode and deterministic-mode defines and any per-SoC
         custom options.
      5. Resolve the kernel source and hand everything to ``compile_op``.

    Args:
        cache_in__ ... cur_max_seq_len_in__: Operator input descriptions.
        out_out_: Operator output description.
        kernel_name: Name given to the compiled kernel.
        impl_mode: "high_performance" or "high_precision" add a matching
            define; any other value adds none.

    Returns:
        None.
    """
    # do ascendc build step
    if get_current_build_config("enable_op_prebuild"):
        return
    __inputs__, __outputs__, __attrs__ = _build_args(cache_in__, update_in__, valid_seq_len_in__, batch_index_in__, seq_len_axis_in__, new_max_seq_len_in__, cur_max_seq_len_in__, out_out_)
    options = get_dtype_fmt_options(__inputs__, __outputs__)
    options += ["-x", "cce"]

    bisheng = os.environ.get('BISHENG_REAL_PATH')
    if bisheng is None:
        bisheng = shutil.which("bisheng")
    if bisheng is not None:
        bisheng_path = os.path.dirname(bisheng)
        tikcpp_path = os.path.realpath(os.path.join(bisheng_path, "..", "..", "tikcpp"))
    else:
        # No compiler located: fall back to the fixed install path.
        tikcpp_path = os.path.realpath("/usr/local/Ascend/latest/compiler/tikcpp")
    options.append("-I" + tikcpp_path)
    options.append("-I" + os.path.join(tikcpp_path, "..", "..", "include"))
    options.append("-I" + os.path.join(tikcpp_path, "tikcfw"))
    options.append("-I" + os.path.join(tikcpp_path, "tikcfw", "impl"))
    options.append("-I" + os.path.join(tikcpp_path, "tikcfw", "interface"))
    options.append("-I" + os.path.join(PYF_PATH, "..", "ascendc", "common"))

    if impl_mode == "high_performance":
        options.append("-DHIGH_PERFORMANCE=1")
    elif impl_mode == "high_precision":
        options.append("-DHIGH_PRECISION=1")
    if get_current_build_config("enable_deterministic_mode") == 1:
        options.append("-DDETERMINISTIC_MODE=1")
    else:
        options.append("-DDETERMINISTIC_MODE=0")

    # Per-SoC custom options; both tables are empty for this operator.
    custom_compile_options = {}
    custom_all_compile_options = {}
    soc_short = get_soc_spec("SHORT_SOC_VERSION").lower()
    options += get_shortsoc_compile_option(custom_all_compile_options, soc_short)
    options += get_shortsoc_compile_option(custom_compile_options, soc_short)

    origin_func_name = "decoder_kv_cache"
    ascendc_src_dir_ex = "decoder_kv_cache"
    ascendc_src_dir = "decoder_kv_cache"
    ascendc_src_file = "decoder_kv_cache.cpp"
    src = get_kernel_source(ascendc_src_file, ascendc_src_dir, ascendc_src_dir_ex)

    msg = "start compile Acend C Operator DecoderKvCache, kernel name is " + kernel_name
    CommonUtility.print_compile_log("", msg, AscendCLogLevel.LOG_INFO)
    op_type = "DecoderKvCache"
    code_channel = get_code_channel(src, kernel_name, op_type, options)
    op_info = OpInfo(
        kernel_name=kernel_name,
        op_type=op_type,
        inputs=__inputs__,
        outputs=__outputs__,
        attrs=__attrs__,
        impl_mode=impl_mode,
        origin_inputs=[cache_in__, update_in__, valid_seq_len_in__, batch_index_in__, seq_len_axis_in__, new_max_seq_len_in__, cur_max_seq_len_in__],
        origin_outputs=[out_out_],
        param_type_dynamic=False,
        mc2_ctx=[],
        param_type_list=['required', 'required', 'required', 'required', 'required', 'required', 'required', 'required'],
        init_value_list=[None],
        output_shape_depend_on_compute=[],
    )
    compile_op(src, origin_func_name, op_info, options, code_channel, '{}')
206
+
207
def op_select_format(cache_in__, update_in__, valid_seq_len_in__, batch_index_in__, seq_len_axis_in__, new_max_seq_len_in__, cur_max_seq_len_in__, out_out_, impl_mode=""):
    """Run the "op_select_format" capability check for DecoderKvCache.

    The arguments are normalized with ``_build_args`` and forwarded to
    ``check_op_cap``; its result is decoded as UTF-8 and returned.
    ``impl_mode`` is accepted but not used.
    """
    normalized = _build_args(
        cache_in__, update_in__, valid_seq_len_in__, batch_index_in__,
        seq_len_axis_in__, new_max_seq_len_in__, cur_max_seq_len_in__, out_out_)
    # normalized is the (inputs, outputs, attrs) triple.
    encoded = check_op_cap("op_select_format", "DecoderKvCache", *normalized)
    return encoded.decode("utf-8")
211
+
212
def get_op_specific_info(cache_in__, update_in__, valid_seq_len_in__, batch_index_in__, seq_len_axis_in__, new_max_seq_len_in__, cur_max_seq_len_in__, out_out_, impl_mode=""):
    """Run the "get_op_specific_info" capability check for DecoderKvCache.

    The arguments are normalized with ``_build_args`` and forwarded to
    ``check_op_cap``; its result is decoded as UTF-8 and returned.
    ``impl_mode`` is accepted but not used.
    """
    normalized = _build_args(
        cache_in__, update_in__, valid_seq_len_in__, batch_index_in__,
        seq_len_axis_in__, new_max_seq_len_in__, cur_max_seq_len_in__, out_out_)
    # normalized is the (inputs, outputs, attrs) triple.
    encoded = check_op_cap("get_op_specific_info", "DecoderKvCache", *normalized)
    return encoded.decode("utf-8")
@@ -0,0 +1,274 @@
1
+ /**
2
+ * Copyright 2023 Huawei Technologies Co., Ltd
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+
17
+ #include "kernel_operator.h"
18
+ #include "kernel_utils.h"
19
+ using namespace AscendC;
20
+
21
namespace {
// Buffer count passed to InitBuffer for the update queue.
constexpr int64_t kBufferNum = 1;
// Granularity to which the index element counts are rounded up.
const int64_t kDivisor = 4;

// Rounds a non-negative `value` up to the next multiple of `divisor`.
// Returns 0 when `divisor` is 0 instead of dividing by zero.
static __aicore__ inline int64_t CeilRound(int64_t value, int64_t divisor) {
  if (divisor != 0) {
    const int64_t chunk_count = (value + divisor - 1) / divisor;
    return chunk_count * divisor;
  }
  return 0;
}
}  // namespace
31
+
32
+ template <typename T>
33
+ class KernelPromptKvCache {
34
+ public:
35
+ __aicore__ inline KernelPromptKvCache() {}
36
+
37
+ __aicore__ inline void GetIndex(GM_ADDR batch_index, GM_ADDR valid_seq_len) {
38
+ int64_t batch_index_ub_num = CeilRound(ub_, kDivisor);
39
+ int64_t valid_seq_len_ub_num = CeilRound(ub_, kDivisor);
40
+ batch_index_gm_.SetGlobalBuffer((__gm__ int64_t *)batch_index, batch_index_ub_num);
41
+
42
+ int64_t total_num = batch_index_ub_num + valid_seq_len_ub_num;
43
+ pipe_.InitBuffer(index_queue_, 1, total_num * sizeof(int64_t));
44
+ batch_index_tensor_ = index_queue_.AllocTensor<int64_t>();
45
+ DataCopy(batch_index_tensor_, batch_index_gm_, batch_index_ub_num);
46
+
47
+ valid_seq_len_gm_.SetGlobalBuffer((__gm__ int64_t *)valid_seq_len, valid_seq_len_ub_num);
48
+ valid_seq_len_tensor_ = batch_index_tensor_[batch_index_ub_num];
49
+ DataCopy(valid_seq_len_tensor_, valid_seq_len_gm_, valid_seq_len_ub_num);
50
+ }
51
+
52
+ __aicore__ inline void UpdateCache(GM_ADDR cache, GM_ADDR update) {
53
+ pipe_.InitBuffer(update_queue_, kBufferNum, former_block_us_ * d_ * sizeof(T));
54
+ for (int64_t i = 0; i < each_core_bs_num_; ++i) {
55
+ int64_t bh_idx = core_idx_ * former_each_core_bs_num_ + i;
56
+ int64_t ub_idx = bh_idx / h_;
57
+ int64_t h_idx = bh_idx % h_;
58
+ pipe_barrier((pipe_t)PIPE_ALL);
59
+ int64_t cache_b_idx = batch_index_tensor_.GetValue(ub_idx);
60
+ int64_t s_idx = valid_seq_len_tensor_.GetValue(ub_idx);
61
+ if (cache_b_idx < 0 || cache_b_idx >= b_) {
62
+ continue;
63
+ }
64
+ if (s_idx < 0 || s_idx + us_ > s_) {
65
+ continue;
66
+ }
67
+
68
+ for (int64_t j = 0; j < split_us_; ++j) {
69
+ int64_t u_block_len;
70
+ if (j == split_us_ - 1) {
71
+ u_block_len = tail_block_us_ * d_;
72
+ } else {
73
+ u_block_len = former_block_us_ * d_;
74
+ }
75
+ LocalTensor<T> update_in_local_tensor = update_queue_.AllocTensor<T>();
76
+ update_gm_.SetGlobalBuffer(
77
+ (__gm__ T *)update + ub_idx * update_b_stride_ + h_idx * update_h_stride_ + j * former_block_us_ * d_,
78
+ u_block_len);
79
+ out_gm_.SetGlobalBuffer((__gm__ T *)cache + cache_b_idx * cache_b_stride_ + h_idx * cache_h_stride_ +
80
+ s_idx * d_ + j * former_block_us_ * d_,
81
+ u_block_len);
82
+ pipe_barrier((pipe_t)PIPE_ALL);
83
+ DataCopy(update_in_local_tensor, update_gm_, u_block_len);
84
+ update_queue_.EnQue(update_in_local_tensor);
85
+ LocalTensor<T> update_in_local_tensor_out = update_queue_.DeQue<T>();
86
+ pipe_barrier((pipe_t)PIPE_ALL);
87
+ DataCopy(out_gm_, update_in_local_tensor_out, u_block_len);
88
+ update_queue_.FreeTensor(update_in_local_tensor_out);
89
+ }
90
+ }
91
+ }
92
+
93
+ __aicore__ inline void InitParam(GM_ADDR tiling) {
94
+ GET_TILING_DATA(tiling_data, tiling);
95
+ core_num_ = tiling_data.core_num;
96
+ b_ = tiling_data.b;
97
+ h_ = tiling_data.h;
98
+ s_ = tiling_data.s;
99
+ d_ = tiling_data.d;
100
+ ub_ = tiling_data.ub;
101
+ us_ = tiling_data.us;
102
+ former_each_core_bs_num_ = tiling_data.former_each_core_bs_num;
103
+ tail_each_core_bs_num_ = tiling_data.tail_each_core_bs_num;
104
+ split_us_ = tiling_data.split_us;
105
+ former_block_us_ = tiling_data.former_block_us;
106
+ tail_block_us_ = tiling_data.tail_block_us;
107
+ }
108
+
109
+ __aicore__ inline void Process(GM_ADDR cache, GM_ADDR update, GM_ADDR valid_seq_len, GM_ADDR batch_index,
110
+ GM_ADDR seq_len_axis, GM_ADDR new_max_seq_len, GM_ADDR cur_max_seq_len,
111
+ GM_ADDR tiling_data) {
112
+ core_idx_ = GetBlockIdx();
113
+ InitParam(tiling_data);
114
+ if (core_idx_ >= core_num_) {
115
+ return;
116
+ }
117
+ if (g_coreType == AIC) {
118
+ return;
119
+ }
120
+ if (core_idx_ != core_num_ - 1) {
121
+ each_core_bs_num_ = former_each_core_bs_num_;
122
+ } else {
123
+ each_core_bs_num_ = tail_each_core_bs_num_;
124
+ }
125
+
126
+ GetIndex(batch_index, valid_seq_len);
127
+
128
+ cache_h_stride_ = s_ * d_;
129
+ cache_b_stride_ = h_ * cache_h_stride_;
130
+
131
+ update_h_stride_ = us_ * d_;
132
+ update_b_stride_ = h_ * update_h_stride_;
133
+
134
+ UpdateCache(cache, update);
135
+ index_queue_.FreeTensor(batch_index_tensor_);
136
+ }
137
+
138
+ private:
139
+ // gm
140
+ GlobalTensor<T> update_gm_;
141
+ GlobalTensor<int64_t> valid_seq_len_gm_;
142
+ GlobalTensor<int64_t> batch_index_gm_;
143
+ GlobalTensor<int64_t> new_max_seq_len_gm_;
144
+ GlobalTensor<T> out_gm_;
145
+
146
+ // local gm
147
+ LocalTensor<int64_t> valid_seq_len_tensor_;
148
+ LocalTensor<int64_t> batch_index_tensor_;
149
+
150
+ TPipe pipe_;
151
+ TQue<QuePosition::VECIN, 1> update_queue_;
152
+ TQue<QuePosition::VECIN, 1> index_queue_;
153
+ TQue<QuePosition::VECIN, 1> new_max_seq_len_queue_;
154
+
155
+ int64_t core_idx_ = 0;
156
+ int64_t core_num_ = 0;
157
+ int64_t each_core_bs_num_ = 0;
158
+ int64_t former_each_core_bs_num_ = 0;
159
+ int64_t tail_each_core_bs_num_ = 0;
160
+ int64_t b_ = 0;
161
+ int64_t h_ = 0;
162
+ int64_t s_ = 0;
163
+ int64_t d_ = 0;
164
+ int64_t ub_ = 0;
165
+ int64_t us_ = 0;
166
+ int64_t split_us_ = 0;
167
+ int64_t former_block_us_ = 0;
168
+ int64_t tail_block_us_ = 0;
169
+
170
+ int64_t cache_b_stride_ = 0;
171
+ int64_t cache_h_stride_ = 0;
172
+ int64_t update_b_stride_ = 0;
173
+ int64_t update_h_stride_ = 0;
174
+ };
175
+
176
+ template <typename T>
177
+ class KernelPromptKvCacheCopyAll {
178
+ public:
179
+ __aicore__ inline KernelPromptKvCacheCopyAll() {}
180
+
181
+ __aicore__ inline void InitParam(GM_ADDR tiling) {
182
+ GET_TILING_DATA(tiling_data, tiling);
183
+ core_num_ = tiling_data.core_num;
184
+ former_each_core_bs_num_ = tiling_data.former_each_core_bs_num;
185
+ tail_each_core_bs_num_ = tiling_data.tail_each_core_bs_num;
186
+ split_us_ = tiling_data.split_us;
187
+ former_block_us_ = tiling_data.former_block_us;
188
+ tail_block_us_ = tiling_data.tail_block_us;
189
+ }
190
+
191
+ __aicore__ inline void Process(GM_ADDR cache, GM_ADDR update, GM_ADDR valid_seq_len, GM_ADDR batch_index,
192
+ GM_ADDR seq_len_axis, GM_ADDR new_max_seq_len, GM_ADDR cur_max_seq_len,
193
+ GM_ADDR tiling_data) {
194
+ core_idx_ = GetBlockIdx();
195
+ InitParam(tiling_data);
196
+ if (core_idx_ >= core_num_) {
197
+ return;
198
+ }
199
+ if (g_coreType == AIC) {
200
+ return;
201
+ }
202
+ if (core_idx_ != core_num_ - 1) {
203
+ each_core_bs_num_ = former_each_core_bs_num_;
204
+ } else {
205
+ each_core_bs_num_ = tail_each_core_bs_num_;
206
+ }
207
+ pipe_barrier((pipe_t)PIPE_ALL);
208
+
209
+ pipe_.InitBuffer(update_queue_, kBufferNum, former_block_us_ * sizeof(T));
210
+ for (int64_t i = 0; i < split_us_; ++i) {
211
+ int64_t u_block_len;
212
+ if (i == split_us_ - 1) {
213
+ u_block_len = tail_block_us_;
214
+ } else {
215
+ u_block_len = former_block_us_;
216
+ }
217
+ LocalTensor<T> update_in_local_tensor = update_queue_.AllocTensor<T>();
218
+ update_gm_.SetGlobalBuffer((__gm__ T *)update + core_idx_ * former_each_core_bs_num_ + i * former_block_us_,
219
+ u_block_len);
220
+ out_gm_.SetGlobalBuffer((__gm__ T *)cache + core_idx_ * former_each_core_bs_num_ + i * former_block_us_,
221
+ u_block_len);
222
+ pipe_barrier((pipe_t)PIPE_ALL);
223
+ DataCopy(update_in_local_tensor, update_gm_, u_block_len);
224
+ update_queue_.EnQue(update_in_local_tensor);
225
+ LocalTensor<T> update_in_local_tensor_out = update_queue_.DeQue<T>();
226
+ pipe_barrier((pipe_t)PIPE_ALL);
227
+ DataCopy(out_gm_, update_in_local_tensor_out, u_block_len);
228
+ update_queue_.FreeTensor(update_in_local_tensor_out);
229
+ }
230
+ }
231
+
232
+ private:
233
+ // gm
234
+ GlobalTensor<T> update_gm_;
235
+ GlobalTensor<T> out_gm_;
236
+
237
+ TPipe pipe_;
238
+ TQue<QuePosition::VECIN, 1> update_queue_;
239
+
240
+ int64_t core_idx_ = 0;
241
+ int64_t core_num_ = 0;
242
+ int64_t each_core_bs_num_ = 0;
243
+ int64_t former_each_core_bs_num_ = 0;
244
+ int64_t tail_each_core_bs_num_ = 0;
245
+
246
+ int64_t split_us_ = 0;
247
+ int64_t former_block_us_ = 0;
248
+ int64_t tail_block_us_ = 0;
249
+ };
250
+
251
+ extern "C" __global__ __aicore__ void prompt_kv_cache(GM_ADDR cache, GM_ADDR update, GM_ADDR valid_seq_len,
252
+ GM_ADDR batch_index, GM_ADDR seq_len_axis,
253
+ GM_ADDR new_max_seq_len, GM_ADDR cur_max_seq_len, GM_ADDR out,
254
+ GM_ADDR workspace, GM_ADDR tiling) {
255
+ if (TILING_KEY_IS(1)) {
256
+ KernelPromptKvCache<int8_t> op;
257
+ op.Process(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len, tiling);
258
+ } else if (TILING_KEY_IS(2)) {
259
+ KernelPromptKvCache<int16_t> op;
260
+ op.Process(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len, tiling);
261
+ } else if (TILING_KEY_IS(4)) {
262
+ KernelPromptKvCache<int32_t> op;
263
+ op.Process(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len, tiling);
264
+ } else if (TILING_KEY_IS(10)) {
265
+ KernelPromptKvCacheCopyAll<int8_t> op;
266
+ op.Process(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len, tiling);
267
+ } else if (TILING_KEY_IS(20)) {
268
+ KernelPromptKvCacheCopyAll<int16_t> op;
269
+ op.Process(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len, tiling);
270
+ } else if (TILING_KEY_IS(40)) {
271
+ KernelPromptKvCacheCopyAll<int32_t> op;
272
+ op.Process(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len, tiling);
273
+ }
274
+ }