mindspore 2.4.1__cp310-none-any.whl → 2.4.10__cp310-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. See the package's registry page for more details.

Files changed (242)
  1. mindspore/.commit_id +1 -1
  2. mindspore/_c_dataengine.cpython-310-aarch64-linux-gnu.so +0 -0
  3. mindspore/_c_expression.cpython-310-aarch64-linux-gnu.so +0 -0
  4. mindspore/_c_mindrecord.cpython-310-aarch64-linux-gnu.so +0 -0
  5. mindspore/bin/cache_server +0 -0
  6. mindspore/common/api.py +1 -4
  7. mindspore/common/file_system.py +2 -0
  8. mindspore/common/parameter.py +1 -14
  9. mindspore/communication/_comm_helper.py +5 -0
  10. mindspore/context.py +7 -2
  11. mindspore/dataset/engine/datasets_standard_format.py +17 -0
  12. mindspore/dataset/engine/datasets_user_defined.py +27 -1
  13. mindspore/experimental/llm_boost/__init__.py +2 -2
  14. mindspore/experimental/llm_boost/atb/boost_base.py +240 -64
  15. mindspore/experimental/llm_boost/atb/llama_boost.py +46 -29
  16. mindspore/experimental/llm_boost/atb/qwen_boost.py +47 -24
  17. mindspore/include/api/context.h +1 -1
  18. mindspore/include/dataset/constants.h +2 -2
  19. mindspore/lib/libavcodec.so.59 +0 -0
  20. mindspore/lib/libavdevice.so.59 +0 -0
  21. mindspore/lib/libavfilter.so.8 +0 -0
  22. mindspore/lib/libavformat.so.59 +0 -0
  23. mindspore/lib/libavutil.so.57 +0 -0
  24. mindspore/lib/libmindspore_backend.so +0 -0
  25. mindspore/lib/libmindspore_common.so +0 -0
  26. mindspore/lib/libmindspore_core.so +0 -0
  27. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  28. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  29. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  30. mindspore/lib/libmindspore_ops.so +0 -0
  31. mindspore/lib/libswresample.so.4 +0 -0
  32. mindspore/lib/libswscale.so.6 +0 -0
  33. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  34. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  35. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  36. mindspore/lib/plugin/ascend/custom_ascendc_910/framework/npu_supported_ops.json +10 -0
  37. mindspore/lib/plugin/ascend/custom_ascendc_910/op_api/lib/libcust_opapi.so +0 -0
  38. mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +0 -42
  39. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/decoder_kv_cache.py +51 -16
  40. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/prompt_kv_cache.py +51 -16
  41. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +158 -0
  42. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  43. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +158 -0
  44. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  45. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +158 -0
  46. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  47. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +158 -0
  48. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  49. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +158 -0
  50. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  51. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +158 -0
  52. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  53. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +158 -0
  54. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  55. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +158 -0
  56. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  57. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +167 -0
  58. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  59. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +167 -0
  60. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  61. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +167 -0
  62. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  63. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +167 -0
  64. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  65. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +167 -0
  66. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  67. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +167 -0
  68. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  69. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +167 -0
  70. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  71. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +167 -0
  72. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  73. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/binary_info_config.json +302 -0
  74. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/decoder_kv_cache.json +892 -0
  75. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/prompt_kv_cache.json +892 -0
  76. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  77. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  78. mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/inc/op_proto.h +33 -0
  79. mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  80. mindspore/lib/plugin/ascend/custom_ascendc_910/version.info +1 -0
  81. mindspore/lib/plugin/ascend/custom_ascendc_910b/framework/npu_supported_ops.json +14 -0
  82. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_decoder_kv_cache.h +59 -0
  83. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_prompt_kv_cache.h +59 -0
  84. mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_api/lib/libcust_opapi.so +0 -0
  85. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl}/dynamic/all_finite.py +51 -16
  86. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.cpp +192 -0
  87. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.py +215 -0
  88. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.cpp +274 -0
  89. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.py +215 -0
  90. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +80 -0
  91. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o +0 -0
  92. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +80 -0
  93. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o +0 -0
  94. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +80 -0
  95. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o +0 -0
  96. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +158 -0
  97. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  98. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +158 -0
  99. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  100. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +158 -0
  101. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  102. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +158 -0
  103. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  104. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +158 -0
  105. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  106. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +158 -0
  107. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  108. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +158 -0
  109. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  110. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +158 -0
  111. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  112. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +167 -0
  113. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  114. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +167 -0
  115. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  116. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +167 -0
  117. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  118. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +167 -0
  119. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  120. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +167 -0
  121. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  122. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +167 -0
  123. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  124. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +167 -0
  125. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  126. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +167 -0
  127. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  128. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +78 -0
  129. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o} +0 -0
  130. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +78 -0
  131. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o} +0 -0
  132. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +78 -0
  133. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o} +0 -0
  134. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +156 -0
  135. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  136. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +156 -0
  137. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  138. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +156 -0
  139. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  140. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +156 -0
  141. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  142. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +156 -0
  143. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  144. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +156 -0
  145. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  146. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +156 -0
  147. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  148. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +156 -0
  149. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  150. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +165 -0
  151. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  152. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +165 -0
  153. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  154. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +165 -0
  155. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  156. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +165 -0
  157. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  158. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +165 -0
  159. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  160. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +165 -0
  161. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  162. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +165 -0
  163. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  164. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +165 -0
  165. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  166. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/all_finite.json +139 -0
  167. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/binary_info_config.json +361 -0
  168. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/decoder_kv_cache.json +892 -0
  169. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/prompt_kv_cache.json +892 -0
  170. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +139 -0
  171. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +361 -0
  172. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/decoder_kv_cache.json +892 -0
  173. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/prompt_kv_cache.json +892 -0
  174. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  175. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  176. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  177. mindspore/lib/plugin/ascend/custom_ascendc_910b/version.info +1 -0
  178. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  179. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  180. mindspore/lib/plugin/ascend/libmindspore_internal_kernels.so +0 -0
  181. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/PkgInspect +0 -0
  182. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/op_man +0 -0
  183. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
  184. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_host.so +0 -0
  185. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
  186. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
  187. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal_static.a +0 -0
  188. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/flash_attention_score_op.h +6 -1
  189. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_quant_acme_impl.so +0 -0
  190. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
  191. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
  192. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
  193. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
  194. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_full_mix.o +0 -0
  195. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
  196. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bnsd_mix.o +0 -0
  197. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bsh_mix.o +0 -0
  198. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bsh_mix.o +0 -0
  199. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  200. mindspore/nn/__init__.py +2 -0
  201. mindspore/nn/cell.py +16 -2
  202. mindspore/nn/layer/conv.py +3 -0
  203. mindspore/nn/layer/pooling.py +8 -10
  204. mindspore/nn/utils/__init__.py +22 -0
  205. mindspore/nn/utils/init.py +71 -0
  206. mindspore/ops/_grad_experimental/grad_comm_ops.py +25 -7
  207. mindspore/ops/auto_generate/gen_ops_prim.py +3 -2
  208. mindspore/ops/function/math_func.py +5 -4
  209. mindspore/ops/operations/comm_ops.py +4 -1
  210. mindspore/ops/operations/custom_ops.py +6 -4
  211. mindspore/ops/operations/nn_ops.py +7 -2
  212. mindspore/parallel/_auto_parallel_context.py +23 -4
  213. mindspore/parallel/_cell_wrapper.py +22 -3
  214. mindspore/parallel/_utils.py +0 -1
  215. mindspore/run_check/_check_version.py +17 -8
  216. mindspore/train/callback/_tft_register.py +7 -6
  217. mindspore/train/model.py +1 -0
  218. mindspore/train/serialization.py +4 -1
  219. mindspore/version.py +1 -1
  220. {mindspore-2.4.1.dist-info → mindspore-2.4.10.dist-info}/METADATA +2 -2
  221. {mindspore-2.4.1.dist-info → mindspore-2.4.10.dist-info}/RECORD +233 -106
  222. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.json +0 -58
  223. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.json +0 -58
  224. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.json +0 -58
  225. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +0 -109
  226. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +0 -38
  227. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  228. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  229. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  230. mindspore/lib/plugin/ascend/custom_ascendc_ops/version.info +0 -1
  231. mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_api/include/aclnn_decoder_kv_cache.h +0 -0
  232. mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_api/include/aclnn_prompt_kv_cache.h +0 -0
  233. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/decoder_kv_cache.cpp +0 -0
  234. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/prompt_kv_cache.cpp +0 -0
  235. mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_api/include/aclnn_all_finite.h +0 -0
  236. mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -0
  237. mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +0 -0
  238. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl}/dynamic/all_finite.cpp +0 -0
  239. mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_proto/inc/op_proto.h +0 -0
  240. {mindspore-2.4.1.dist-info → mindspore-2.4.10.dist-info}/WHEEL +0 -0
  241. {mindspore-2.4.1.dist-info → mindspore-2.4.10.dist-info}/entry_points.txt +0 -0
  242. {mindspore-2.4.1.dist-info → mindspore-2.4.10.dist-info}/top_level.txt +0 -0
@@ -15,10 +15,16 @@
15
15
  """llm boost"""
16
16
  import json
17
17
  import mindspore.common.dtype as mstype
18
- from mindspore.experimental.llm_boost.atb.boost_base import AtbBoostBase
18
+ from mindspore.experimental.llm_boost.atb.boost_base import (
19
+ AtbBoostBase,
20
+ PositionEmbeddingType,
21
+ NormType,
22
+ )
19
23
  from mindspore._c_expression import LlmBoostBinder
20
24
  from mindspore.experimental.llm_boost.register import LlmBoostRegister, LlmBoostType
21
25
 
26
+ CPP_LLAMA_MODEL_CLASS_NAME = "llama_LlamaDecoderModel"
27
+
22
28
 
23
29
  @LlmBoostRegister.register(LlmBoostType.BUILDIN, "Llama")
24
30
  class LlamaBoost(AtbBoostBase):
@@ -30,14 +36,17 @@ class LlamaBoost(AtbBoostBase):
30
36
  self.acl_encoder_operation_inputs = [None] * self.in_tensor_length
31
37
  self.acl_decoder_operation_inputs = [None] * self.in_tensor_length
32
38
  self.atb_encoder_operation = LlmBoostBinder(
33
- "ATB", "llama_parallel_DecoderModel")
39
+ self.backend_name, CPP_LLAMA_MODEL_CLASS_NAME
40
+ )
34
41
  self.atb_decoder_operation = LlmBoostBinder(
35
- "ATB", "llama_parallel_DecoderModel")
42
+ self.backend_name, CPP_LLAMA_MODEL_CLASS_NAME
43
+ )
36
44
 
37
45
  def init(self):
38
46
  """set param"""
39
47
  coder_param = {
40
- "rmsNormEps": self.config.rms_norm_eps,
48
+ "normEps": self.config.rms_norm_eps,
49
+ "normType": NormType.RMS_NORM,
41
50
  "numAttentionHeadsPerRank": self.config.num_heads // self.device_num,
42
51
  "hiddenSizePerAttentionHead": self.head_dim,
43
52
  "numHiddenLayers": self.num_layers,
@@ -46,32 +55,41 @@ class LlamaBoost(AtbBoostBase):
46
55
  "isFA": False,
47
56
  "isBF16": self.dtype == mstype.bfloat16,
48
57
  "packQuantType": [[1, 1] for _ in range(self.num_layers)],
49
- "linearQuantType": [[0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)],
50
- "linearTransposeType": [[1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)],
58
+ "linearQuantType": [
59
+ [0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)
60
+ ],
61
+ "linearTransposeType": [
62
+ [1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)
63
+ ],
51
64
  "isEmbeddingParallel": False,
52
65
  "isLmHeadParallel": not self.config.parallel_config.vocab_emb_dp,
53
66
  "lmHeadTransposeType": 1,
54
- "supportSwiGLU": True,
55
- "kvQuant": self.kv_quant is not None,
67
+ "enableSwiGLU": True,
68
+ "enablekvQuant": self.kv_quant is not None,
56
69
  "rank": self.rank_id,
57
70
  "worldSize": self.device_num,
58
- "backend": "lccl",
71
+ "backend": self.config.communication_backend,
59
72
  "rankTableFile": "",
60
- "positionEmbeddingType": self.position_embedding_type,
73
+ "positionEmbeddingType": PositionEmbeddingType.ROPE,
61
74
  "hiddenSize": self.config.hidden_size,
62
75
  "gemma": False,
63
- "enableAddNorm": True,
64
- "supportCompressHead": False,
76
+ "enableAddNorm": False,
77
+ "enableCompressHead": False,
78
+ "isUnpadInputs": True,
65
79
  }
66
80
  encoder_param = {
67
- **coder_param, "isPrefill": True,
68
- "supportLcoc": True,
69
- "supportSpeculate": False,
70
- "skipWordEmbedding": False
81
+ **coder_param,
82
+ "isPrefill": True,
83
+ "enableLcoc": True,
84
+ "enableSpeculate": False,
85
+ "skipWordEmbedding": False,
86
+ "enableSplitFuse": False,
71
87
  }
72
88
  decoder_param = {
73
- **coder_param, "isPrefill": False, "supportLcoc": False,
74
- "supportSpeculate": False
89
+ **coder_param,
90
+ "isPrefill": False,
91
+ "enableLcoc": False,
92
+ "enableSpeculate": False,
75
93
  }
76
94
  self.atb_encoder_operation.init(json.dumps({**encoder_param}))
77
95
  self.atb_decoder_operation.init(json.dumps({**decoder_param}))
@@ -92,14 +110,15 @@ class LlamaBoost(AtbBoostBase):
92
110
  **kwargs
93
111
  ):
94
112
  """prepare inputs"""
95
- self.acl_param = json.dumps({
96
- "seqLen": seqLen,
97
- })
98
- self.acl_decoder_operation_inputs[0] = self.cast(
99
- input_ids, mstype.int64)
113
+ self.acl_param = json.dumps(
114
+ {
115
+ "seqLen": seqLen,
116
+ }
117
+ )
118
+
119
+ self.acl_decoder_operation_inputs[0] = input_ids
100
120
  self.acl_decoder_operation_inputs[1] = self.placeholder
101
- self.acl_decoder_operation_inputs[2] = self.cast(
102
- position_ids, mstype.int32)
121
+ self.acl_decoder_operation_inputs[2] = position_ids
103
122
  self.acl_decoder_operation_inputs[3] = cos_embed
104
123
  self.acl_decoder_operation_inputs[4] = sin_embed
105
124
  self.acl_decoder_operation_inputs[5] = attention_mask
@@ -108,8 +127,6 @@ class LlamaBoost(AtbBoostBase):
108
127
  self.acl_decoder_operation_inputs[8] = self.placeholder
109
128
  self.acl_decoder_operation_inputs[9] = self.placeholder
110
129
  self.acl_decoder_operation_inputs[10] = self.placeholder
111
- self.acl_decoder_operation_inputs[11] = self.cast(
112
- input_lengths, mstype.int32)
113
- self.acl_decoder_operation_inputs[12] = self.cast(
114
- lm_head_indices, mstype.int64)
130
+ self.acl_decoder_operation_inputs[11] = input_lengths
131
+ self.acl_decoder_operation_inputs[12] = lm_head_indices
115
132
  return self.acl_decoder_operation_inputs, self.acl_param
@@ -15,11 +15,14 @@
15
15
  """llm boost"""
16
16
  import json
17
17
  import mindspore.common.dtype as mstype
18
- from mindspore.experimental.llm_boost.atb.boost_base import AtbBoostBase
18
+ from mindspore.experimental.llm_boost.atb.boost_base import AtbBoostBase, NormType
19
19
  from mindspore._c_expression import LlmBoostBinder
20
20
  from mindspore.experimental.llm_boost.register import LlmBoostRegister, LlmBoostType
21
21
 
22
22
 
23
+ CPP_QWEN_MODEL_CLASS_NAME = "qwen_QwenDecoderModel"
24
+
25
+
23
26
  @LlmBoostRegister.register(LlmBoostType.BUILDIN, "Qwen")
24
27
  class QwenBoost(AtbBoostBase):
25
28
  """QwenBoost class"""
@@ -30,9 +33,11 @@ class QwenBoost(AtbBoostBase):
30
33
  self.acl_encoder_operation_inputs = [None] * self.in_tensor_length
31
34
  self.acl_decoder_operation_inputs = [None] * self.in_tensor_length
32
35
  self.atb_encoder_operation = LlmBoostBinder(
33
- "ATB", "qwen_DecoderModel")
36
+ self.backend_name, CPP_QWEN_MODEL_CLASS_NAME
37
+ )
34
38
  self.atb_decoder_operation = LlmBoostBinder(
35
- "ATB", "qwen_DecoderModel")
39
+ self.backend_name, CPP_QWEN_MODEL_CLASS_NAME
40
+ )
36
41
 
37
42
  def init(self):
38
43
  """set param"""
@@ -42,24 +47,43 @@ class QwenBoost(AtbBoostBase):
42
47
  "withEmbedding": True,
43
48
  "isEmbeddingParallel": True,
44
49
  "isLmHeadParallel": True,
45
- "linearTransposeType": [[1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)],
50
+ "linearTransposeType": [
51
+ [1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)
52
+ ],
46
53
  "lmHeadTransposeType": 1,
47
- "supportSwiGLU": not self.need_nz,
48
- "rmsNormEps": self.config.rms_norm_eps,
54
+ "enableSwiGLU": not self.need_nz,
55
+ "normEps": self.config.rms_norm_eps,
56
+ "normType": NormType.RMS_NORM,
49
57
  "numAttentionHeadsPerRank": self.config.num_heads // self.device_num,
50
58
  "hiddenSizePerAttentionHead": self.head_dim,
51
59
  "numHiddenLayers": self.num_layers,
52
60
  "numKeyValueHeadsPerRank": self.n_kv_heads // self.device_num,
53
61
  "rank": self.rank_id,
54
62
  "worldSize": self.device_num,
55
- "backend": "lccl",
63
+ "backend": self.config.communication_backend,
56
64
  "packQuantType": [[1, 1] for _ in range(self.num_layers)],
57
- "linearQuantType": [[0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)],
58
- "kvQuant": self.kv_quant is not None,
65
+ "linearQuantType": [
66
+ [0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)
67
+ ],
68
+ "linearHasBias": [[True, False, False, False]] * self.num_layers,
69
+ "enableKvQuant": self.kv_quant is not None,
70
+ "enableLora": False,
71
+ "isUnpadInputs": True,
72
+ "enableAddNorm": False,
73
+ }
74
+ encoder_param = {
75
+ **param_dict,
76
+ "isPrefill": True,
77
+ "enableLcoc": False,
78
+ "enableSplitFuse": False,
79
+ }
80
+ decoder_param = {
81
+ **param_dict,
82
+ "isPrefill": False,
83
+ "enableLcoc": False,
84
+ "enableSpeculate": False,
85
+ "enablePrefixCache": False,
59
86
  }
60
- encoder_param = {**param_dict, "isPrefill": True, "supportLcoc": False}
61
- decoder_param = {**param_dict, "isPrefill": False,
62
- "supportLcoc": False, "supportSpeculate": False}
63
87
  self.atb_encoder_operation.init(json.dumps({**encoder_param}))
64
88
  self.atb_decoder_operation.init(json.dumps({**decoder_param}))
65
89
 
@@ -79,13 +103,14 @@ class QwenBoost(AtbBoostBase):
79
103
  **kwargs
80
104
  ):
81
105
  """prepare inputs"""
82
- self.acl_param = json.dumps({
83
- "seqLen": seqLen,
84
- })
85
- self.acl_decoder_operation_inputs[0] = self.cast(
86
- input_ids, mstype.int64)
87
- self.acl_decoder_operation_inputs[1] = self.cast(
88
- position_ids, mstype.int32)
106
+ self.acl_param = json.dumps(
107
+ {
108
+ "seqLen": seqLen,
109
+ }
110
+ )
111
+
112
+ self.acl_decoder_operation_inputs[0] = input_ids
113
+ self.acl_decoder_operation_inputs[1] = position_ids
89
114
  self.acl_decoder_operation_inputs[2] = cos_embed
90
115
  self.acl_decoder_operation_inputs[3] = sin_embed
91
116
  self.acl_decoder_operation_inputs[4] = attention_mask
@@ -93,9 +118,7 @@ class QwenBoost(AtbBoostBase):
93
118
  self.acl_decoder_operation_inputs[6] = slots
94
119
  self.acl_decoder_operation_inputs[7] = self.placeholder
95
120
  self.acl_decoder_operation_inputs[8] = self.placeholder
96
- self.acl_decoder_operation_inputs[9] = self.cast(
97
- input_lengths, mstype.int32)
98
- self.acl_decoder_operation_inputs[10] = self.cast(
99
- lm_head_indices, mstype.int64)
100
- self.acl_decoder_operation_inputs[11] = self.placeholder
121
+ self.acl_decoder_operation_inputs[9] = self.placeholder
122
+ self.acl_decoder_operation_inputs[10] = input_lengths
123
+ self.acl_decoder_operation_inputs[11] = lm_head_indices
101
124
  return self.acl_decoder_operation_inputs, self.acl_param
@@ -236,7 +236,7 @@ std::string DeviceInfoContext::GetProviderDevice() const { return CharToString(G
236
236
  void DeviceInfoContext::SetProviderDevice(const std::string &device) { SetProviderDevice(StringToChar(device)); }
237
237
 
238
238
  /// \brief Derived from DeviceInfoContext, The configuration of the model running auto on the Host Devices, include
239
- /// CPU/GPU/NPU/Ascend310/Ascend910. This option is only valid for MindSpore Lite.
239
+ /// CPU/GPU/NPU/Ascend. This option is only valid for MindSpore Lite.
240
240
  class MS_API AutoDeviceInfo : public DeviceInfoContext {
241
241
  public:
242
242
  /// \brief Get the type of this DeviceInfoContext.
@@ -108,8 +108,8 @@ enum class DATASET_API ManualOffloadMode {
108
108
  enum class DATASET_API MapTargetDevice {
109
109
  kCpu = 0, ///< CPU Device.
110
110
  kGpu, ///< Gpu Device.
111
- kAscend310, ///< Ascend310 Device.
112
- kAscend910B, ///< Ascend910B Device.
111
+ kAscend310, ///<
112
+ kAscend910B, ///<
113
113
  kInvalid = 100
114
114
  };
115
115
 
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,10 @@
1
+ {
2
+ "DecoderKvCache": {
3
+ "isGray": false,
4
+ "isHeavy": false
5
+ },
6
+ "PromptKvCache": {
7
+ "isGray": false,
8
+ "isHeavy": false
9
+ }
10
+ }
@@ -1,46 +1,4 @@
1
1
  {
2
- "AllFinite":{
3
- "dynamicCompileStatic":{
4
- "flag":"true"
5
- },
6
- "dynamicFormat":{
7
- "flag":"true"
8
- },
9
- "dynamicRankSupport":{
10
- "flag":"true"
11
- },
12
- "dynamicShapeSupport":{
13
- "flag":"true"
14
- },
15
- "input0":{
16
- "dtype":"float16,float32,bfloat16",
17
- "format":"ND,ND,ND",
18
- "name":"gradient",
19
- "paramType":"required",
20
- "shape":"all",
21
- "unknownshape_format":"ND,ND,ND"
22
- },
23
- "needCheckSupport":{
24
- "flag":"false"
25
- },
26
- "opFile":{
27
- "value":"all_finite"
28
- },
29
- "opInterface":{
30
- "value":"all_finite"
31
- },
32
- "output0":{
33
- "dtype":"bool,bool,bool",
34
- "format":"ND,ND,ND",
35
- "name":"is_finite",
36
- "paramType":"required",
37
- "shape":"all",
38
- "unknownshape_format":"ND,ND,ND"
39
- },
40
- "precision_reduce":{
41
- "flag":"true"
42
- }
43
- },
44
2
  "DecoderKvCache":{
45
3
  "dynamicCompileStatic":{
46
4
  "flag":"true"
@@ -1,3 +1,8 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: UTF-8 -*-
3
+ """
4
+ Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
5
+ """
1
6
 
2
7
  import os, sys
3
8
  import ctypes
@@ -6,6 +11,7 @@ import shutil
6
11
  from tbe.common.platform import get_soc_spec
7
12
  from tbe.common.utils import para_check
8
13
  from tbe.tikcpp import compile_op, replay_op, check_op_cap, generalize_op_params, get_code_channel, OpInfo
14
+ from tbe.tikcpp.compile_op import CommonUtility, AscendCLogLevel
9
15
  from tbe.common.buildcfg import get_default_build_config
10
16
  from impl.util.platform_adapter import tbe_register
11
17
  from tbe.common.buildcfg import get_current_build_config
@@ -28,6 +34,7 @@ DTYPE_MAP = {"float32": ["DT_FLOAT", "float"],
28
34
  "dual_sub_int8": ["DT_DUAL_SUB_INT8", "unknown"],
29
35
  "dual_sub_uint8": ["DT_DUAL_SUB_UINT8", "unknown"],
30
36
  "string": ["DT_STRING", "unknown"],
37
+ "complex32": ["DT_COMPLEX32", "unknown"],
31
38
  "complex64": ["DT_COMPLEX64", "unknown"],
32
39
  "complex128": ["DT_COMPLEX128", "unknown"],
33
40
  "qint8": ["DT_QINT8", "unknown"],
@@ -49,21 +56,23 @@ def add_dtype_fmt_option_single(x, x_n, is_ref: bool = False):
49
56
  options.append("-DORIG_DTYPE_{n}={ot}".format(n=x_n_in_kernel, ot=DTYPE_MAP.get(x_dtype)[0]))
50
57
  options.append("-DFORMAT_{n}=FORMAT_{f}".format(n=x_n_in_kernel, f=x_fmt))
51
58
  return options
52
-
59
+
53
60
  def get_dtype_fmt_options(__inputs__, __outputs__):
54
61
  options = []
62
+ input_names = ['cache', 'update', 'valid_seq_len', 'batch_index', 'seq_len_axis', 'new_max_seq_len', 'cur_max_seq_len']
63
+ output_names = ['out']
55
64
  unique_param_name_set = set()
56
- for x in __inputs__:
65
+ for idx, x in enumerate(__inputs__):
57
66
  if x is None:
58
67
  continue
59
- x_n = x.get("param_name")[:-5].upper()
68
+ x_n = input_names[idx].upper()
60
69
  unique_param_name_set.add(x_n)
61
70
  options += add_dtype_fmt_option_single(x, x_n)
62
-
63
- for x in __outputs__:
71
+
72
+ for idx, x in enumerate(__outputs__):
64
73
  if x is None:
65
74
  continue
66
- x_n = x.get("param_name")[:-5].upper()
75
+ x_n = output_names[idx].upper()
67
76
  if x_n in unique_param_name_set:
68
77
  options += add_dtype_fmt_option_single(x, x_n, True)
69
78
  else:
@@ -74,27 +83,40 @@ def load_dso(so_path):
74
83
  try:
75
84
  ctypes.CDLL(so_path)
76
85
  except OSError as error :
77
- print(error)
86
+ CommonUtility.print_compile_log("", error, AscendCLogLevel.LOG_ERROR)
78
87
  raise RuntimeError("cannot open %s" %(so_path))
79
88
  else:
80
- print("load so succ ", so_path)
89
+ msg = "load so succ " + so_path
90
+ CommonUtility.print_compile_log("", msg, AscendCLogLevel.LOG_INFO)
81
91
 
82
92
  def get_shortsoc_compile_option(compile_option_list: list, shortsoc:str):
83
93
  compile_options = []
84
94
  if shortsoc in compile_option_list:
85
- compile_options = compile_option_list[shortsoc]
86
- elif '__ALLSOC__' in compile_option_list:
87
- compile_options = compile_option_list['__ALLSOC__']
95
+ compile_options.extend(compile_option_list[shortsoc])
96
+ if '__ALLSOC__' in compile_option_list:
97
+ compile_options.extend(compile_option_list['__ALLSOC__'])
88
98
  return compile_options
89
99
 
90
100
  def get_kernel_source(src_file, dir_snake, dir_ex):
91
101
  src_ex = os.path.join(PYF_PATH, "..", "ascendc", dir_ex, src_file)
92
102
  if os.path.exists(src_ex):
93
103
  return src_ex
104
+ src = os.environ.get('BUILD_KERNEL_SRC')
105
+ if src and os.path.exists(src):
106
+ return src
94
107
  src = os.path.join(PYF_PATH, "..", "ascendc", dir_snake, src_file)
95
108
  if os.path.exists(src):
96
109
  return src
97
110
  src = os.path.join(PYF_PATH, src_file)
111
+ if os.path.exists(src):
112
+ return src
113
+ src = os.path.join(PYF_PATH, "..", "ascendc", dir_snake, dir_snake + ".cpp")
114
+ if os.path.exists(src):
115
+ return src
116
+ src = os.path.join(PYF_PATH, "..", "ascendc", dir_ex, dir_ex + ".cpp")
117
+ if os.path.exists(src):
118
+ return src
119
+ src = os.path.join(PYF_PATH, "..", "ascendc", os.path.splitext(src_file)[0], src_file)
98
120
  if os.path.exists(src):
99
121
  return src
100
122
  return src_ex
@@ -109,6 +131,8 @@ def _build_args(cache_in__, update_in__, valid_seq_len_in__, batch_index_in__, s
109
131
  __inputs__.append(arg[0])
110
132
  else:
111
133
  __inputs__.append(arg)
134
+ else:
135
+ __inputs__.append(arg)
112
136
  __outputs__ = []
113
137
  for arg in [out_out_]:
114
138
  if arg != None:
@@ -118,24 +142,30 @@ def _build_args(cache_in__, update_in__, valid_seq_len_in__, batch_index_in__, s
118
142
  __outputs__.append(arg[0])
119
143
  else:
120
144
  __outputs__.append(arg)
145
+ else:
146
+ __outputs__.append(arg)
121
147
  __attrs__ = []
122
148
  return __inputs__, __outputs__, __attrs__
123
149
 
124
150
  @tbe_register.register_operator("DecoderKvCache", trans_bool_to_s8=False)
125
151
  @para_check.check_op_params(para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT, para_check.REQUIRED_OUTPUT, para_check.KERNEL_NAME)
126
152
  def decoder_kv_cache(cache_in__, update_in__, valid_seq_len_in__, batch_index_in__, seq_len_axis_in__, new_max_seq_len_in__, cur_max_seq_len_in__, out_out_, kernel_name="decoder_kv_cache", impl_mode=""):
153
+ # do ascendc build step
127
154
  if get_current_build_config("enable_op_prebuild"):
128
155
  return
129
156
  __inputs__, __outputs__, __attrs__ = _build_args(cache_in__, update_in__, valid_seq_len_in__, batch_index_in__, seq_len_axis_in__, new_max_seq_len_in__, cur_max_seq_len_in__, out_out_)
130
157
  options = get_dtype_fmt_options(__inputs__, __outputs__)
131
158
  options += ["-x", "cce"]
132
- bisheng = shutil.which("bisheng")
159
+ bisheng = os.environ.get('BISHENG_REAL_PATH')
160
+ if bisheng is None:
161
+ bisheng = shutil.which("bisheng")
133
162
  if bisheng != None:
134
163
  bisheng_path = os.path.dirname(bisheng)
135
164
  tikcpp_path = os.path.realpath(os.path.join(bisheng_path, "..", "..", "tikcpp"))
136
165
  else:
137
166
  tikcpp_path = os.path.realpath("/usr/local/Ascend/latest/compiler/tikcpp")
138
167
  options.append("-I" + tikcpp_path)
168
+ options.append("-I" + os.path.join(tikcpp_path, "..", "..", "include"))
139
169
  options.append("-I" + os.path.join(tikcpp_path, "tikcfw"))
140
170
  options.append("-I" + os.path.join(tikcpp_path, "tikcfw", "impl"))
141
171
  options.append("-I" + os.path.join(tikcpp_path, "tikcfw", "interface"))
@@ -144,8 +174,10 @@ def decoder_kv_cache(cache_in__, update_in__, valid_seq_len_in__, batch_index_in
144
174
  options.append("-DHIGH_PERFORMANCE=1")
145
175
  elif impl_mode == "high_precision":
146
176
  options.append("-DHIGH_PRECISION=1")
147
- if get_default_build_config("enable_deterministic_mode") == 1:
148
- options.append("-DDETEMINISTIC_MODE=1")
177
+ if get_current_build_config("enable_deterministic_mode") == 1:
178
+ options.append("-DDETERMINISTIC_MODE=1")
179
+ else:
180
+ options.append("-DDETERMINISTIC_MODE=0")
149
181
 
150
182
  custom_compile_options = {},
151
183
  custom_all_compile_options = {},
@@ -162,11 +194,14 @@ def decoder_kv_cache(cache_in__, update_in__, valid_seq_len_in__, batch_index_in
162
194
  ascendc_src_file = "decoder_kv_cache.cpp"
163
195
  src = get_kernel_source(ascendc_src_file, ascendc_src_dir, ascendc_src_dir_ex)
164
196
 
165
- print("start compile Ascend C operator DecoderKvCache. kernel name is " + kernel_name)
197
+ msg = "start compile Acend C Operator DecoderKvCache, kernel name is " + kernel_name
198
+ CommonUtility.print_compile_log("", msg, AscendCLogLevel.LOG_INFO)
166
199
  op_type = "DecoderKvCache"
167
200
  code_channel = get_code_channel(src, kernel_name, op_type, options)
168
201
  op_info = OpInfo(kernel_name = kernel_name, op_type = op_type, inputs = __inputs__, outputs = __outputs__,\
169
- attrs = __attrs__, impl_mode = impl_mode, origin_inputs=[cache_in__, update_in__, valid_seq_len_in__, batch_index_in__, seq_len_axis_in__, new_max_seq_len_in__, cur_max_seq_len_in__], origin_outputs = [out_out_])
202
+ attrs = __attrs__, impl_mode = impl_mode, origin_inputs=[cache_in__, update_in__, valid_seq_len_in__, batch_index_in__, seq_len_axis_in__, new_max_seq_len_in__, cur_max_seq_len_in__], origin_outputs = [out_out_],\
203
+ param_type_dynamic = False, mc2_ctx = [], param_type_list = ['required', 'required', 'required', 'required', 'required', 'required', 'required', 'required'], init_value_list = [None],\
204
+ output_shape_depend_on_compute = [])
170
205
  compile_op(src, origin_func_name, op_info, options, code_channel, '{}')
171
206
 
172
207
  def op_select_format(cache_in__, update_in__, valid_seq_len_in__, batch_index_in__, seq_len_axis_in__, new_max_seq_len_in__, cur_max_seq_len_in__, out_out_, impl_mode=""):