mindspore-2.4.0-cp39-none-any.whl → mindspore-2.4.10-cp39-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (306)
  1. mindspore/.commit_id +1 -1
  2. mindspore/_c_dataengine.cpython-39-aarch64-linux-gnu.so +0 -0
  3. mindspore/_c_expression.cpython-39-aarch64-linux-gnu.so +0 -0
  4. mindspore/_c_mindrecord.cpython-39-aarch64-linux-gnu.so +0 -0
  5. mindspore/bin/cache_server +0 -0
  6. mindspore/common/api.py +1 -4
  7. mindspore/common/file_system.py +2 -0
  8. mindspore/common/initializer.py +51 -15
  9. mindspore/common/parameter.py +6 -5
  10. mindspore/common/tensor.py +15 -49
  11. mindspore/communication/_comm_helper.py +5 -0
  12. mindspore/communication/comm_func.py +7 -7
  13. mindspore/context.py +16 -2
  14. mindspore/dataset/engine/datasets_standard_format.py +17 -0
  15. mindspore/dataset/engine/datasets_user_defined.py +27 -1
  16. mindspore/experimental/llm_boost/__init__.py +2 -2
  17. mindspore/experimental/llm_boost/atb/boost_base.py +240 -64
  18. mindspore/experimental/llm_boost/atb/llama_boost.py +46 -29
  19. mindspore/experimental/llm_boost/atb/qwen_boost.py +47 -24
  20. mindspore/include/api/context.h +1 -1
  21. mindspore/include/dataset/constants.h +2 -2
  22. mindspore/include/mindapi/base/format.h +13 -0
  23. mindspore/lib/libavcodec.so.59 +0 -0
  24. mindspore/lib/libavdevice.so.59 +0 -0
  25. mindspore/lib/libavfilter.so.8 +0 -0
  26. mindspore/lib/libavformat.so.59 +0 -0
  27. mindspore/lib/libavutil.so.57 +0 -0
  28. mindspore/lib/libdnnl.so.2 +0 -0
  29. mindspore/lib/libmindspore_backend.so +0 -0
  30. mindspore/lib/libmindspore_common.so +0 -0
  31. mindspore/lib/libmindspore_core.so +0 -0
  32. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  33. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  34. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  35. mindspore/lib/libmindspore_ops.so +0 -0
  36. mindspore/lib/libopencv_core.so.4.5 +0 -0
  37. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  38. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  39. mindspore/lib/libswresample.so.4 +0 -0
  40. mindspore/lib/libswscale.so.6 +0 -0
  41. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  42. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  43. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  44. mindspore/lib/plugin/ascend/custom_ascendc_910/framework/npu_supported_ops.json +10 -0
  45. mindspore/lib/plugin/ascend/custom_ascendc_910/op_api/lib/libcust_opapi.so +0 -0
  46. mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +0 -42
  47. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/decoder_kv_cache.py +51 -16
  48. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/prompt_kv_cache.py +51 -16
  49. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +158 -0
  50. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  51. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +158 -0
  52. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  53. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +158 -0
  54. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  55. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +158 -0
  56. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  57. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +158 -0
  58. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  59. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +158 -0
  60. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  61. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +158 -0
  62. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  63. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +158 -0
  64. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  65. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +167 -0
  66. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  67. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +167 -0
  68. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  69. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +167 -0
  70. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  71. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +167 -0
  72. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  73. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +167 -0
  74. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  75. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +167 -0
  76. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  77. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +167 -0
  78. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  79. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +167 -0
  80. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  81. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/binary_info_config.json +302 -0
  82. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/decoder_kv_cache.json +892 -0
  83. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/prompt_kv_cache.json +892 -0
  84. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  85. mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  86. mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/inc/op_proto.h +33 -0
  87. mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  88. mindspore/lib/plugin/ascend/custom_ascendc_910/version.info +1 -0
  89. mindspore/lib/plugin/ascend/custom_ascendc_910b/framework/npu_supported_ops.json +14 -0
  90. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_decoder_kv_cache.h +59 -0
  91. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_prompt_kv_cache.h +59 -0
  92. mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_api/lib/libcust_opapi.so +0 -0
  93. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl}/dynamic/all_finite.py +51 -16
  94. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.cpp +192 -0
  95. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.py +215 -0
  96. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.cpp +274 -0
  97. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.py +215 -0
  98. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +80 -0
  99. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o +0 -0
  100. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +80 -0
  101. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o +0 -0
  102. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +80 -0
  103. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o +0 -0
  104. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +158 -0
  105. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  106. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +158 -0
  107. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  108. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +158 -0
  109. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  110. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +158 -0
  111. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  112. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +158 -0
  113. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  114. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +158 -0
  115. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  116. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +158 -0
  117. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  118. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +158 -0
  119. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  120. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +167 -0
  121. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  122. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +167 -0
  123. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  124. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +167 -0
  125. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  126. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +167 -0
  127. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  128. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +167 -0
  129. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  130. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +167 -0
  131. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  132. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +167 -0
  133. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  134. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +167 -0
  135. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  136. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +78 -0
  137. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o} +0 -0
  138. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +78 -0
  139. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o} +0 -0
  140. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +78 -0
  141. mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o} +0 -0
  142. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +156 -0
  143. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
  144. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +156 -0
  145. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
  146. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +156 -0
  147. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
  148. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +156 -0
  149. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
  150. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +156 -0
  151. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
  152. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +156 -0
  153. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
  154. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +156 -0
  155. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
  156. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +156 -0
  157. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
  158. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +165 -0
  159. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
  160. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +165 -0
  161. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
  162. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +165 -0
  163. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
  164. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +165 -0
  165. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
  166. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +165 -0
  167. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
  168. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +165 -0
  169. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
  170. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +165 -0
  171. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
  172. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +165 -0
  173. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
  174. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/all_finite.json +139 -0
  175. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/binary_info_config.json +361 -0
  176. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/decoder_kv_cache.json +892 -0
  177. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/prompt_kv_cache.json +892 -0
  178. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +139 -0
  179. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +361 -0
  180. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/decoder_kv_cache.json +892 -0
  181. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/prompt_kv_cache.json +892 -0
  182. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  183. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  184. mindspore/lib/plugin/ascend/custom_ascendc_910b/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  185. mindspore/lib/plugin/ascend/custom_ascendc_910b/version.info +1 -0
  186. mindspore/lib/plugin/ascend/custom_compiler/setup.py +1 -1
  187. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  188. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  189. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  190. mindspore/lib/plugin/ascend/libmindspore_internal_kernels.so +0 -0
  191. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/PkgInspect +0 -0
  192. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/op_man +0 -0
  193. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
  194. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_host.so +0 -0
  195. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/rt/base/types.h +5 -5
  196. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
  197. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
  198. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal.so +0 -0
  199. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal_static.a +0 -0
  200. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h +1 -0
  201. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/flash_attention_score_op.h +6 -1
  202. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/paged_attention_op.h +6 -1
  203. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/rms_norm_op.h +4 -3
  204. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libAdd_impl.so +0 -0
  205. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libSub_impl.so +0 -0
  206. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layer_norm_impl.so +0 -0
  207. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_impl.so +0 -0
  208. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_quant_acme_impl.so +0 -0
  209. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_impl.so +0 -0
  210. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_old_impl.so +0 -0
  211. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_impl.so +0 -0
  212. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_old_impl.so +0 -0
  213. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_impl.so +0 -0
  214. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_impl.so +0 -0
  215. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_impl.so +0 -0
  216. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
  217. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmulti_weight_matmul_kernel_impl.so +0 -0
  218. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libnot_equal_impl.so +0 -0
  219. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_impl.so +0 -0
  220. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_impl.so +0 -0
  221. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_old_impl.so +0 -0
  222. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_impl.so +0 -0
  223. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
  224. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
  225. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bsh_full_mix.o +0 -0
  226. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
  227. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
  228. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_full_mix.o +0 -0
  229. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
  230. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_bf16.o +0 -0
  231. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_fp16.o +0 -0
  232. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_fp32.o +0 -0
  233. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_bf16.o +0 -0
  234. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_fp16.o +0 -0
  235. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_fp32.o +0 -0
  236. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bsh_mix.o +0 -0
  237. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bnsd_mix.o +0 -0
  238. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bsh_mix.o +0 -0
  239. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblcal.so +0 -0
  240. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblccl_wrapper.so +0 -0
  241. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  242. mindspore/mint/__init__.py +490 -2
  243. mindspore/mint/nn/__init__.py +2 -2
  244. mindspore/mint/optim/adamw.py +6 -14
  245. mindspore/nn/__init__.py +2 -0
  246. mindspore/nn/cell.py +16 -4
  247. mindspore/nn/layer/basic.py +24 -7
  248. mindspore/nn/layer/conv.py +3 -0
  249. mindspore/nn/layer/embedding.py +31 -14
  250. mindspore/nn/layer/pooling.py +8 -10
  251. mindspore/nn/optim/tft_wrapper.py +12 -15
  252. mindspore/nn/utils/__init__.py +22 -0
  253. mindspore/nn/utils/init.py +71 -0
  254. mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
  255. mindspore/ops/_grad_experimental/grad_comm_ops.py +45 -8
  256. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +6 -0
  257. mindspore/ops/auto_generate/gen_extend_func.py +33 -0
  258. mindspore/ops/auto_generate/gen_ops_def.py +52 -3
  259. mindspore/ops/auto_generate/gen_ops_prim.py +158 -8
  260. mindspore/ops/function/array_func.py +2 -0
  261. mindspore/ops/function/math_func.py +12 -5
  262. mindspore/ops/function/random_func.py +221 -7
  263. mindspore/ops/operations/__init__.py +1 -1
  264. mindspore/ops/operations/array_ops.py +3 -1
  265. mindspore/ops/operations/comm_ops.py +25 -1
  266. mindspore/ops/operations/custom_ops.py +6 -4
  267. mindspore/ops/operations/manually_defined/ops_def.py +8 -10
  268. mindspore/ops/operations/nn_ops.py +7 -2
  269. mindspore/parallel/_auto_parallel_context.py +26 -5
  270. mindspore/parallel/_cell_wrapper.py +24 -3
  271. mindspore/parallel/_tensor.py +46 -2
  272. mindspore/parallel/_utils.py +39 -21
  273. mindspore/parallel/transform_safetensors.py +196 -43
  274. mindspore/profiler/profiling.py +5 -1
  275. mindspore/run_check/_check_version.py +20 -9
  276. mindspore/train/_utils.py +92 -32
  277. mindspore/train/callback/_checkpoint.py +12 -9
  278. mindspore/train/callback/_on_request_exit.py +12 -1
  279. mindspore/train/callback/_tft_register.py +33 -9
  280. mindspore/train/dataset_helper.py +10 -2
  281. mindspore/train/model.py +21 -0
  282. mindspore/train/serialization.py +12 -19
  283. mindspore/version.py +1 -1
  284. {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/METADATA +9 -7
  285. {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/RECORD +297 -170
  286. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.json +0 -58
  287. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.json +0 -58
  288. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.json +0 -58
  289. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +0 -109
  290. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +0 -38
  291. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  292. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  293. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  294. mindspore/lib/plugin/ascend/custom_ascendc_ops/version.info +0 -1
  295. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_api/include/aclnn_decoder_kv_cache.h +0 -0
  296. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_api/include/aclnn_prompt_kv_cache.h +0 -0
  297. /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/decoder_kv_cache.cpp +0 -0
  298. /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/prompt_kv_cache.cpp +0 -0
  299. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_api/include/aclnn_all_finite.h +0 -0
  300. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -0
  301. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +0 -0
  302. /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl}/dynamic/all_finite.cpp +0 -0
  303. /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_proto/inc/op_proto.h +0 -0
  304. {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/WHEEL +0 -0
  305. {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/entry_points.txt +0 -0
  306. {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/top_level.txt +0 -0

mindspore/experimental/llm_boost/atb/boost_base.py
@@ -13,17 +13,32 @@
 # limitations under the License.
 # ============================================================================
 """boost base class"""
+from enum import Enum
 import numpy as np
 import mindspore as ms
 from mindspore import ops, Tensor
+from mindspore import log as logger
 from mindspore.ops import operations as P
 import mindspore.common.dtype as mstype
 from mindspore._c_expression import _set_format
-
 from mindspore.common.parameter import Parameter
 from mindspore.experimental.llm_boost.utils import get_real_rank, get_real_group_size
 from mindspore.common.initializer import Zero
 
+FORMAT_NZ = "FRACTAL_NZ"
+BUILDIN_BACKEND_NAME = "ATB"
+
+
+class PositionEmbeddingType(int, Enum):
+    ROPE = 0
+    ALIBI = 1
+    ABSOLUTE = 2
+
+
+class NormType(int, Enum):
+    RMS_NORM = 0
+    LAYER_NORM = 1
+
 
 class AttentionMask:
     """attention mask"""
@@ -31,30 +46,34 @@ class AttentionMask:
     @classmethod
     def static(cls, max_seq_len, dtype=mstype.float16, need_nz=False):
         """cache mask"""
-        bias_cache = Tensor(np.tril(np.ones((max_seq_len, max_seq_len), dtype=np.bool_))).reshape(max_seq_len,
-                                                                                                  max_seq_len)
+        bias_cache = Tensor(
+            np.tril(np.ones((max_seq_len, max_seq_len), dtype=np.bool_))
+        ).reshape(max_seq_len, max_seq_len)
         bias_cache = ~bias_cache
         if dtype == mstype.float16:
             mask_value = Tensor(np.finfo(np.float32).min, mstype.float16)
         else:
             mask_value = Tensor(1)
-        attn_mask = ops.masked_fill(Tensor(np.zeros(
-            (max_seq_len, max_seq_len)), dtype=mstype.float16), bias_cache, mask_value)
+        attn_mask = ops.masked_fill(
+            Tensor(np.zeros((max_seq_len, max_seq_len)), dtype=mstype.float16),
+            bias_cache,
+            mask_value,
+        )
         if need_nz:
             # ND -> NZ
             attn_mask = ops.reshape(attn_mask, (1, max_seq_len, max_seq_len))
-            attn_mask = ops.reshape(
-                attn_mask, (1, max_seq_len, max_seq_len // 16, 16))
+            attn_mask = ops.reshape(attn_mask, (1, max_seq_len, max_seq_len // 16, 16))
             attn_mask = ops.transpose(attn_mask, (0, 2, 1, 3)).contiguous()
-            attn_mask = _set_format(attn_mask, "FRACTAL_NZ")
+            attn_mask = _set_format(attn_mask, FORMAT_NZ)
         return attn_mask
 
 
-class AtbBoostBase():
+class AtbBoostBase:
     """atb boost base class"""
 
     def __init__(self, config):
         super().__init__()
+        self.backend_name = BUILDIN_BACKEND_NAME
         self.is_first_iteration = False
         self.config = config
         self.dtype = config.compute_dtype
@@ -68,27 +87,98 @@ class AtbBoostBase():
         self.need_nz = config.need_nz
         self.placeholder = Tensor(np.zeros(1), dtype=self.dtype)
         self.lm_head_indices_fake = Tensor([0], dtype=mstype.int64)
-        self.position_embedding_type = "ROPE"
+        self.position_embedding_type = PositionEmbeddingType.ROPE
         self.add_norm_enable = True
         self.max_decode_length = self.config.max_decode_length
         self.max_base_len = 128
         self.attn_mask = AttentionMask.static(
-            self.max_base_len, dtype=self.dtype, need_nz=self.need_nz)
+            self.max_base_len, dtype=self.dtype, need_nz=self.need_nz
+        )
 
         self.cast = P.Cast()
         self.reshape = P.Reshape()
         self.kv_quant = None
         self.rank_id = get_real_rank()
         self.device_num = get_real_group_size()
+        self.ascend_weight = []
+        self.k_caches = []
+        self.v_caches = []
 
     def _convert_tensor_format_and_dtype(self, tensor, dtype=mstype.float16):
        tensor = self.cast(tensor, dtype=dtype)
         if self.need_nz:
-            tensor = _set_format(tensor, "FRACTAL_NZ")
+            tensor = _set_format(tensor, FORMAT_NZ)
         return tensor
 
+    def _convert_qkv_concat_weight(self, param_dict):
+        """convert qkv concat weight"""
+        assume_num_layers = 500
+        for i in range(assume_num_layers):
+            # qkv weight concat
+            wq_weight_name = f"model.layers.{i}.attention.wq.weight"
+            wk_weight_name = f"model.layers.{i}.attention.wk.weight"
+            wv_weight_name = f"model.layers.{i}.attention.wv.weight"
+            qkv_concat_weight_name = f"model.layers.{i}.attention.w_qkv.weight"
+            if wq_weight_name not in param_dict:
+                break
+            wq_weight = param_dict[wq_weight_name].asnumpy()
+            wk_weight = param_dict[wk_weight_name].asnumpy()
+            wv_weight = param_dict[wv_weight_name].asnumpy()
+            qkv_weight = np.concatenate((wq_weight, wk_weight, wv_weight), 0)
+            param_dict[qkv_concat_weight_name] = Parameter(
+                qkv_weight, name=qkv_concat_weight_name
+            )
+
+            # gate hidden weight concat
+            ffn_gate_weight_name = f"model.layers.{i}.feed_forward.w1.weight"
+            ffn_hidden_weight_name = f"model.layers.{i}.feed_forward.w3.weight"
+            gate_hidden_concat_weight_name = (
+                f"model.layers.{i}.feed_forward.w_gate_hidden.weight"
+            )
+
+            ffn_gate_weight = param_dict[ffn_gate_weight_name].asnumpy()
+            ffn_hidden_weight = param_dict[ffn_hidden_weight_name].asnumpy()
+            gate_hidden_weight = np.concatenate((ffn_gate_weight, ffn_hidden_weight), 0)
+            param_dict[gate_hidden_concat_weight_name] = Parameter(
+                gate_hidden_weight, name=gate_hidden_concat_weight_name
+            )
+
+            param_dict.pop(wq_weight_name)
+            param_dict.pop(wk_weight_name)
+            param_dict.pop(wv_weight_name)
+            param_dict.pop(ffn_gate_weight_name)
+            param_dict.pop(ffn_hidden_weight_name)
+            logger.info(f"transform: {qkv_concat_weight_name}")
+            logger.info(f"transform: {gate_hidden_concat_weight_name}")
+
+        for i in range(assume_num_layers):
+            # qkv bias concat
+            wq_bias_name = f"model.layers.{i}.attention.wq.bias"
+            wk_bias_name = f"model.layers.{i}.attention.wk.bias"
+            wv_bias_name = f"model.layers.{i}.attention.wv.bias"
+            qkv_concat_bias_name = f"model.layers.{i}.attention.w_qkv.bias"
+            if wq_bias_name not in param_dict:
+                break
+
+            wq_bias_weight = param_dict[wq_bias_name].asnumpy()
+            wk_bias_weight = param_dict[wk_bias_name].asnumpy()
+            wv_bias_weight = param_dict[wv_bias_name].asnumpy()
+            qkv_bias_weight = np.concatenate(
+                (wq_bias_weight, wk_bias_weight, wv_bias_weight), 0
+            )
+            param_dict[qkv_concat_bias_name] = Parameter(
+                qkv_bias_weight, name=qkv_concat_bias_name
+            )
+
+            param_dict.pop(wq_bias_name)
+            param_dict.pop(wk_bias_name)
+            param_dict.pop(wv_bias_name)
+            logger.info(f"transform: {qkv_concat_bias_name}")
+        return param_dict
+
     def set_weights(self, parm_dict, dtype=mstype.float16):
         """set weights for llm boost"""
+        self._convert_qkv_concat_weight(parm_dict)
         embedding_weight_name = "model.tok_embeddings.embedding_weight"
         attention_norm_name = "attention_norm"
         qkv_name = "attention.w_qkv"
@@ -101,45 +191,88 @@ class AtbBoostBase():
         placeholder = Parameter(Tensor(np.zeros(1), dtype=dtype))
 
         ascend_weight = []
-        ascend_weight.append(
-            self.cast(parm_dict[embedding_weight_name], dtype))
+        ascend_weight.append(self.cast(parm_dict[embedding_weight_name], dtype))
         for i in range(self.num_layers):
-            ascend_weight.append(self._convert_tensor_format_and_dtype(
-                parm_dict[f"model.layers.{i}.{attention_norm_name}.weight"], dtype))
+            ascend_weight.append(
+                self._convert_tensor_format_and_dtype(
+                    parm_dict[f"model.layers.{i}.{attention_norm_name}.weight"], dtype
+                )
+            )
             ascend_weight.extend([placeholder] * 3)
 
             ascend_weight.append(
-                self._convert_tensor_format_and_dtype(parm_dict[f"model.layers.{i}.{qkv_name}.weight"], dtype))
-            ascend_weight.append(self._convert_tensor_format_and_dtype(parm_dict.get(
-                f"model.layers.{i}.{qkv_name}.bias", placeholder), dtype))
+                self._convert_tensor_format_and_dtype(
+                    parm_dict[f"model.layers.{i}.{qkv_name}.weight"], dtype
+                )
+            )
+            ascend_weight.append(
+                self._convert_tensor_format_and_dtype(
+                    parm_dict.get(f"model.layers.{i}.{qkv_name}.bias", placeholder),
+                    dtype,
+                )
+            )
             ascend_weight.extend([placeholder] * 16)
 
             ascend_weight.append(
-                self._convert_tensor_format_and_dtype(parm_dict[f"model.layers.{i}.{o_name}.weight"], dtype))
-            ascend_weight.append(self._convert_tensor_format_and_dtype(parm_dict.get(
-                f"model.layers.{i}.{o_name}.bias", placeholder), dtype))
+                self._convert_tensor_format_and_dtype(
+                    parm_dict[f"model.layers.{i}.{o_name}.weight"], dtype
+                )
+            )
+            ascend_weight.append(
+                self._convert_tensor_format_and_dtype(
+                    parm_dict.get(f"model.layers.{i}.{o_name}.bias", placeholder), dtype
+                )
+            )
             ascend_weight.extend([placeholder] * 4)
 
             ascend_weight.append(
-                self._convert_tensor_format_and_dtype(parm_dict[f"model.layers.{i}.{mlp_norm_name}.weight"], dtype))
+                self._convert_tensor_format_and_dtype(
+                    parm_dict[f"model.layers.{i}.{mlp_norm_name}.weight"], dtype
+                )
+            )
             ascend_weight.extend([placeholder] * 3)
 
             ascend_weight.append(
-                self._convert_tensor_format_and_dtype(parm_dict[f"model.layers.{i}.{mlp_gate_name}.weight"], dtype))
-            ascend_weight.append(self._convert_tensor_format_and_dtype(parm_dict.get(
-                f"model.layers.{i}.{mlp_gate_name}.bias", placeholder), dtype))
+                self._convert_tensor_format_and_dtype(
+                    parm_dict[f"model.layers.{i}.{mlp_gate_name}.weight"], dtype
+                )
+            )
+            ascend_weight.append(
+                self._convert_tensor_format_and_dtype(
+                    parm_dict.get(
+                        f"model.layers.{i}.{mlp_gate_name}.bias", placeholder
+                    ),
+                    dtype,
+                )
+            )
             ascend_weight.extend([placeholder] * 10)
 
             ascend_weight.append(
-                self._convert_tensor_format_and_dtype(parm_dict[f"model.layers.{i}.{mlp_down_name}.weight"], dtype))
-            ascend_weight.append(self._convert_tensor_format_and_dtype(parm_dict.get(
-                f"model.layers.{i}.{mlp_down_name}.bias", placeholder), dtype))
+                self._convert_tensor_format_and_dtype(
+                    parm_dict[f"model.layers.{i}.{mlp_down_name}.weight"], dtype
+                )
+            )
+            ascend_weight.append(
+                self._convert_tensor_format_and_dtype(
+                    parm_dict.get(
+                        f"model.layers.{i}.{mlp_down_name}.bias", placeholder
+                    ),
+                    dtype,
+                )
+            )
             ascend_weight.extend([placeholder] * 4)
 
         ascend_weight.append(
-            self._convert_tensor_format_and_dtype(parm_dict[f"{norm_out_name}.weight"], dtype))
+            self._convert_tensor_format_and_dtype(
+                parm_dict[f"{norm_out_name}.weight"], dtype
+            )
+        )
         ascend_weight.append(
-            self._convert_tensor_format_and_dtype(parm_dict[f"{lm_head_name}.weight"], dtype))
+            self._convert_tensor_format_and_dtype(
+                parm_dict[f"{lm_head_name}.weight"], dtype
+            )
+        )
+        self.ascend_weight = ascend_weight
         self.atb_encoder_operation.set_weights(ascend_weight)
         self.atb_decoder_operation.set_weights(ascend_weight)
 
@@ -147,20 +280,47 @@ class AtbBoostBase():
         """set kv_cache for llm boost"""
         if not k_caches or v_caches:
             if self.need_nz:
-                kv_shape = (self.config.num_blocks, self.num_kv_heads*self.head_dim //
-                            self.device_num // 16, self.config.block_size, 16)
-                k_caches = [_set_format(Parameter(Tensor(
-                    shape=kv_shape, dtype=self.dtype, init=Zero())), "FRACTAL_NZ") for _ in range(self.num_layers)]
-                v_caches = [_set_format(Parameter(Tensor(
-                    shape=kv_shape, dtype=self.dtype, init=Zero())), "FRACTAL_NZ") for _ in range(self.num_layers)]
+                kv_shape = (
+                    self.config.num_blocks,
+                    self.num_kv_heads * self.head_dim // self.device_num // 16,
+                    self.config.block_size,
+                    16,
+                )
+                k_caches = [
+                    _set_format(
+                        Parameter(
+                            Tensor(shape=kv_shape, dtype=self.dtype, init=Zero())
+                        ),
+                        FORMAT_NZ,
+                    )
+                    for _ in range(self.num_layers)
+                ]
+                v_caches = [
+                    _set_format(
+                        Parameter(
+                            Tensor(shape=kv_shape, dtype=self.dtype, init=Zero())
+                        ),
+                        FORMAT_NZ,
+                    )
+                    for _ in range(self.num_layers)
+                ]
             else:
-                kv_shape = (self.config.num_blocks, self.config.block_size,
-                            self.num_kv_heads // self.device_num, self.head_dim)
-                k_caches = [Parameter(Tensor(
-                    shape=kv_shape, dtype=self.dtype, init=Zero())) for _ in range(self.num_layers)]
-                v_caches = [Parameter(Tensor(
-                    shape=kv_shape, dtype=self.dtype, init=Zero())) for _ in range(self.num_layers)]
-
+                kv_shape = (
+                    self.config.num_blocks,
+                    self.config.block_size,
+                    self.num_kv_heads // self.device_num,
+                    self.head_dim,
+                )
+                k_caches = [
+                    Parameter(Tensor(shape=kv_shape, dtype=self.dtype, init=Zero()))
+                    for _ in range(self.num_layers)
+                ]
+                v_caches = [
+                    Parameter(Tensor(shape=kv_shape, dtype=self.dtype, init=Zero()))
+                    for _ in range(self.num_layers)
+                ]
+        self.k_caches = k_caches
+        self.v_caches = v_caches
         self.atb_encoder_operation.set_kvcache(k_caches, v_caches)
         self.atb_decoder_operation.set_kvcache(k_caches, v_caches)
 
@@ -171,11 +331,9 @@ class AtbBoostBase():
     def _execute_operator(self, acl_inputs, acl_param):
         """execute operator."""
         if self.is_first_iteration:
-            acl_model_out = self.atb_encoder_operation.forward(
-                acl_inputs, acl_param)
+            acl_model_out = self.atb_encoder_operation.forward(acl_inputs, acl_param)
         else:
-            acl_model_out = self.atb_decoder_operation.forward(
-                acl_inputs, acl_param)
+            acl_model_out = self.atb_decoder_operation.forward(acl_inputs, acl_param)
         acl_hidden_state = acl_model_out[0]
         return acl_hidden_state
 
@@ -183,28 +341,46 @@ class AtbBoostBase():
         r"""
         LlmBoost forward.
         """
-        input_ids = boost_inputs["input_ids"]
-        position_ids = boost_inputs["position_ids"]
-        cos_embed = boost_inputs["cos_embed"]
-        sin_embed = boost_inputs["sin_embed"]
-        block_tables = boost_inputs["block_tables"]
-        slot_mapping = boost_inputs["slot_mapping"]
-        batch_valid_length = boost_inputs["batch_valid_length"]
-        lm_head_indices = boost_inputs["lm_head_indices"]
-        seqLen = boost_inputs["seq_lens"]
+        input_ids = boost_inputs.get("input_ids", None)
+        position_ids = boost_inputs.get("position_ids", None)
+        cos_embed = boost_inputs.get("cos_embed", None)
+        sin_embed = boost_inputs.get("sin_embed", None)
+        block_tables = boost_inputs.get("block_tables", None)
+        slot_mapping = boost_inputs.get("slot_mapping", None)
+        batch_valid_length = boost_inputs.get("batch_valid_length", None)
+        lm_head_indices = boost_inputs.get("lm_head_indices", None)
+        seqLen = boost_inputs.get("seq_lens", None)
+        input_ids = self.reshape(input_ids, (-1,))
         if self.is_first_iteration:
             attention_mask = self.attn_mask
         else:
-            position_ids = batch_valid_length - 1
+            if position_ids is None:
+                position_ids = batch_valid_length - 1
             attention_mask = self.placeholder
             lm_head_indices = self.lm_head_indices_fake
 
-        acl_inputs, acl_param = self._prepare_inputs(prefill=self.is_first_iteration, input_ids=input_ids,
-                                                     position_ids=position_ids, cos_embed=cos_embed,
-                                                     sin_embed=sin_embed, attention_mask=attention_mask,
-                                                     block_tables=block_tables, slots=slot_mapping,
-                                                     input_lengths=batch_valid_length, lm_head_indices=lm_head_indices,
-                                                     seqLen=seqLen)
+        if input_ids is not None and input_ids.dtype != mstype.int64:
+            input_ids = self.cast(input_ids, mstype.int64)
+        if position_ids is not None and position_ids.dtype != mstype.int64:
+            position_ids = self.cast(position_ids, mstype.int64)
+        if batch_valid_length is not None and batch_valid_length.dtype != mstype.int32:
+            batch_valid_length = self.cast(batch_valid_length, mstype.int32)
+        if lm_head_indices is not None and lm_head_indices.dtype != mstype.int64:
+            lm_head_indices = self.cast(lm_head_indices, mstype.int64)
+
+        acl_inputs, acl_param = self._prepare_inputs(
+            prefill=self.is_first_iteration,
+            input_ids=input_ids,
+            position_ids=position_ids,
+            cos_embed=cos_embed,
+            sin_embed=sin_embed,
+            attention_mask=attention_mask,
+            block_tables=block_tables,
+            slots=slot_mapping,
+            input_lengths=batch_valid_length,
+            lm_head_indices=lm_head_indices,
+            seqLen=seqLen,
+        )
         ms.hal.synchronize()
         logits = self._execute_operator(acl_inputs, acl_param)
         logits = self.cast(logits, mstype.float32)

mindspore/experimental/llm_boost/atb/llama_boost.py
@@ -15,10 +15,16 @@
 """llm boost"""
 import json
 import mindspore.common.dtype as mstype
-from mindspore.experimental.llm_boost.atb.boost_base import AtbBoostBase
+from mindspore.experimental.llm_boost.atb.boost_base import (
+    AtbBoostBase,
+    PositionEmbeddingType,
+    NormType,
+)
 from mindspore._c_expression import LlmBoostBinder
 from mindspore.experimental.llm_boost.register import LlmBoostRegister, LlmBoostType
 
+CPP_LLAMA_MODEL_CLASS_NAME = "llama_LlamaDecoderModel"
+
 
 @LlmBoostRegister.register(LlmBoostType.BUILDIN, "Llama")
 class LlamaBoost(AtbBoostBase):
@@ -30,14 +36,17 @@ class LlamaBoost(AtbBoostBase):
         self.acl_encoder_operation_inputs = [None] * self.in_tensor_length
         self.acl_decoder_operation_inputs = [None] * self.in_tensor_length
         self.atb_encoder_operation = LlmBoostBinder(
-            "ATB", "llama_parallel_DecoderModel")
+            self.backend_name, CPP_LLAMA_MODEL_CLASS_NAME
+        )
         self.atb_decoder_operation = LlmBoostBinder(
-            "ATB", "llama_parallel_DecoderModel")
+            self.backend_name, CPP_LLAMA_MODEL_CLASS_NAME
+        )
 
     def init(self):
         """set param"""
         coder_param = {
-            "rmsNormEps": self.config.rms_norm_eps,
+            "normEps": self.config.rms_norm_eps,
+            "normType": NormType.RMS_NORM,
             "numAttentionHeadsPerRank": self.config.num_heads // self.device_num,
             "hiddenSizePerAttentionHead": self.head_dim,
             "numHiddenLayers": self.num_layers,
@@ -46,32 +55,41 @@ class LlamaBoost(AtbBoostBase):
             "isFA": False,
             "isBF16": self.dtype == mstype.bfloat16,
             "packQuantType": [[1, 1] for _ in range(self.num_layers)],
-            "linearQuantType": [[0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)],
-            "linearTransposeType": [[1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)],
+            "linearQuantType": [
+                [0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)
+            ],
+            "linearTransposeType": [
+                [1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)
+            ],
             "isEmbeddingParallel": False,
             "isLmHeadParallel": not self.config.parallel_config.vocab_emb_dp,
             "lmHeadTransposeType": 1,
-            "supportSwiGLU": True,
-            "kvQuant": self.kv_quant is not None,
+            "enableSwiGLU": True,
+            "enablekvQuant": self.kv_quant is not None,
             "rank": self.rank_id,
             "worldSize": self.device_num,
-            "backend": "lccl",
+            "backend": self.config.communication_backend,
             "rankTableFile": "",
-            "positionEmbeddingType": self.position_embedding_type,
+            "positionEmbeddingType": PositionEmbeddingType.ROPE,
             "hiddenSize": self.config.hidden_size,
             "gemma": False,
-            "enableAddNorm": True,
-            "supportCompressHead": False,
+            "enableAddNorm": False,
+            "enableCompressHead": False,
+            "isUnpadInputs": True,
         }
         encoder_param = {
-            **coder_param, "isPrefill": True,
-            "supportLcoc": True,
-            "supportSpeculate": False,
-            "skipWordEmbedding": False
+            **coder_param,
+            "isPrefill": True,
+            "enableLcoc": True,
+            "enableSpeculate": False,
+            "skipWordEmbedding": False,
+            "enableSplitFuse": False,
         }
         decoder_param = {
-            **coder_param, "isPrefill": False, "supportLcoc": False,
-            "supportSpeculate": False
+            **coder_param,
+            "isPrefill": False,
+            "enableLcoc": False,
+            "enableSpeculate": False,
         }
         self.atb_encoder_operation.init(json.dumps({**encoder_param}))
         self.atb_decoder_operation.init(json.dumps({**decoder_param}))
@@ -92,14 +110,15 @@ class LlamaBoost(AtbBoostBase):
         **kwargs
     ):
         """prepare inputs"""
-        self.acl_param = json.dumps({
-            "seqLen": seqLen,
-        })
-        self.acl_decoder_operation_inputs[0] = self.cast(
-            input_ids, mstype.int64)
+        self.acl_param = json.dumps(
+            {
+                "seqLen": seqLen,
+            }
+        )
+
+        self.acl_decoder_operation_inputs[0] = input_ids
         self.acl_decoder_operation_inputs[1] = self.placeholder
-        self.acl_decoder_operation_inputs[2] = self.cast(
-            position_ids, mstype.int32)
+        self.acl_decoder_operation_inputs[2] = position_ids
         self.acl_decoder_operation_inputs[3] = cos_embed
         self.acl_decoder_operation_inputs[4] = sin_embed
         self.acl_decoder_operation_inputs[5] = attention_mask
@@ -108,8 +127,6 @@ class LlamaBoost(AtbBoostBase):
         self.acl_decoder_operation_inputs[8] = self.placeholder
         self.acl_decoder_operation_inputs[9] = self.placeholder
         self.acl_decoder_operation_inputs[10] = self.placeholder
-        self.acl_decoder_operation_inputs[11] = self.cast(
-            input_lengths, mstype.int32)
-        self.acl_decoder_operation_inputs[12] = self.cast(
-            lm_head_indices, mstype.int64)
+        self.acl_decoder_operation_inputs[11] = input_lengths
+        self.acl_decoder_operation_inputs[12] = lm_head_indices
         return self.acl_decoder_operation_inputs, self.acl_param

mindspore/experimental/llm_boost/atb/qwen_boost.py
@@ -15,11 +15,14 @@
 """llm boost"""
 import json
 import mindspore.common.dtype as mstype
-from mindspore.experimental.llm_boost.atb.boost_base import AtbBoostBase
+from mindspore.experimental.llm_boost.atb.boost_base import AtbBoostBase, NormType
 from mindspore._c_expression import LlmBoostBinder
 from mindspore.experimental.llm_boost.register import LlmBoostRegister, LlmBoostType
 
 
+CPP_QWEN_MODEL_CLASS_NAME = "qwen_QwenDecoderModel"
+
+
 @LlmBoostRegister.register(LlmBoostType.BUILDIN, "Qwen")
 class QwenBoost(AtbBoostBase):
     """QwenBoost class"""
@@ -30,9 +33,11 @@ class QwenBoost(AtbBoostBase):
         self.acl_encoder_operation_inputs = [None] * self.in_tensor_length
         self.acl_decoder_operation_inputs = [None] * self.in_tensor_length
         self.atb_encoder_operation = LlmBoostBinder(
-            "ATB", "qwen_DecoderModel")
+            self.backend_name, CPP_QWEN_MODEL_CLASS_NAME
+        )
         self.atb_decoder_operation = LlmBoostBinder(
-            "ATB", "qwen_DecoderModel")
+            self.backend_name, CPP_QWEN_MODEL_CLASS_NAME
+        )
 
     def init(self):
         """set param"""
@@ -42,24 +47,43 @@ class QwenBoost(AtbBoostBase):
             "withEmbedding": True,
             "isEmbeddingParallel": True,
             "isLmHeadParallel": True,
-            "linearTransposeType": [[1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)],
+            "linearTransposeType": [
+                [1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)
+            ],
             "lmHeadTransposeType": 1,
-            "supportSwiGLU": not self.need_nz,
-            "rmsNormEps": self.config.rms_norm_eps,
+            "enableSwiGLU": not self.need_nz,
+            "normEps": self.config.rms_norm_eps,
+            "normType": NormType.RMS_NORM,
             "numAttentionHeadsPerRank": self.config.num_heads // self.device_num,
             "hiddenSizePerAttentionHead": self.head_dim,
             "numHiddenLayers": self.num_layers,
             "numKeyValueHeadsPerRank": self.n_kv_heads // self.device_num,
             "rank": self.rank_id,
             "worldSize": self.device_num,
-            "backend": "lccl",
+            "backend": self.config.communication_backend,
             "packQuantType": [[1, 1] for _ in range(self.num_layers)],
-            "linearQuantType": [[0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)],
-            "kvQuant": self.kv_quant is not None,
+            "linearQuantType": [
+                [0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)
+            ],
+            "linearHasBias": [[True, False, False, False]] * self.num_layers,
+            "enableKvQuant": self.kv_quant is not None,
+            "enableLora": False,
+            "isUnpadInputs": True,
+            "enableAddNorm": False,
+        }
+        encoder_param = {
+            **param_dict,
+            "isPrefill": True,
+            "enableLcoc": False,
+            "enableSplitFuse": False,
+        }
+        decoder_param = {
+            **param_dict,
+            "isPrefill": False,
+            "enableLcoc": False,
+            "enableSpeculate": False,
+            "enablePrefixCache": False,
         }
-        encoder_param = {**param_dict, "isPrefill": True, "supportLcoc": False}
-        decoder_param = {**param_dict, "isPrefill": False,
-                         "supportLcoc": False, "supportSpeculate": False}
         self.atb_encoder_operation.init(json.dumps({**encoder_param}))
         self.atb_decoder_operation.init(json.dumps({**decoder_param}))
 
@@ -79,13 +103,14 @@ class QwenBoost(AtbBoostBase):
         **kwargs
     ):
         """prepare inputs"""
-        self.acl_param = json.dumps({
-            "seqLen": seqLen,
-        })
-        self.acl_decoder_operation_inputs[0] = self.cast(
-            input_ids, mstype.int64)
-        self.acl_decoder_operation_inputs[1] = self.cast(
-            position_ids, mstype.int32)
+        self.acl_param = json.dumps(
+            {
+                "seqLen": seqLen,
+            }
+        )
+
+        self.acl_decoder_operation_inputs[0] = input_ids
+        self.acl_decoder_operation_inputs[1] = position_ids
         self.acl_decoder_operation_inputs[2] = cos_embed
         self.acl_decoder_operation_inputs[3] = sin_embed
         self.acl_decoder_operation_inputs[4] = attention_mask
@@ -93,9 +118,7 @@ class QwenBoost(AtbBoostBase):
         self.acl_decoder_operation_inputs[6] = slots
         self.acl_decoder_operation_inputs[7] = self.placeholder
         self.acl_decoder_operation_inputs[8] = self.placeholder
-        self.acl_decoder_operation_inputs[9] = self.cast(
-            input_lengths, mstype.int32)
-        self.acl_decoder_operation_inputs[10] = self.cast(
-            lm_head_indices, mstype.int64)
-        self.acl_decoder_operation_inputs[11] = self.placeholder
+        self.acl_decoder_operation_inputs[9] = self.placeholder
+        self.acl_decoder_operation_inputs[10] = input_lengths
+        self.acl_decoder_operation_inputs[11] = lm_head_indices
         return self.acl_decoder_operation_inputs, self.acl_param