@novastera-oss/llamarn 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225) hide show
  1. package/RNLlamaCpp.podspec +3 -2
  2. package/android/CMakeLists.txt +6 -3
  3. package/android/src/main/cpp/include/llama.h +140 -38
  4. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  8. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  12. package/cpp/LlamaCppModel.cpp +48 -67
  13. package/cpp/LlamaCppModel.h +8 -3
  14. package/cpp/PureCppImpl.cpp +1 -1
  15. package/cpp/PureCppImpl.h +2 -2
  16. package/cpp/build-info.cpp +2 -2
  17. package/cpp/llama.cpp/CMakeLists.txt +15 -4
  18. package/cpp/llama.cpp/Makefile +2 -2
  19. package/cpp/llama.cpp/README.md +33 -13
  20. package/cpp/llama.cpp/common/CMakeLists.txt +15 -28
  21. package/cpp/llama.cpp/common/arg.cpp +38 -12
  22. package/cpp/llama.cpp/common/build-info.cpp.in +2 -2
  23. package/cpp/llama.cpp/common/chat-parser.cpp +9 -3
  24. package/cpp/llama.cpp/common/chat-parser.h +4 -1
  25. package/cpp/llama.cpp/common/chat.cpp +16 -13
  26. package/cpp/llama.cpp/common/chat.h +1 -1
  27. package/cpp/llama.cpp/common/common.cpp +52 -40
  28. package/cpp/llama.cpp/common/common.h +5 -2
  29. package/cpp/llama.cpp/common/json-partial.cpp +5 -4
  30. package/cpp/llama.cpp/common/json-partial.h +2 -1
  31. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +2 -1
  32. package/cpp/llama.cpp/common/json-schema-to-grammar.h +4 -4
  33. package/cpp/llama.cpp/common/speculative.cpp +6 -4
  34. package/cpp/llama.cpp/convert_hf_to_gguf.py +128 -84
  35. package/cpp/llama.cpp/ggml/CMakeLists.txt +47 -2
  36. package/cpp/llama.cpp/ggml/cmake/common.cmake +1 -2
  37. package/cpp/llama.cpp/ggml/include/ggml.h +1 -3
  38. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +49 -13
  39. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +5 -0
  40. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +10 -5
  41. package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -3
  42. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +6 -1
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
  44. package/cpp/llama.cpp/ggml/src/ggml-common.h +4 -0
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +93 -24
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +1 -1
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2174 -0
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
  58. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +59 -3206
  59. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  60. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +1 -1
  61. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +7 -4
  62. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +33 -2
  63. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +8 -8
  64. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
  65. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
  66. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +56 -7
  67. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
  68. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -2
  69. package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  71. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1555 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
  73. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +2 -4
  74. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  75. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +6 -8
  76. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +5 -2
  77. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +25 -16
  78. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
  79. package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-impl.h +2 -0
  81. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +11 -10
  82. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +33 -8
  83. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +135 -100
  84. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +7 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +908 -3
  86. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
  87. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  88. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
  89. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
  90. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
  91. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  92. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
  93. package/cpp/llama.cpp/ggml/src/ggml-quants.c +0 -2
  94. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
  95. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
  96. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +19 -24
  97. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +21 -2
  98. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +121 -4
  99. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +32 -0
  100. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +3 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +2 -96
  102. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +164 -46
  103. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +32 -8
  104. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +38 -10
  105. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +118 -11
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
  107. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +26 -29
  108. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -248
  109. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -12
  110. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  111. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +2 -0
  112. package/cpp/llama.cpp/ggml/src/ggml.c +9 -8
  113. package/cpp/llama.cpp/ggml/src/ggml.cpp +26 -0
  114. package/cpp/llama.cpp/ggml/src/gguf.cpp +19 -2
  115. package/cpp/llama.cpp/gguf-py/gguf/constants.py +57 -0
  116. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +4 -1
  117. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +14 -3
  118. package/cpp/llama.cpp/include/llama.h +140 -38
  119. package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +1 -0
  120. package/cpp/llama.cpp/src/CMakeLists.txt +4 -1
  121. package/cpp/llama.cpp/src/llama-arch.cpp +95 -3
  122. package/cpp/llama.cpp/src/llama-arch.h +7 -1
  123. package/cpp/llama.cpp/src/llama-batch.cpp +289 -31
  124. package/cpp/llama.cpp/src/llama-batch.h +47 -17
  125. package/cpp/llama.cpp/src/llama-chat.cpp +19 -2
  126. package/cpp/llama.cpp/src/llama-chat.h +1 -0
  127. package/cpp/llama.cpp/src/llama-context.cpp +488 -313
  128. package/cpp/llama.cpp/src/llama-context.h +38 -17
  129. package/cpp/llama.cpp/src/llama-cparams.cpp +1 -1
  130. package/cpp/llama.cpp/src/llama-cparams.h +1 -1
  131. package/cpp/llama.cpp/src/llama-graph.cpp +275 -152
  132. package/cpp/llama.cpp/src/llama-graph.h +109 -52
  133. package/cpp/llama.cpp/src/llama-hparams.cpp +6 -2
  134. package/cpp/llama.cpp/src/llama-hparams.h +8 -2
  135. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +281 -0
  136. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +133 -0
  137. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +1835 -0
  138. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +308 -0
  139. package/cpp/llama.cpp/src/llama-kv-cells.h +53 -17
  140. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +247 -0
  141. package/cpp/llama.cpp/src/llama-memory-hybrid.h +143 -0
  142. package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +1116 -0
  143. package/cpp/llama.cpp/src/llama-memory-recurrent.h +188 -0
  144. package/cpp/llama.cpp/src/llama-memory.cpp +41 -0
  145. package/cpp/llama.cpp/src/llama-memory.h +89 -4
  146. package/cpp/llama.cpp/src/llama-mmap.cpp +1 -1
  147. package/cpp/llama.cpp/src/llama-model-loader.cpp +42 -17
  148. package/cpp/llama.cpp/src/llama-model.cpp +735 -143
  149. package/cpp/llama.cpp/src/llama-model.h +4 -0
  150. package/cpp/llama.cpp/src/llama-quant.cpp +2 -1
  151. package/cpp/llama.cpp/src/llama-vocab.cpp +39 -25
  152. package/cpp/llama.cpp/src/llama.cpp +11 -7
  153. package/cpp/llama.cpp/src/unicode.cpp +5 -0
  154. package/cpp/llama.cpp/vendor/cpp-httplib/httplib.h +10518 -0
  155. package/cpp/llama.cpp/vendor/miniaudio/miniaudio.h +93468 -0
  156. package/cpp/llama.cpp/{common → vendor}/minja/chat-template.hpp +1 -1
  157. package/cpp/llama.cpp/{common → vendor}/minja/minja.hpp +1 -1
  158. package/cpp/llama.cpp/{common → vendor/nlohmann}/json.hpp +3027 -2267
  159. package/cpp/llama.cpp/vendor/nlohmann/json_fwd.hpp +187 -0
  160. package/cpp/llama.cpp/vendor/stb/stb_image.h +7988 -0
  161. package/cpp/rn-completion.cpp +65 -10
  162. package/cpp/{rn-llama.hpp → rn-llama.h} +1 -1
  163. package/cpp/{rn-utils.hpp → rn-utils.h} +8 -1
  164. package/ios/include/chat.h +1 -1
  165. package/ios/include/common/minja/chat-template.hpp +1 -1
  166. package/ios/include/common/minja/minja.hpp +1 -1
  167. package/ios/include/common.h +5 -2
  168. package/ios/include/json-schema-to-grammar.h +4 -4
  169. package/ios/include/llama.h +140 -38
  170. package/ios/include/{common → nlohmann}/json.hpp +3027 -2267
  171. package/ios/libs/llama.xcframework/Info.plist +20 -20
  172. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  173. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4863 -4617
  174. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +1 -3
  175. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +140 -38
  176. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  177. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  178. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4834 -4638
  179. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3742 -3557
  180. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
  181. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +140 -38
  182. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  183. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  184. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4834 -4638
  185. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3744 -3559
  186. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +1 -3
  187. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +140 -38
  188. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +1 -3
  189. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +140 -38
  190. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  191. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +1 -3
  192. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +140 -38
  193. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  194. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  195. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  196. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4863 -4616
  197. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +1 -3
  198. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +140 -38
  199. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  200. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  201. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4834 -4637
  202. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3742 -3556
  203. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
  204. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +140 -38
  205. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  206. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  207. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4900 -4653
  208. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +1 -3
  209. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +140 -38
  210. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  211. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  212. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4871 -4674
  213. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3773 -3587
  214. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
  215. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +140 -38
  216. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  217. package/package.json +1 -2
  218. package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +0 -24
  219. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  220. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13891
  221. package/cpp/llama.cpp/src/llama-kv-cache.cpp +0 -2747
  222. package/cpp/llama.cpp/src/llama-kv-cache.h +0 -502
  223. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
  224. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
  225. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
@@ -1340,7 +1340,10 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
1340
1340
  // allocate graph
1341
1341
  if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
1342
1342
  // the re-allocation may cause the split inputs to be moved to a different address
1343
- ggml_backend_sched_synchronize(sched);
1343
+ // synchronize without ggml_backend_sched_synchronize to avoid changing cur_copy
1344
+ for (int i = 0; i < sched->n_backends; i++) {
1345
+ ggml_backend_synchronize(sched->backends[i]);
1346
+ }
1344
1347
  #ifndef NDEBUG
1345
1348
  GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
1346
1349
  #endif
@@ -1564,7 +1567,6 @@ bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgra
1564
1567
 
1565
1568
  ggml_backend_sched_split_graph(sched, graph);
1566
1569
 
1567
-
1568
1570
  if (!ggml_backend_sched_alloc_splits(sched)) {
1569
1571
  return false;
1570
1572
  }
@@ -1598,9 +1600,12 @@ void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
1598
1600
  for (int i = 0; i < sched->n_backends; i++) {
1599
1601
  ggml_backend_synchronize(sched->backends[i]);
1600
1602
  }
1601
- // reset the current copy to 0 so that the graphs will be similar during generation
1602
- // necessary for CUDA graphs
1603
- sched->cur_copy = 0;
1603
+ if (!sched->is_alloc) {
1604
+ // if the graph is not already allocated, always use copy 0 after a synchronization
1605
+ // this ensures that during generation the same copy is used every time,
1606
+ // which avoids changes in the graph that could cause CUDA or other graphs to be disabled
1607
+ sched->cur_copy = 0;
1608
+ }
1604
1609
  }
1605
1610
 
1606
1611
  void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {
@@ -81,7 +81,7 @@ if (BLAS_FOUND)
81
81
  target_link_libraries (ggml-blas PRIVATE ${BLAS_LIBRARIES})
82
82
  target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS})
83
83
  else()
84
- message(ERROR "BLAS not found, please refer to "
85
- "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
86
- " to set correct GGML_BLAS_VENDOR")
84
+ message(FATAL_ERROR "BLAS not found, please refer to "
85
+ "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
86
+ " to set correct GGML_BLAS_VENDOR")
87
87
  endif()
@@ -37,6 +37,7 @@
37
37
  #include <thread>
38
38
  #include <unistd.h>
39
39
  #include <functional>
40
+ #include <optional>
40
41
 
41
42
  #include "../include/ggml-cann.h"
42
43
  #include "../include/ggml.h"
@@ -103,6 +104,9 @@ const ggml_cann_device_info& ggml_cann_info();
103
104
  void ggml_cann_set_device(int32_t device);
104
105
  int32_t ggml_cann_get_device();
105
106
 
107
+ std::optional<std::string> get_env(const std::string& name);
108
+ bool parse_bool(const std::string& value);
109
+
106
110
  /**
107
111
  * @brief Abstract base class for memory pools used by CANN.
108
112
  */
@@ -354,7 +358,8 @@ struct ggml_backend_cann_context {
354
358
  : device(device), name("CANN" + std::to_string(device)), task_queue(1024, device) {
355
359
  ggml_cann_set_device(device);
356
360
  description = aclrtGetSocName();
357
- async_mode = (getenv("GGML_CANN_ASYNC_MODE") != nullptr);
361
+
362
+ bool async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
358
363
  GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
359
364
  device, async_mode ? "ON" : "OFF");
360
365
  }
@@ -31,6 +31,8 @@
31
31
  #include <mutex>
32
32
  #include <queue>
33
33
  #include <chrono>
34
+ #include <unordered_set>
35
+ #include <optional>
34
36
 
35
37
  #include "ggml-impl.h"
36
38
  #include "ggml-backend-impl.h"
@@ -93,6 +95,26 @@ int32_t ggml_cann_get_device() {
93
95
  return id;
94
96
  }
95
97
 
98
+ /**
99
+ * @brief Get the value of the specified environment variable (name).
100
+ * if not empty, return a std::string object
101
+ */
102
+ std::optional<std::string> get_env(const std::string& name) {
103
+ const char* val = std::getenv(name.c_str());
104
+ if (!val) return std::nullopt;
105
+ std::string res = std::string(val);
106
+ std::transform(res.begin(), res.end(), res.begin(), ::tolower);
107
+ return res;
108
+ }
109
+
110
+ /**
111
+ * @brief Verify whether the environment variable is a valid value.
112
+ */
113
+ bool parse_bool(const std::string& value) {
114
+ std::unordered_set<std::string> valid_values = {"on", "1", "yes", "y", "enable", "true"};
115
+ return valid_values.find(value) != valid_values.end();
116
+ }
117
+
96
118
  /**
97
119
  * @brief Initialize the CANN device information.
98
120
  *
@@ -214,7 +236,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
214
236
  * @param device The device ID to associate with this buffer pool.
215
237
  */
216
238
  explicit ggml_cann_pool_buf_prio(int device) : device(device) {
217
- disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
239
+ disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
218
240
  }
219
241
 
220
242
  /**
@@ -410,7 +432,7 @@ struct ggml_cann_pool_buf : public ggml_cann_pool {
410
432
  * @param device The device ID to associate with this buffer pool.
411
433
  */
412
434
  explicit ggml_cann_pool_buf(int device) : device(device) {
413
- disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
435
+ disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
414
436
  }
415
437
 
416
438
  /**
@@ -731,16 +753,18 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
731
753
  */
732
754
  std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(
733
755
  int device) {
734
- bool disable_vmm = (getenv("GGML_CANN_DISABLE_VMM_POOL") != nullptr);
735
- if (!disable_vmm && ggml_cann_info().devices[device].vmm) {
736
- GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device);
737
- return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_vmm(device));
738
- }
739
- bool enable_buf_prio = (getenv("GGML_CANN_ENABLE_BUF_PRIO_POOL") != nullptr);
740
- if (enable_buf_prio) {
756
+ std::string mem_pool_type = get_env("GGML_CANN_MEM_POOL").value_or("");
757
+
758
+ if (mem_pool_type == "prio") {
741
759
  GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device);
742
760
  return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf_prio(device));
743
761
  }
762
+
763
+ if (ggml_cann_info().devices[device].vmm && mem_pool_type != "leg") {
764
+ GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device);
765
+ return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_vmm(device));
766
+ }
767
+
744
768
  GGML_LOG_INFO("%s: device %d use buffer pool\n", __func__, device);
745
769
  return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf(device));
746
770
  }
@@ -1074,6 +1074,10 @@ GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
1074
1074
  0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
1075
1075
  GGML_TABLE_END()
1076
1076
 
1077
+ GGML_TABLE_BEGIN(int8_t, kvalues_iq4nl, 16)
1078
+ -127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113,
1079
+ GGML_TABLE_END()
1080
+
1077
1081
  #define NGRID_IQ1S 2048
1078
1082
  #define IQ1S_DELTA 0.125f
1079
1083
  #define IQ1M_DELTA 0.125f
@@ -1,3 +1,17 @@
1
+ function(ggml_add_cpu_backend_features cpu_name arch)
2
+ # The feature detection code is compiled as a separate target so that
3
+ # it can be built without the architecture flags
4
+ # Since multiple variants of the CPU backend may be included in the same
5
+ # build, using set_source_files_properties() to set the arch flags is not possible
6
+ set(GGML_CPU_FEATS_NAME ${cpu_name}-feats)
7
+ add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp)
8
+ target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
9
+ target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARGN})
10
+ target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
11
+ set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
12
+ target_link_libraries(${cpu_name} PRIVATE ${GGML_CPU_FEATS_NAME})
13
+ endfunction()
14
+
1
15
  function(ggml_add_cpu_backend_variant_impl tag_name)
2
16
  if (tag_name)
3
17
  set(GGML_CPU_NAME ggml-cpu-${tag_name})
@@ -10,14 +24,14 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
10
24
  list (APPEND GGML_CPU_SOURCES
11
25
  ggml-cpu/ggml-cpu.c
12
26
  ggml-cpu/ggml-cpu.cpp
13
- ggml-cpu/ggml-cpu-aarch64.cpp
14
- ggml-cpu/ggml-cpu-aarch64.h
15
- ggml-cpu/ggml-cpu-hbm.cpp
16
- ggml-cpu/ggml-cpu-hbm.h
17
- ggml-cpu/ggml-cpu-quants.c
18
- ggml-cpu/ggml-cpu-quants.h
19
- ggml-cpu/ggml-cpu-traits.cpp
20
- ggml-cpu/ggml-cpu-traits.h
27
+ ggml-cpu/repack.cpp
28
+ ggml-cpu/repack.h
29
+ ggml-cpu/hbm.cpp
30
+ ggml-cpu/hbm.h
31
+ ggml-cpu/quants.c
32
+ ggml-cpu/quants.h
33
+ ggml-cpu/traits.cpp
34
+ ggml-cpu/traits.h
21
35
  ggml-cpu/amx/amx.cpp
22
36
  ggml-cpu/amx/amx.h
23
37
  ggml-cpu/amx/mmq.cpp
@@ -84,6 +98,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
84
98
 
85
99
  if (GGML_SYSTEM_ARCH STREQUAL "ARM")
86
100
  message(STATUS "ARM detected")
101
+ list(APPEND GGML_CPU_SOURCES
102
+ ggml-cpu/arch/arm/quants.c
103
+ ggml-cpu/arch/arm/repack.cpp
104
+ )
105
+
87
106
  if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
88
107
  message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
89
108
  else()
@@ -138,6 +157,49 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
138
157
  else()
139
158
  if (GGML_CPU_ARM_ARCH)
140
159
  list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
160
+ elseif(GGML_CPU_ALL_VARIANTS)
161
+ # Begin with the lowest baseline
162
+ set(ARM_MCPU "armv8-a")
163
+ set(ARCH_TAGS "")
164
+ set(ARCH_DEFINITIONS "")
165
+
166
+ # When a feature is selected, bump the MCPU to the first
167
+ # version that supported it
168
+ if (GGML_INTERNAL_DOTPROD)
169
+ set(ARM_MCPU "armv8.2-a")
170
+ set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
171
+ list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
172
+ endif()
173
+ if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
174
+ set(ARM_MCPU "armv8.2-a")
175
+ set(ARCH_TAGS "${ARCH_TAGS}+fp16")
176
+ list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
177
+ endif()
178
+ if (GGML_INTERNAL_SVE)
179
+ set(ARM_MCPU "armv8.2-a")
180
+ set(ARCH_TAGS "${ARCH_TAGS}+sve")
181
+ list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
182
+ endif()
183
+ if (GGML_INTERNAL_MATMUL_INT8)
184
+ set(ARM_MCPU "armv8.6-a")
185
+ set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
186
+ list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
187
+ endif()
188
+ if (GGML_INTERNAL_SVE2)
189
+ set(ARM_MCPU "armv8.6-a")
190
+ set(ARCH_TAGS "${ARCH_TAGS}+sve2")
191
+ list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
192
+ endif()
193
+ if (GGML_INTERNAL_NOSVE)
194
+ set(ARCH_TAGS "${ARCH_TAGS}+nosve")
195
+ endif()
196
+ if (GGML_INTERNAL_SME)
197
+ set(ARM_MCPU "armv9.2-a")
198
+ set(ARCH_TAGS "${ARCH_TAGS}+sme")
199
+ list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
200
+ endif()
201
+ list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}")
202
+ ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
141
203
  endif()
142
204
  endif()
143
205
 
@@ -167,6 +229,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
167
229
  endif()
168
230
  elseif (GGML_SYSTEM_ARCH STREQUAL "x86")
169
231
  message(STATUS "x86 detected")
232
+ list(APPEND GGML_CPU_SOURCES
233
+ ggml-cpu/arch/x86/quants.c
234
+ ggml-cpu/arch/x86/repack.cpp
235
+ )
236
+
170
237
  if (MSVC)
171
238
  # instruction set detection for MSVC only
172
239
  if (GGML_NATIVE)
@@ -296,21 +363,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
296
363
  # the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
297
364
  message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
298
365
  endif()
299
-
300
- # The feature detection code is compiled as a separate target so that
301
- # it can be built without the architecture flags
302
- # Since multiple variants of the CPU backend may be included in the same
303
- # build, using set_source_files_properties() to set the arch flags is not possible
304
- set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
305
- add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
306
- target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
307
- target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
308
- target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
309
- set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
310
- target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
366
+ ggml_add_cpu_backend_features(${GGML_CPU_NAME} x86 ${ARCH_DEFINITIONS})
311
367
  endif()
312
368
  elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
313
369
  message(STATUS "PowerPC detected")
370
+ list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/powerpc/quants.c)
314
371
  if (GGML_NATIVE)
315
372
  if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
316
373
  file(READ "/proc/cpuinfo" POWER10_M)
@@ -318,7 +375,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
318
375
  execute_process(COMMAND bash -c "prtconf |grep 'Implementation' | head -n 1" OUTPUT_VARIABLE POWER10_M)
319
376
  endif()
320
377
 
321
- string(REGEX MATCHALL "POWER *([0-9]+)" MATCHED_STRING "${POWER10_M}")
378
+ string(TOUPPER "${POWER10_M}" POWER10_M_UPPER)
379
+ string(REGEX MATCHALL "POWER *([0-9]+)" MATCHED_STRING "${POWER10_M_UPPER}")
322
380
  string(REGEX REPLACE "POWER *([0-9]+)" "\\1" EXTRACTED_NUMBER "${MATCHED_STRING}")
323
381
 
324
382
  if (EXTRACTED_NUMBER GREATER_EQUAL 10)
@@ -337,6 +395,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
337
395
  endif()
338
396
  elseif (GGML_SYSTEM_ARCH STREQUAL "loongarch64")
339
397
  message(STATUS "loongarch64 detected")
398
+ list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/loongarch/quants.c)
399
+
340
400
  list(APPEND ARCH_FLAGS -march=loongarch64)
341
401
  if (GGML_LASX)
342
402
  list(APPEND ARCH_FLAGS -mlasx)
@@ -346,6 +406,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
346
406
  endif()
347
407
  elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
348
408
  message(STATUS "riscv64 detected")
409
+ list(APPEND GGML_CPU_SOURCES
410
+ ggml-cpu/arch/riscv/quants.c
411
+ ggml-cpu/arch/riscv/repack.cpp
412
+ )
349
413
  if (GGML_RVV)
350
414
  if (GGML_XTHEADVECTOR)
351
415
  list(APPEND ARCH_FLAGS -march=rv64gc_xtheadvector -mabi=lp64d)
@@ -357,6 +421,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
357
421
  endif()
358
422
  elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
359
423
  message(STATUS "s390x detected")
424
+ list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/s390/quants.c)
360
425
  file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
361
426
  string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})
362
427
 
@@ -380,12 +445,16 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
380
445
  if (GGML_VXE)
381
446
  list(APPEND ARCH_FLAGS -mvx -mzvector)
382
447
  endif()
448
+ elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
449
+ message(STATUS "Wasm detected")
450
+ list (APPEND GGML_CPU_SOURCES ggml-cpu/arch/wasm/quants.c)
383
451
  else()
384
- message(STATUS "Unknown architecture")
452
+ message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
453
+ list(APPEND ARCH_FLAGS -DGGML_CPU_GENERIC)
385
454
  endif()
386
455
 
387
- if (GGML_CPU_AARCH64)
388
- target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_AARCH64)
456
+ if (GGML_CPU_REPACK)
457
+ target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_REPACK)
389
458
  endif()
390
459
 
391
460
  if (GGML_CPU_KLEIDIAI)
@@ -5,7 +5,7 @@
5
5
  #include "ggml-backend.h"
6
6
  #include "ggml-impl.h"
7
7
  #include "ggml-cpu.h"
8
- #include "ggml-cpu-traits.h"
8
+ #include "traits.h"
9
9
 
10
10
  #if defined(__gnu_linux__)
11
11
  #include <sys/syscall.h>
@@ -8,7 +8,7 @@
8
8
  #include "mmq.h"
9
9
  #include "ggml-impl.h"
10
10
  #include "ggml-cpu-impl.h"
11
- #include "ggml-cpu-quants.h"
11
+ #include "quants.h"
12
12
  #include "ggml-quants.h"
13
13
  #include <algorithm>
14
14
  #include <type_traits>
@@ -0,0 +1,94 @@
1
+ #include "ggml-backend-impl.h"
2
+
3
+ #if defined(__aarch64__)
4
+
5
+ #if defined(__linux__)
6
+ #include <sys/auxv.h>
7
+ #elif defined(__APPLE__)
8
+ #include <sys/sysctl.h>
9
+ #endif
10
+
11
+ #if !defined(HWCAP2_I8MM)
12
+ #define HWCAP2_I8MM (1 << 13)
13
+ #endif
14
+
15
+ #if !defined(HWCAP2_SME)
16
+ #define HWCAP2_SME (1 << 23)
17
+ #endif
18
+
19
+ struct aarch64_features {
20
+ // has_neon not needed, aarch64 has NEON guaranteed
21
+ bool has_dotprod = false;
22
+ bool has_fp16_va = false;
23
+ bool has_sve = false;
24
+ bool has_sve2 = false;
25
+ bool has_i8mm = false;
26
+ bool has_sme = false;
27
+
28
+ aarch64_features() {
29
+ #if defined(__linux__)
30
+ uint32_t hwcap = getauxval(AT_HWCAP);
31
+ uint32_t hwcap2 = getauxval(AT_HWCAP2);
32
+
33
+ has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
34
+ has_fp16_va = !!(hwcap & HWCAP_FPHP);
35
+ has_sve = !!(hwcap & HWCAP_SVE);
36
+ has_sve2 = !!(hwcap2 & HWCAP2_SVE2);
37
+ has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
38
+ has_sme = !!(hwcap2 & HWCAP2_SME);
39
+ #elif defined(__APPLE__)
40
+ int oldp = 0;
41
+ size_t size = sizeof(oldp);
42
+
43
+ if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) == 0) {
44
+ has_dotprod = static_cast<bool>(oldp);
45
+ }
46
+
47
+ if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) == 0) {
48
+ has_i8mm = static_cast<bool>(oldp);
49
+ }
50
+
51
+ if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) == 0) {
52
+ has_sme = static_cast<bool>(oldp);
53
+ }
54
+
55
+ // Apple apparently does not implement SVE yet
56
+ #endif
57
+ }
58
+ };
59
+
60
+ static int ggml_backend_cpu_aarch64_score() {
61
+ int score = 1;
62
+ aarch64_features af;
63
+
64
+ #ifdef GGML_USE_DOTPROD
65
+ if (!af.has_dotprod) { return 0; }
66
+ score += 1<<1;
67
+ #endif
68
+ #ifdef GGML_USE_FP16_VECTOR_ARITHMETIC
69
+ if (!af.has_fp16_va) { return 0; }
70
+ score += 1<<2;
71
+ #endif
72
+ #ifdef GGML_USE_SVE
73
+ if (!af.has_sve) { return 0; }
74
+ score += 1<<3;
75
+ #endif
76
+ #ifdef GGML_USE_MATMUL_INT8
77
+ if (!af.has_i8mm) { return 0; }
78
+ score += 1<<4;
79
+ #endif
80
+ #ifdef GGML_USE_SVE2
81
+ if (!af.has_sve2) { return 0; }
82
+ score += 1<<5;
83
+ #endif
84
+ #ifdef GGML_USE_SME
85
+ if (!af.has_sme) { return 0; }
86
+ score += 1<<6;
87
+ #endif
88
+
89
+ return score;
90
+ }
91
+
92
+ GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_aarch64_score)
93
+
94
+ # endif // defined(__aarch64__)