@fugood/llama.node 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286)
  1. package/CMakeLists.txt +7 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +18 -1
  17. package/package.json +1 -1
  18. package/src/DetokenizeWorker.cpp +1 -1
  19. package/src/EmbeddingWorker.cpp +17 -7
  20. package/src/EmbeddingWorker.h +2 -1
  21. package/src/LlamaCompletionWorker.cpp +8 -8
  22. package/src/LlamaCompletionWorker.h +2 -2
  23. package/src/LlamaContext.cpp +89 -27
  24. package/src/LlamaContext.h +2 -0
  25. package/src/TokenizeWorker.cpp +1 -1
  26. package/src/common.hpp +4 -4
  27. package/src/llama.cpp/.github/workflows/build.yml +240 -168
  28. package/src/llama.cpp/.github/workflows/docker.yml +8 -8
  29. package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
  30. package/src/llama.cpp/.github/workflows/server.yml +21 -14
  31. package/src/llama.cpp/CMakeLists.txt +14 -6
  32. package/src/llama.cpp/Sources/llama/llama.h +4 -0
  33. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  34. package/src/llama.cpp/cmake/common.cmake +33 -0
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
  36. package/src/llama.cpp/common/CMakeLists.txt +6 -4
  37. package/src/llama.cpp/common/arg.cpp +986 -770
  38. package/src/llama.cpp/common/arg.h +22 -22
  39. package/src/llama.cpp/common/common.cpp +212 -351
  40. package/src/llama.cpp/common/common.h +204 -117
  41. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  42. package/src/llama.cpp/common/log.cpp +50 -50
  43. package/src/llama.cpp/common/log.h +18 -18
  44. package/src/llama.cpp/common/ngram-cache.cpp +36 -36
  45. package/src/llama.cpp/common/ngram-cache.h +19 -19
  46. package/src/llama.cpp/common/sampling.cpp +163 -121
  47. package/src/llama.cpp/common/sampling.h +41 -20
  48. package/src/llama.cpp/common/speculative.cpp +274 -0
  49. package/src/llama.cpp/common/speculative.h +28 -0
  50. package/src/llama.cpp/docs/build.md +134 -161
  51. package/src/llama.cpp/examples/CMakeLists.txt +33 -14
  52. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/batched/batched.cpp +19 -18
  54. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  55. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
  56. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  57. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
  58. package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
  60. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
  61. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
  63. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
  64. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
  65. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  66. package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
  67. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
  68. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
  69. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
  70. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
  71. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  72. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
  73. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
  75. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  76. package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
  77. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  78. package/src/llama.cpp/examples/imatrix/imatrix.cpp +31 -13
  79. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  80. package/src/llama.cpp/examples/infill/infill.cpp +41 -87
  81. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
  82. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +439 -459
  83. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +2 -0
  84. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
  85. package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
  86. package/src/llama.cpp/examples/llava/clip.cpp +263 -66
  87. package/src/llama.cpp/examples/llava/clip.h +8 -2
  88. package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
  89. package/src/llama.cpp/examples/llava/llava.cpp +83 -22
  90. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
  91. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
  92. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
  94. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  95. package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
  96. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  97. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +16 -15
  98. package/src/llama.cpp/examples/lookup/lookup.cpp +30 -30
  99. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  100. package/src/llama.cpp/examples/main/main.cpp +73 -114
  101. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
  102. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  103. package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
  104. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  105. package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
  106. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  107. package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
  108. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  109. package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
  110. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  111. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
  112. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  113. package/src/llama.cpp/examples/retrieval/retrieval.cpp +16 -16
  114. package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
  115. package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
  116. package/src/llama.cpp/examples/run/run.cpp +911 -0
  117. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  118. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +38 -21
  119. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -16
  120. package/src/llama.cpp/examples/server/server.cpp +2073 -1339
  121. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  122. package/src/llama.cpp/examples/server/utils.hpp +354 -277
  123. package/src/llama.cpp/examples/simple/CMakeLists.txt +2 -2
  124. package/src/llama.cpp/examples/simple/simple.cpp +130 -94
  125. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  126. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +200 -0
  127. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  128. package/src/llama.cpp/examples/speculative/speculative.cpp +68 -64
  129. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
  130. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
  131. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  132. package/src/llama.cpp/examples/tokenize/tokenize.cpp +3 -3
  133. package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
  134. package/src/llama.cpp/examples/tts/tts.cpp +932 -0
  135. package/src/llama.cpp/ggml/CMakeLists.txt +54 -36
  136. package/src/llama.cpp/ggml/include/ggml-backend.h +63 -34
  137. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  138. package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
  139. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  140. package/src/llama.cpp/ggml/include/ggml-cpu.h +135 -0
  141. package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
  142. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  143. package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
  144. package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  145. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  146. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  147. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  148. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  149. package/src/llama.cpp/ggml/include/ggml.h +159 -417
  150. package/src/llama.cpp/ggml/src/CMakeLists.txt +121 -1155
  151. package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -28
  152. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +57 -36
  153. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +552 -0
  154. package/src/llama.cpp/ggml/src/ggml-backend.cpp +306 -867
  155. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  156. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +216 -65
  157. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +76 -0
  158. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
  159. package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
  160. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +343 -177
  161. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
  162. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
  163. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
  164. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
  165. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
  169. package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
  170. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +336 -0
  171. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  172. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  173. package/src/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  174. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  175. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  176. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  177. package/src/llama.cpp/ggml/src/{ggml-aarch64.c → ggml-cpu/ggml-cpu-aarch64.cpp} +1299 -246
  178. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  179. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  180. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  181. package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +14 -242
  182. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  183. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  184. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  185. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  186. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  187. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +628 -0
  188. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +666 -0
  189. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +152 -0
  190. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
  191. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +104 -0
  192. package/src/llama.cpp/ggml/src/ggml-impl.h +393 -22
  193. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  194. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +360 -127
  195. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +105 -0
  196. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  197. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
  198. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
  199. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
  200. package/src/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
  201. package/src/llama.cpp/ggml/src/ggml-quants.c +188 -10702
  202. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
  203. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  204. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +478 -300
  205. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +84 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +36 -5
  208. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +259 -0
  209. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
  210. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  211. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
  212. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +34 -35
  213. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  214. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  215. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
  216. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3638 -4151
  217. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
  218. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
  219. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -87
  220. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +7 -6
  221. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  222. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  223. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
  224. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
  225. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
  226. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
  227. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  228. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  229. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  230. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  231. package/src/llama.cpp/ggml/src/ggml-threading.h +14 -0
  232. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +92 -0
  233. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2138 -887
  234. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +3 -1
  235. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  236. package/src/llama.cpp/ggml/src/ggml.c +4427 -20125
  237. package/src/llama.cpp/include/llama-cpp.h +25 -0
  238. package/src/llama.cpp/include/llama.h +93 -52
  239. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  240. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  241. package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
  242. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  243. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  244. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  245. package/src/llama.cpp/src/CMakeLists.txt +4 -8
  246. package/src/llama.cpp/src/llama-grammar.cpp +15 -15
  247. package/src/llama.cpp/src/llama-grammar.h +2 -5
  248. package/src/llama.cpp/src/llama-sampling.cpp +779 -194
  249. package/src/llama.cpp/src/llama-sampling.h +21 -2
  250. package/src/llama.cpp/src/llama-vocab.cpp +55 -10
  251. package/src/llama.cpp/src/llama-vocab.h +35 -11
  252. package/src/llama.cpp/src/llama.cpp +4317 -2979
  253. package/src/llama.cpp/src/unicode-data.cpp +2 -2
  254. package/src/llama.cpp/src/unicode.cpp +62 -51
  255. package/src/llama.cpp/src/unicode.h +9 -10
  256. package/src/llama.cpp/tests/CMakeLists.txt +48 -38
  257. package/src/llama.cpp/tests/test-arg-parser.cpp +15 -15
  258. package/src/llama.cpp/tests/test-backend-ops.cpp +324 -80
  259. package/src/llama.cpp/tests/test-barrier.cpp +1 -0
  260. package/src/llama.cpp/tests/test-chat-template.cpp +59 -9
  261. package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
  262. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
  263. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
  264. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
  265. package/src/llama.cpp/tests/test-log.cpp +2 -2
  266. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  267. package/src/llama.cpp/tests/test-quantize-fns.cpp +24 -21
  268. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  269. package/src/llama.cpp/tests/test-rope.cpp +62 -20
  270. package/src/llama.cpp/tests/test-sampling.cpp +163 -138
  271. package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
  272. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  273. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  274. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
  275. package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
  276. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
  277. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
  278. package/src/llama.cpp/common/train.cpp +0 -1515
  279. package/src/llama.cpp/common/train.h +0 -233
  280. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
  281. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
  282. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -39
  283. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +0 -600
  284. package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
  285. /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  286. /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
@@ -1,7 +1,5 @@
1
1
  include(CheckCXXCompilerFlag)
2
2
 
3
- unset(GGML_CDEF_PUBLIC)
4
-
5
3
  add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})
6
4
 
7
5
  # enable libstdc++ assertions for debug builds
@@ -26,902 +24,7 @@ if (NOT MSVC)
26
24
  endif()
27
25
  endif()
28
26
 
29
- unset(GGML_EXTRA_LIBS_PRIVATE)
30
- unset(GGML_EXTRA_LIBS_PUBLIC)
31
-
32
- if (APPLE AND GGML_ACCELERATE)
33
- find_library(ACCELERATE_FRAMEWORK Accelerate)
34
- if (ACCELERATE_FRAMEWORK)
35
- message(STATUS "Accelerate framework found")
36
-
37
- add_compile_definitions(GGML_USE_ACCELERATE)
38
- add_compile_definitions(ACCELERATE_NEW_LAPACK)
39
- add_compile_definitions(ACCELERATE_LAPACK_ILP64)
40
-
41
- list(APPEND GGML_EXTRA_LIBS_PRIVATE ${ACCELERATE_FRAMEWORK})
42
- else()
43
- message(WARNING "Accelerate framework not found")
44
- endif()
45
- endif()
46
-
47
- if (GGML_METAL)
48
- find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
49
- find_library(METAL_FRAMEWORK Metal REQUIRED)
50
- find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
51
-
52
- message(STATUS "Metal framework found")
53
- set(GGML_HEADERS_METAL ../include/ggml-metal.h)
54
- set(GGML_SOURCES_METAL ggml-metal.m)
55
-
56
- list(APPEND GGML_CDEF_PUBLIC GGML_USE_METAL)
57
- if (GGML_METAL_NDEBUG)
58
- add_compile_definitions(GGML_METAL_NDEBUG)
59
- endif()
60
-
61
- # copy ggml-common.h and ggml-metal.metal to bin directory
62
- configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY)
63
- configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
64
-
65
- if (GGML_METAL_EMBED_LIBRARY)
66
- enable_language(ASM)
67
-
68
- add_compile_definitions(GGML_METAL_EMBED_LIBRARY)
69
-
70
- set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h")
71
- set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
72
-
73
- file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated")
74
-
75
- # merge ggml-common.h and ggml-metal.metal into a single file
76
- set(METALLIB_EMBED_ASM "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.s")
77
- set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal")
78
-
79
- add_custom_command(
80
- OUTPUT ${METALLIB_EMBED_ASM}
81
- COMMAND echo "Embedding Metal library"
82
- COMMAND sed -e '/\#include \"ggml-common.h\"/r ${METALLIB_COMMON}' -e '/\#include \"ggml-common.h\"/d' < ${METALLIB_SOURCE} > ${METALLIB_SOURCE_EMBED}
83
- COMMAND echo ".section __DATA,__ggml_metallib" > ${METALLIB_EMBED_ASM}
84
- COMMAND echo ".globl _ggml_metallib_start" >> ${METALLIB_EMBED_ASM}
85
- COMMAND echo "_ggml_metallib_start:" >> ${METALLIB_EMBED_ASM}
86
- COMMAND echo ".incbin \\\"${METALLIB_SOURCE_EMBED}\\\"" >> ${METALLIB_EMBED_ASM}
87
- COMMAND echo ".globl _ggml_metallib_end" >> ${METALLIB_EMBED_ASM}
88
- COMMAND echo "_ggml_metallib_end:" >> ${METALLIB_EMBED_ASM}
89
- DEPENDS ggml-metal.metal ggml-common.h
90
- COMMENT "Generate assembly for embedded Metal library"
91
- )
92
-
93
- list(APPEND GGML_SOURCES_METAL ${METALLIB_EMBED_ASM})
94
- else()
95
- if (GGML_METAL_SHADER_DEBUG)
96
- # custom command to do the following:
97
- # xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air
98
- # xcrun -sdk macosx metallib ggml-metal.air -o default.metallib
99
- #
100
- # note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works
101
- # disabling fast math is needed in order to pass tests/test-backend-ops
102
- # note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1
103
- # note: unfortunately, we have to call it default.metallib instead of ggml.metallib
104
- # ref: https://github.com/ggerganov/whisper.cpp/issues/1720
105
- set(XC_FLAGS -fno-fast-math -fno-inline -g)
106
- else()
107
- set(XC_FLAGS -O3)
108
- endif()
109
-
110
- # Append macOS metal versioning flags
111
- if (GGML_METAL_MACOSX_VERSION_MIN)
112
- message(STATUS "Adding -mmacosx-version-min=${GGML_METAL_MACOSX_VERSION_MIN} flag to metal compilation")
113
- list (APPEND XC_FLAGS -mmacosx-version-min=${GGML_METAL_MACOSX_VERSION_MIN})
114
- endif()
115
-
116
- if (GGML_METAL_STD)
117
- message(STATUS "Adding -std=${GGML_METAL_STD} flag to metal compilation")
118
- list (APPEND XC_FLAGS -std=${GGML_METAL_STD})
119
- endif()
120
-
121
- add_custom_command(
122
- OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
123
- COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
124
- COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
125
- COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
126
- COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h
127
- COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal
128
- DEPENDS ggml-metal.metal ggml-common.h
129
- COMMENT "Compiling Metal kernels"
130
- )
131
-
132
- add_custom_target(
133
- ggml-metal ALL
134
- DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
135
- )
136
- endif() # GGML_METAL_EMBED_LIBRARY
137
-
138
- list(APPEND GGML_EXTRA_LIBS_PRIVATE
139
- ${FOUNDATION_LIBRARY}
140
- ${METAL_FRAMEWORK}
141
- ${METALKIT_FRAMEWORK}
142
- )
143
- endif()
144
-
145
- if (GGML_MUSA)
146
- set(CMAKE_C_COMPILER clang)
147
- set(CMAKE_C_EXTENSIONS OFF)
148
- set(CMAKE_CXX_COMPILER clang++)
149
- set(CMAKE_CXX_EXTENSIONS OFF)
150
-
151
- set(GGML_CUDA ON)
152
-
153
- list(APPEND GGML_CDEF_PUBLIC GGML_USE_MUSA)
154
- endif()
155
-
156
- if (GGML_OPENMP)
157
- find_package(OpenMP)
158
- if (OpenMP_FOUND)
159
- message(STATUS "OpenMP found")
160
-
161
- add_compile_definitions(GGML_USE_OPENMP)
162
-
163
- list(APPEND GGML_EXTRA_LIBS_PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
164
-
165
- if (GGML_MUSA)
166
- list(APPEND GGML_EXTRA_INCLUDES "/usr/lib/llvm-10/include/openmp")
167
- list(APPEND GGML_EXTRA_LIBS_PRIVATE "/usr/lib/llvm-10/lib/libomp.so")
168
- endif()
169
- else()
170
- message(WARNING "OpenMP not found")
171
- endif()
172
- endif()
173
-
174
- if (GGML_BLAS)
175
- if (GGML_STATIC)
176
- set(BLA_STATIC ON)
177
- endif()
178
- #if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
179
- # set(BLA_SIZEOF_INTEGER 8)
180
- #endif()
181
-
182
- set(BLA_VENDOR ${GGML_BLAS_VENDOR})
183
- find_package(BLAS)
184
-
185
- if (BLAS_FOUND)
186
- message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
187
-
188
- if (("${BLAS_INCLUDE_DIRS}" STREQUAL "") AND NOT (${GGML_BLAS_VENDOR} MATCHES "Apple"))
189
- # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
190
- # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
191
- find_package(PkgConfig REQUIRED)
192
- if (${GGML_BLAS_VENDOR} MATCHES "Generic")
193
- pkg_check_modules(DepBLAS REQUIRED blas)
194
- elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS")
195
- # As of openblas v0.3.22, the 64-bit is named openblas64.pc
196
- pkg_check_modules(DepBLAS openblas64)
197
- if (NOT DepBLAS_FOUND)
198
- pkg_check_modules(DepBLAS REQUIRED openblas)
199
- endif()
200
- elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME")
201
- pkg_check_modules(DepBLAS REQUIRED blis)
202
- elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS")
203
- pkg_check_modules(DepBLAS REQUIRED blas-atlas)
204
- elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS")
205
- pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
206
- elseif (${GGML_BLAS_VENDOR} MATCHES "Intel")
207
- # all Intel* libraries share the same include path
208
- pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
209
- elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC")
210
- # this doesn't provide pkg-config
211
- # suggest to assign BLAS_INCLUDE_DIRS on your own
212
- if ("${NVHPC_VERSION}" STREQUAL "")
213
- message(WARNING "Better to set NVHPC_VERSION")
214
- else()
215
- set(DepBLAS_FOUND ON)
216
- set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
217
- endif()
218
- endif()
219
- if (DepBLAS_FOUND)
220
- set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
221
- else()
222
- message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
223
- " detected by pkgconfig, trying to find cblas.h from possible paths...")
224
- find_path(BLAS_INCLUDE_DIRS
225
- NAMES cblas.h
226
- HINTS
227
- /usr/include
228
- /usr/local/include
229
- /usr/include/openblas
230
- /opt/homebrew/opt/openblas/include
231
- /usr/local/opt/openblas/include
232
- /usr/include/x86_64-linux-gnu/openblas/include
233
- )
234
- endif()
235
- endif()
236
-
237
- message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
238
-
239
- add_compile_options(${BLAS_LINKER_FLAGS})
240
-
241
- list(APPEND GGML_CDEF_PUBLIC GGML_USE_BLAS)
242
-
243
- if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel"))
244
- add_compile_definitions(GGML_BLAS_USE_MKL)
245
- endif()
246
-
247
- set(GGML_HEADERS_BLAS ../include/ggml-blas.h)
248
- set(GGML_SOURCES_BLAS ggml-blas.cpp)
249
-
250
- list(APPEND GGML_EXTRA_LIBS_PRIVATE ${BLAS_LIBRARIES})
251
- list(APPEND GGML_EXTRA_INCLUDES ${BLAS_INCLUDE_DIRS})
252
- else()
253
- message(WARNING "BLAS not found, please refer to "
254
- "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
255
- " to set correct GGML_BLAS_VENDOR")
256
- endif()
257
- endif()
258
-
259
- if (GGML_LLAMAFILE)
260
- message(STATUS "Using llamafile")
261
-
262
- add_compile_definitions(GGML_USE_LLAMAFILE)
263
-
264
- set(GGML_HEADERS_LLAMAFILE llamafile/sgemm.h)
265
- set(GGML_SOURCES_LLAMAFILE llamafile/sgemm.cpp)
266
- endif()
267
-
268
- if (GGML_CUDA)
269
- cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES
270
-
271
- if (GGML_MUSA)
272
- list(APPEND CMAKE_MODULE_PATH "/usr/local/musa/cmake/")
273
- find_package(MUSAToolkit)
274
- set(CUDAToolkit_FOUND ${MUSAToolkit_FOUND})
275
- else()
276
- find_package(CUDAToolkit)
277
- endif()
278
-
279
- if (CUDAToolkit_FOUND)
280
- message(STATUS "CUDA found")
281
-
282
- if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
283
- # 52 == lowest CUDA 12 standard
284
- # 60 == FP16 CUDA intrinsics
285
- # 61 == integer CUDA intrinsics
286
- # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
287
- if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
288
- set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75")
289
- else()
290
- set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75")
291
- #set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
292
- endif()
293
- endif()
294
- message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
295
-
296
- if (GGML_MUSA)
297
- set(CMAKE_CUDA_COMPILER ${MUSAToolkit_MCC_EXECUTABLE})
298
- else()
299
- enable_language(CUDA)
300
- endif()
301
-
302
- file(GLOB GGML_HEADERS_CUDA "ggml-cuda/*.cuh")
303
- list(APPEND GGML_HEADERS_CUDA "../include/ggml-cuda.h")
304
-
305
- file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu")
306
- list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu")
307
- file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu")
308
- list(APPEND GGML_SOURCES_CUDA ${SRCS})
309
- file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
310
- list(APPEND GGML_SOURCES_CUDA ${SRCS})
311
-
312
- if (GGML_CUDA_FA_ALL_QUANTS)
313
- file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu")
314
- list(APPEND GGML_SOURCES_CUDA ${SRCS})
315
- add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
316
- else()
317
- file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
318
- list(APPEND GGML_SOURCES_CUDA ${SRCS})
319
- file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
320
- list(APPEND GGML_SOURCES_CUDA ${SRCS})
321
- file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
322
- list(APPEND GGML_SOURCES_CUDA ${SRCS})
323
- endif()
324
-
325
- list(APPEND GGML_CDEF_PUBLIC GGML_USE_CUDA)
326
-
327
- add_compile_definitions(GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X})
328
- add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y})
329
- add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER})
330
- add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
331
-
332
- if (GGML_CUDA_GRAPHS)
333
- add_compile_definitions(GGML_CUDA_USE_GRAPHS)
334
- endif()
335
-
336
- if (GGML_CUDA_FORCE_DMMV)
337
- add_compile_definitions(GGML_CUDA_FORCE_DMMV)
338
- endif()
339
-
340
- if (GGML_CUDA_FORCE_MMQ)
341
- add_compile_definitions(GGML_CUDA_FORCE_MMQ)
342
- endif()
343
-
344
- if (GGML_CUDA_FORCE_CUBLAS)
345
- add_compile_definitions(GGML_CUDA_FORCE_CUBLAS)
346
- endif()
347
-
348
- if (GGML_CUDA_NO_VMM)
349
- add_compile_definitions(GGML_CUDA_NO_VMM)
350
- endif()
351
-
352
- if (DEFINED GGML_CUDA_DMMV_Y)
353
- add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_DMMV_Y}) # for backwards compatibility
354
- endif()
355
-
356
- if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
357
- add_compile_definitions(GGML_CUDA_F16)
358
- endif()
359
-
360
- if (GGML_CUDA_NO_PEER_COPY)
361
- add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
362
- endif()
363
-
364
- if (GGML_MUSA)
365
- set_source_files_properties(${GGML_SOURCES_CUDA} PROPERTIES LANGUAGE CXX)
366
- foreach(SOURCE ${GGML_SOURCES_CUDA})
367
- set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22")
368
- endforeach()
369
- endif()
370
-
371
- if (GGML_STATIC)
372
- if (WIN32)
373
- # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
374
- list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
375
- else ()
376
- if (GGML_MUSA)
377
- list(APPEND GGML_EXTRA_LIBS_PRIVATE MUSA::musart_static MUSA::mublas_static)
378
- else()
379
- list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
380
- endif()
381
- endif()
382
- else()
383
- if (GGML_MUSA)
384
- list(APPEND GGML_EXTRA_LIBS_PRIVATE MUSA::musart MUSA::mublas)
385
- else()
386
- list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cudart CUDA::cublas CUDA::cublasLt)
387
- endif()
388
- endif()
389
-
390
- if (GGML_CUDA_NO_VMM)
391
- # No VMM requested, no need to link directly with the cuda driver lib (libcuda.so)
392
- else()
393
- if (GGML_MUSA)
394
- list(APPEND GGML_EXTRA_LIBS_PRIVATE MUSA::musa_driver) # required by muDeviceGetAttribute(), muMemGetAllocationGranularity(...), ...
395
- else()
396
- list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ...
397
- endif()
398
- endif()
399
- else()
400
- message(WARNING "CUDA not found")
401
- endif()
402
- endif()
403
-
404
- if (GGML_HIPBLAS)
405
- if (NOT EXISTS $ENV{ROCM_PATH})
406
- if (NOT EXISTS /opt/rocm)
407
- set(ROCM_PATH /usr)
408
- else()
409
- set(ROCM_PATH /opt/rocm)
410
- endif()
411
- else()
412
- set(ROCM_PATH $ENV{ROCM_PATH})
413
- endif()
414
-
415
- list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
416
- list(APPEND CMAKE_PREFIX_PATH "${ROCM_PATH}/lib64/cmake")
417
-
418
- # CMake on Windows doesn't support the HIP language yet
419
- if (WIN32)
420
- set(CXX_IS_HIPCC TRUE)
421
- else()
422
- string(REGEX MATCH "hipcc(\.bat)?$" CXX_IS_HIPCC "${CMAKE_CXX_COMPILER}")
423
- endif()
424
-
425
- if (CXX_IS_HIPCC)
426
- if (LINUX)
427
- if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
428
- message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
429
- endif()
430
-
431
- message(WARNING "Setting hipcc as the C++ compiler is legacy behavior."
432
- " Prefer setting the HIP compiler directly. See README for details.")
433
- endif()
434
- else()
435
- # Forward AMDGPU_TARGETS to CMAKE_HIP_ARCHITECTURES.
436
- if (AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES)
437
- set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS})
438
- endif()
439
- cmake_minimum_required(VERSION 3.21)
440
- enable_language(HIP)
441
- endif()
442
-
443
- find_package(hip REQUIRED)
444
- find_package(hipblas REQUIRED)
445
- find_package(rocblas REQUIRED)
446
-
447
- message(STATUS "HIP and hipBLAS found")
448
-
449
- file(GLOB GGML_HEADERS_ROCM "ggml-cuda/*.cuh")
450
- list(APPEND GGML_HEADERS_ROCM "../include/ggml-cuda.h")
451
-
452
- file(GLOB GGML_SOURCES_ROCM "ggml-cuda/*.cu")
453
- list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu")
454
- file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu")
455
- list(APPEND GGML_SOURCES_ROCM ${SRCS})
456
- file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
457
- list(APPEND GGML_SOURCES_ROCM ${SRCS})
458
-
459
- if (GGML_CUDA_FA_ALL_QUANTS)
460
- file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu")
461
- list(APPEND GGML_SOURCES_ROCM ${SRCS})
462
- add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
463
- else()
464
- file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
465
- list(APPEND GGML_SOURCES_ROCM ${SRCS})
466
- file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
467
- list(APPEND GGML_SOURCES_ROCM ${SRCS})
468
- file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
469
- list(APPEND GGML_SOURCES_ROCM ${SRCS})
470
- endif()
471
-
472
- list(APPEND GGML_CDEF_PUBLIC GGML_USE_CUDA)
473
-
474
- add_compile_definitions(GGML_USE_HIPBLAS)
475
- add_compile_definitions(GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X})
476
- add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y})
477
- add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER})
478
-
479
- if (GGML_HIP_UMA)
480
- add_compile_definitions(GGML_HIP_UMA)
481
- endif()
482
-
483
- if (GGML_CUDA_FORCE_DMMV)
484
- add_compile_definitions(GGML_CUDA_FORCE_DMMV)
485
- endif()
486
-
487
- if (GGML_CUDA_FORCE_MMQ)
488
- add_compile_definitions(GGML_CUDA_FORCE_MMQ)
489
- endif()
490
-
491
- if (GGML_CUDA_FORCE_CUBLAS)
492
- add_compile_definitions(GGML_CUDA_FORCE_CUBLAS)
493
- endif()
494
-
495
- if (GGML_CUDA_NO_PEER_COPY)
496
- add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
497
- endif()
498
-
499
- if (CXX_IS_HIPCC)
500
- set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
501
- list(APPEND GGML_EXTRA_LIBS_PRIVATE hip::device)
502
- else()
503
- set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP)
504
- endif()
505
-
506
- if (GGML_STATIC)
507
- message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
508
- endif()
509
-
510
- list(APPEND GGML_EXTRA_LIBS_PUBLIC hip::host roc::rocblas roc::hipblas)
511
- endif()
512
-
513
- if (GGML_SYCL)
514
- if (NOT GGML_SYCL_TARGET MATCHES "^(INTEL|NVIDIA|AMD)$")
515
- message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL, NVIDIA, or AMD")
516
- endif()
517
-
518
- check_cxx_compiler_flag("-fsycl" SUPPORTS_SYCL)
519
-
520
- if (DEFINED ENV{ONEAPI_ROOT})
521
- message(STATUS "Using oneAPI Release SYCL compiler (icpx).")
522
- elseif(SUPPORTS_SYCL)
523
- message(WARNING "Using open-source SYCL compiler (clang++). Didn't detect ENV {ONEAPI_ROOT}.
524
- If you expected the oneAPI Release compiler, please install oneAPI & source it, like:
525
- source /opt/intel/oneapi/setvars.sh")
526
- else()
527
- message(FATAL_ERROR, "C++ compiler lacks SYCL support.")
528
- endif()
529
- message(STATUS "SYCL found")
530
- #todo: AOT
531
-
532
- list(APPEND GGML_CDEF_PUBLIC GGML_USE_SYCL)
533
-
534
- if (GGML_SYCL_F16)
535
- if (GGML_SYCL_TARGET STREQUAL "AMD")
536
- message(WARNING "AMD target does not entirely support FP16 in the SYCL backend.")
537
- endif()
538
- add_compile_definitions(GGML_SYCL_F16)
539
- endif()
540
-
541
- if (GGML_CUDA_FORCE_MMQ)
542
- add_compile_definitions(GGML_SYCL_FORCE_MMQ)
543
- endif()
544
-
545
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing -fsycl")
546
-
547
- if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
548
- add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
549
- elseif (GGML_SYCL_TARGET STREQUAL "AMD")
550
- # INFO: Allowed Sub_group_sizes are not consistent through all
551
- # hip targets. For example, 64 is used for certain models, but the backend
552
- # does not support it.
553
- # Target archs tested working: gfx1030, gfx1031, (Only tested sub_group_size = 32)
554
- add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
555
- else()
556
- add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
557
- endif()
558
-
559
- file(GLOB GGML_HEADERS_SYCL "ggml-sycl/*.hpp")
560
- list(APPEND GGML_HEADERS_SYCL "../include/ggml-sycl.h")
561
-
562
- file(GLOB GGML_SOURCES_SYCL "ggml-sycl/*.cpp")
563
- list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")
564
-
565
- find_package(DNNL)
566
- message("-- DNNL found:" ${DNNL_FOUND})
567
-
568
- if (GGML_SYCL_TARGET STREQUAL "INTEL")
569
- add_compile_definitions(GGML_SYCL_DNNL=${DNNL_FOUND})
570
- else()
571
- add_compile_definitions(GGML_SYCL_DNNL=0)
572
- endif()
573
-
574
- if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
575
- list(APPEND GGML_EXTRA_LIBS_PRIVATE DNNL::dnnl)
576
- endif()
577
-
578
- if (WIN32)
579
- find_package(IntelSYCL REQUIRED)
580
- find_package(MKL REQUIRED)
581
- list(APPEND GGML_EXTRA_LIBS_PRIVATE IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
582
- else()
583
- if (GGML_SYCL_TARGET STREQUAL "INTEL")
584
- list(APPEND GGML_EXTRA_LIBS_PRIVATE sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
585
- elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
586
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
587
- list(APPEND GGML_EXTRA_LIBS_PRIVATE sycl pthread m dl onemkl)
588
- elseif (GGML_SYCL_TARGET STREQUAL "AMD")
589
- if (GGML_SYCL_HIP_TARGET STREQUAL "")
590
- message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_HIP_TARGET has not been set.")
591
- endif()
592
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${GGML_SYCL_HIP_TARGET}")
593
- list(APPEND GGML_EXTRA_LIBS_PRIVATE sycl pthread m dl onemkl)
594
- endif()
595
- endif()
596
- endif()
597
-
598
- if (GGML_RPC)
599
- message(STATUS "RPC found")
600
-
601
- list(APPEND GGML_CDEF_PUBLIC GGML_USE_RPC)
602
-
603
- if (WIN32)
604
- list(APPEND GGML_EXTRA_LIBS_PRIVATE ws2_32)
605
- endif()
606
-
607
- set(GGML_HEADERS_RPC ../include/ggml-rpc.h)
608
- set(GGML_SOURCES_RPC ggml-rpc.cpp)
609
- endif()
610
-
611
- if (GGML_VULKAN)
612
- find_package(Vulkan COMPONENTS glslc REQUIRED)
613
-
614
- if (Vulkan_FOUND)
615
- message(STATUS "Vulkan found")
616
-
617
- list(APPEND GGML_CDEF_PUBLIC GGML_USE_VULKAN)
618
-
619
- # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build
620
- # Posssibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector
621
- if (MSVC AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
622
- add_compile_definitions(_ITERATOR_DEBUG_LEVEL=0)
623
- endif()
624
-
625
- if (GGML_VULKAN_CHECK_RESULTS)
626
- add_compile_definitions(GGML_VULKAN_CHECK_RESULTS)
627
- endif()
628
-
629
- if (GGML_VULKAN_DEBUG)
630
- add_compile_definitions(GGML_VULKAN_DEBUG)
631
- endif()
632
-
633
- if (GGML_VULKAN_MEMORY_DEBUG)
634
- add_compile_definitions(GGML_VULKAN_MEMORY_DEBUG)
635
- endif()
636
-
637
- if (GGML_VULKAN_SHADER_DEBUG_INFO)
638
- add_compile_definitions(GGML_VULKAN_SHADER_DEBUG_INFO)
639
- endif()
640
-
641
- if (GGML_VULKAN_PERF)
642
- add_compile_definitions(GGML_VULKAN_PERF)
643
- endif()
644
-
645
- if (GGML_VULKAN_VALIDATE)
646
- add_compile_definitions(GGML_VULKAN_VALIDATE)
647
- endif()
648
-
649
- if (GGML_VULKAN_RUN_TESTS)
650
- add_compile_definitions(GGML_VULKAN_RUN_TESTS)
651
- endif()
652
-
653
- add_subdirectory(vulkan-shaders)
654
-
655
- set (_ggml_vk_genshaders_cmd vulkan-shaders-gen)
656
- set (_ggml_vk_header ${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.hpp)
657
- set (_ggml_vk_source ${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.cpp)
658
- set (_ggml_vk_input_dir ${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders)
659
- set (_ggml_vk_output_dir ${CMAKE_CURRENT_BINARY_DIR}/vulkan-shaders.spv)
660
-
661
- file(GLOB _ggml_vk_shader_deps "${_ggml_vk_input_dir}/*.comp")
662
-
663
- add_custom_command(
664
- OUTPUT ${_ggml_vk_header}
665
- ${_ggml_vk_source}
666
-
667
- COMMAND ${_ggml_vk_genshaders_cmd}
668
- --glslc ${Vulkan_GLSLC_EXECUTABLE}
669
- --input-dir ${_ggml_vk_input_dir}
670
- --output-dir ${_ggml_vk_output_dir}
671
- --target-hpp ${_ggml_vk_header}
672
- --target-cpp ${_ggml_vk_source}
673
- --no-clean
674
-
675
- DEPENDS ${_ggml_vk_shader_deps}
676
- COMMENT "Generate vulkan shaders"
677
- )
678
-
679
- set(GGML_HEADERS_VULKAN ${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml-vulkan.h ${_ggml_vk_header})
680
- set(GGML_SOURCES_VULKAN ggml-vulkan.cpp ${_ggml_vk_source})
681
-
682
- list(APPEND GGML_EXTRA_LIBS_PRIVATE Vulkan::Vulkan)
683
- list(APPEND GGML_EXTRA_INCLUDES ${CMAKE_CURRENT_BINARY_DIR})
684
- else()
685
- message(WARNING "Vulkan not found")
686
- endif()
687
- endif()
688
-
689
- if (GGML_KOMPUTE)
690
- add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
691
-
692
- find_package(Vulkan COMPONENTS glslc REQUIRED)
693
- find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc)
694
-
695
- if (NOT glslc_executable)
696
- message(FATAL_ERROR "glslc not found")
697
- endif()
698
-
699
- function(compile_shader)
700
- set(options)
701
- set(oneValueArgs)
702
- set(multiValueArgs SOURCES)
703
- cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
704
- foreach(source ${compile_shader_SOURCES})
705
- get_filename_component(filename ${source} NAME)
706
- set(spv_file ${filename}.spv)
707
- add_custom_command(
708
- OUTPUT ${spv_file}
709
- DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source}
710
- ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp
711
- ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp
712
- ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp
713
- ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp
714
- COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source}
715
- COMMENT "Compiling ${source} to ${spv_file}"
716
- )
717
-
718
- get_filename_component(RAW_FILE_NAME ${spv_file} NAME)
719
- set(FILE_NAME "shader${RAW_FILE_NAME}")
720
- string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME})
721
- string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE)
722
- string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}")
723
- set(OUTPUT_HEADER_FILE "${HEADER_FILE}")
724
- message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}")
725
- if(CMAKE_GENERATOR MATCHES "Visual Studio")
726
- add_custom_command(
727
- OUTPUT ${OUTPUT_HEADER_FILE}
728
- COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
729
- COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
730
- COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
731
- COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
732
- COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
733
- COMMAND ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
734
- COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
735
- COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
736
- DEPENDS ${spv_file} xxd
737
- COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd"
738
- )
739
- else()
740
- add_custom_command(
741
- OUTPUT ${OUTPUT_HEADER_FILE}
742
- COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
743
- COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
744
- COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
745
- COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
746
- COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
747
- COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
748
- COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
749
- COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
750
- DEPENDS ${spv_file} xxd
751
- COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd"
752
- )
753
- endif()
754
- endforeach()
755
- endfunction()
756
-
757
- if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
758
- message(STATUS "Kompute found")
759
- set(KOMPUTE_OPT_LOG_LEVEL Error CACHE STRING "Kompute log level")
760
- add_subdirectory(kompute)
761
-
762
- # Compile our shaders
763
- compile_shader(SOURCES
764
- kompute-shaders/op_scale.comp
765
- kompute-shaders/op_scale_8.comp
766
- kompute-shaders/op_add.comp
767
- kompute-shaders/op_addrow.comp
768
- kompute-shaders/op_mul.comp
769
- kompute-shaders/op_silu.comp
770
- kompute-shaders/op_relu.comp
771
- kompute-shaders/op_gelu.comp
772
- kompute-shaders/op_softmax.comp
773
- kompute-shaders/op_norm.comp
774
- kompute-shaders/op_rmsnorm.comp
775
- kompute-shaders/op_diagmask.comp
776
- kompute-shaders/op_mul_mat_mat_f32.comp
777
- kompute-shaders/op_mul_mat_f16.comp
778
- kompute-shaders/op_mul_mat_q8_0.comp
779
- kompute-shaders/op_mul_mat_q4_0.comp
780
- kompute-shaders/op_mul_mat_q4_1.comp
781
- kompute-shaders/op_mul_mat_q6_k.comp
782
- kompute-shaders/op_getrows_f32.comp
783
- kompute-shaders/op_getrows_f16.comp
784
- kompute-shaders/op_getrows_q4_0.comp
785
- kompute-shaders/op_getrows_q4_1.comp
786
- kompute-shaders/op_getrows_q6_k.comp
787
- kompute-shaders/op_rope_f16.comp
788
- kompute-shaders/op_rope_f32.comp
789
- kompute-shaders/op_cpy_f16_f16.comp
790
- kompute-shaders/op_cpy_f16_f32.comp
791
- kompute-shaders/op_cpy_f32_f16.comp
792
- kompute-shaders/op_cpy_f32_f32.comp
793
- )
794
-
795
- # Create a custom target for our generated shaders
796
- add_custom_target(generated_shaders DEPENDS
797
- shaderop_scale.h
798
- shaderop_scale_8.h
799
- shaderop_add.h
800
- shaderop_addrow.h
801
- shaderop_mul.h
802
- shaderop_silu.h
803
- shaderop_relu.h
804
- shaderop_gelu.h
805
- shaderop_softmax.h
806
- shaderop_norm.h
807
- shaderop_rmsnorm.h
808
- shaderop_diagmask.h
809
- shaderop_mul_mat_mat_f32.h
810
- shaderop_mul_mat_f16.h
811
- shaderop_mul_mat_q8_0.h
812
- shaderop_mul_mat_q4_0.h
813
- shaderop_mul_mat_q4_1.h
814
- shaderop_mul_mat_q6_k.h
815
- shaderop_getrows_f32.h
816
- shaderop_getrows_f16.h
817
- shaderop_getrows_q4_0.h
818
- shaderop_getrows_q4_1.h
819
- shaderop_getrows_q6_k.h
820
- shaderop_rope_f16.h
821
- shaderop_rope_f32.h
822
- shaderop_cpy_f16_f16.h
823
- shaderop_cpy_f16_f32.h
824
- shaderop_cpy_f32_f16.h
825
- shaderop_cpy_f32_f32.h
826
- )
827
-
828
- # Create a custom command that depends on the generated_shaders
829
- add_custom_command(
830
- OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
831
- COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
832
- DEPENDS generated_shaders
833
- COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp"
834
- )
835
-
836
- # Add the stamp to the main sources to ensure dependency tracking
837
- set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
838
- set(GGML_HEADERS_KOMPUTE ../include/ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
839
-
840
- list(APPEND GGML_CDEF_PUBLIC GGML_USE_KOMPUTE)
841
-
842
- list(APPEND GGML_EXTRA_LIBS_PRIVATE kompute)
843
- list(APPEND GGML_EXTRA_INCLUDES ${CMAKE_CURRENT_BINARY_DIR})
844
- else()
845
- message(WARNING "Kompute not found")
846
- endif()
847
- endif()
848
-
849
- if (GGML_CPU_HBM)
850
- find_library(memkind memkind REQUIRED)
851
-
852
- message(STATUS "Using memkind for CPU HBM")
853
-
854
- add_compile_definitions(GGML_USE_CPU_HBM)
855
-
856
- target_link_libraries(ggml PUBLIC memkind)
857
- endif()
858
-
859
- if (GGML_CANN)
860
- if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
861
- set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME})
862
- message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
863
- endif()
864
-
865
- if (CANN_INSTALL_DIR)
866
- # Only Support Linux.
867
- if (GGML_CANN)
868
- if (NOT UNIX)
869
- set(GGML_CANN OFF)
870
- message(WARNING "CANN: CANN toolkit supports unix but not ${CMAKE_SYSTEM_NAME}. Turning off GGML_CANN")
871
- endif()
872
- endif()
873
-
874
- # Supported platforms: x86-64, arm64
875
- if (GGML_CANN)
876
- if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
877
- elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
878
- else()
879
- set(GGML_CANN OFF)
880
- message(WARNING "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}. Turning off GGML_CANN")
881
- endif()
882
- endif()
883
-
884
- # Set header and libs
885
- if(GGML_CANN)
886
- set(CANN_INCLUDE_DIRS
887
- ${CANN_INSTALL_DIR}/include
888
- ${CANN_INSTALL_DIR}/include/aclnn
889
- ${CANN_INSTALL_DIR}/acllib/include
890
- )
891
-
892
- add_subdirectory(ggml-cann/kernels)
893
- list(APPEND CANN_LIBRARIES
894
- ascendcl
895
- nnopbase
896
- opapi
897
- acl_op_compiler
898
- ascendc_kernels
899
- )
900
-
901
- set(GGML_HEADERS_CANN "../include/ggml-cann.h")
902
- file(GLOB GGML_SOURCES_CANN "ggml-cann/*.cpp")
903
- list(APPEND GGML_SOURCES_CANN "ggml-cann.cpp")
904
-
905
- message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
906
- message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
907
-
908
- list(APPEND GGML_EXTRA_LIBS_PRIVATE ${CANN_LIBRARIES} )
909
- list(APPEND GGML_EXTRA_INCLUDES ${CANN_INCLUDE_DIRS})
910
- list(APPEND GGML_EXTRA_LIBDIRS ${CANN_INSTALL_DIR}/lib64)
911
-
912
- list(APPEND GGML_CDEF_PUBLIC GGML_USE_CANN)
913
- endif()
914
- else()
915
- set(GGML_CANN OFF)
916
- message(WARNING "CANN: Can't find CANN_INSTALL_DIR, do you forget to source set_var.sh. Turning off GGML_CANN")
917
- endif()
918
-
919
- if(NOT GGML_CANN)
920
- message(WARNING "CANN: GGML_CANN is turned OFF, see above for details.")
921
- endif()
922
- endif()
923
-
924
- function(get_flags CCID CCVER)
27
+ function(ggml_get_flags CCID CCVER)
925
28
  set(C_FLAGS "")
926
29
  set(CXX_FLAGS "")
927
30
 
@@ -939,11 +42,6 @@ function(get_flags CCID CCVER)
939
42
  set(C_FLAGS -Wdouble-promotion)
940
43
  set(CXX_FLAGS -Wno-array-bounds)
941
44
 
942
- if (NOT GGML_MUSA)
943
- if (CCVER VERSION_GREATER_EQUAL 7.1.0)
944
- list(APPEND CXX_FLAGS -Wno-format-truncation)
945
- endif()
946
- endif()
947
45
  if (CCVER VERSION_GREATER_EQUAL 8.1.0)
948
46
  list(APPEND CXX_FLAGS -Wextra-semi)
949
47
  endif()
@@ -972,7 +70,7 @@ if (GGML_ALL_WARNINGS)
972
70
  list(APPEND C_FLAGS ${WARNING_FLAGS})
973
71
  list(APPEND CXX_FLAGS ${WARNING_FLAGS})
974
72
 
975
- get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
73
+ ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
976
74
 
977
75
  add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
978
76
  "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
@@ -983,54 +81,6 @@ if (GGML_ALL_WARNINGS)
983
81
  endif()
984
82
  endif()
985
83
 
986
- set(CUDA_CXX_FLAGS "")
987
-
988
- if (GGML_CUDA)
989
- set(CUDA_FLAGS -use_fast_math)
990
-
991
- if (GGML_FATAL_WARNINGS)
992
- list(APPEND CUDA_FLAGS -Werror all-warnings)
993
- endif()
994
-
995
- if (GGML_ALL_WARNINGS AND NOT MSVC)
996
- set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
997
- if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
998
- list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
999
- endif()
1000
-
1001
- execute_process(
1002
- COMMAND ${NVCC_CMD} -Xcompiler --version
1003
- OUTPUT_VARIABLE CUDA_CCFULLVER
1004
- ERROR_QUIET
1005
- )
1006
-
1007
- if (NOT CUDA_CCFULLVER MATCHES clang)
1008
- set(CUDA_CCID "GNU")
1009
- execute_process(
1010
- COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
1011
- OUTPUT_VARIABLE CUDA_CCVER
1012
- ERROR_QUIET
1013
- )
1014
- else()
1015
- if (CUDA_CCFULLVER MATCHES Apple)
1016
- set(CUDA_CCID "AppleClang")
1017
- else()
1018
- set(CUDA_CCID "Clang")
1019
- endif()
1020
- string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER})
1021
- endif()
1022
-
1023
- message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
1024
-
1025
- get_flags(${CUDA_CCID} ${CUDA_CCVER})
1026
- list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later
1027
- endif()
1028
-
1029
- if (NOT MSVC)
1030
- list(APPEND CUDA_CXX_FLAGS -Wno-pedantic)
1031
- endif()
1032
- endif()
1033
-
1034
84
  if (GGML_LTO)
1035
85
  include(CheckIPOSupported)
1036
86
  check_ipo_supported(RESULT result OUTPUT output)
@@ -1088,168 +138,6 @@ if (NOT MSVC)
1088
138
  endif()
1089
139
  endif()
1090
140
 
1091
- set(ARCH_FLAGS "")
1092
-
1093
- if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
1094
- CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
1095
- (NOT CMAKE_OSX_ARCHITECTURES AND
1096
- NOT CMAKE_GENERATOR_PLATFORM_LWR AND
1097
- CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
1098
-
1099
- message(STATUS "ARM detected")
1100
-
1101
- if (MSVC)
1102
- add_compile_definitions(__aarch64__) # MSVC defines _M_ARM64 instead
1103
- add_compile_definitions(__ARM_NEON)
1104
- add_compile_definitions(__ARM_FEATURE_FMA)
1105
-
1106
- set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
1107
- string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")
1108
-
1109
- check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
1110
- if (GGML_COMPILER_SUPPORT_DOTPROD)
1111
- add_compile_definitions(__ARM_FEATURE_DOTPROD)
1112
- endif ()
1113
-
1114
- check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmlaq_f32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
1115
-
1116
- if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
1117
- add_compile_definitions(__ARM_FEATURE_MATMUL_INT8)
1118
- endif ()
1119
-
1120
- check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
1121
- if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
1122
- add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
1123
- endif ()
1124
-
1125
- set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV})
1126
- else()
1127
- check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
1128
- if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
1129
- list(APPEND ARCH_FLAGS -mfp16-format=ieee)
1130
- endif()
1131
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
1132
- # Raspberry Pi 1, Zero
1133
- list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
1134
- endif()
1135
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
1136
- if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
1137
- # Android armeabi-v7a
1138
- list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
1139
- else()
1140
- # Raspberry Pi 2
1141
- list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
1142
- endif()
1143
- endif()
1144
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
1145
- # Android arm64-v8a
1146
- # Raspberry Pi 3, 4, Zero 2 (32-bit)
1147
- list(APPEND ARCH_FLAGS -mno-unaligned-access)
1148
- endif()
1149
- if (GGML_SVE)
1150
- list(APPEND ARCH_FLAGS -march=armv8.6-a+sve)
1151
- endif()
1152
- endif()
1153
- elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
1154
- (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
1155
- CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$"))
1156
- message(STATUS "x86 detected")
1157
- if (MSVC)
1158
- # instruction set detection for MSVC only
1159
- if (GGML_NATIVE)
1160
- # TODO: improve, should not reference files from the parent folder
1161
- include(../cmake/FindSIMD.cmake)
1162
- endif ()
1163
- if (GGML_AVX512)
1164
- list(APPEND ARCH_FLAGS /arch:AVX512)
1165
- # MSVC has no compile-time flags enabling specific
1166
- # AVX512 extensions, neither it defines the
1167
- # macros corresponding to the extensions.
1168
- # Do it manually.
1169
- if (GGML_AVX512_VBMI)
1170
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
1171
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
1172
- endif()
1173
- if (GGML_AVX512_VNNI)
1174
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
1175
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
1176
- endif()
1177
- if (GGML_AVX512_BF16)
1178
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
1179
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
1180
- endif()
1181
- elseif (GGML_AVX2)
1182
- list(APPEND ARCH_FLAGS /arch:AVX2)
1183
- elseif (GGML_AVX)
1184
- list(APPEND ARCH_FLAGS /arch:AVX)
1185
- endif()
1186
- else()
1187
- if (GGML_NATIVE)
1188
- list(APPEND ARCH_FLAGS -march=native)
1189
- endif()
1190
- if (GGML_F16C)
1191
- list(APPEND ARCH_FLAGS -mf16c)
1192
- endif()
1193
- if (GGML_FMA)
1194
- list(APPEND ARCH_FLAGS -mfma)
1195
- endif()
1196
- if (GGML_AVX)
1197
- list(APPEND ARCH_FLAGS -mavx)
1198
- endif()
1199
- if (GGML_AVX2)
1200
- list(APPEND ARCH_FLAGS -mavx2)
1201
- endif()
1202
- if (GGML_AVX512)
1203
- list(APPEND ARCH_FLAGS -mavx512f)
1204
- list(APPEND ARCH_FLAGS -mavx512dq)
1205
- list(APPEND ARCH_FLAGS -mavx512bw)
1206
- endif()
1207
- if (GGML_AVX512_VBMI)
1208
- list(APPEND ARCH_FLAGS -mavx512vbmi)
1209
- endif()
1210
- if (GGML_AVX512_VNNI)
1211
- list(APPEND ARCH_FLAGS -mavx512vnni)
1212
- endif()
1213
- if (GGML_AVX512_BF16)
1214
- list(APPEND ARCH_FLAGS -mavx512bf16)
1215
- endif()
1216
- endif()
1217
- elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
1218
- message(STATUS "PowerPC detected")
1219
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
1220
- list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
1221
- else()
1222
- list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
1223
- #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
1224
- endif()
1225
- elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
1226
- message(STATUS "loongarch64 detected")
1227
-
1228
- list(APPEND ARCH_FLAGS -march=loongarch64)
1229
- if (GGML_LASX)
1230
- list(APPEND ARCH_FLAGS -mlasx)
1231
- endif()
1232
- if (GGML_LSX)
1233
- list(APPEND ARCH_FLAGS -mlsx)
1234
- endif()
1235
- else()
1236
- message(STATUS "Unknown architecture")
1237
- endif()
1238
-
1239
- add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>")
1240
- add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>")
1241
-
1242
- if (GGML_CUDA)
1243
- list(APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS})
1244
- list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument
1245
-
1246
- if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
1247
- list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
1248
- endif()
1249
-
1250
- add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
1251
- endif()
1252
-
1253
141
  if (MINGW)
1254
142
  # Target Windows 8 for PrefetchVirtualMemory
1255
143
  add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
@@ -1263,14 +151,14 @@ endif()
1263
151
  # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
1264
152
  # posix_memalign came in POSIX.1-2001 / SUSv3
1265
153
  # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
1266
- add_compile_definitions(_XOPEN_SOURCE=600)
1267
154
 
1268
155
  # Somehow in OpenBSD whenever POSIX conformance is specified
1269
156
  # some string functions rely on locale_t availability,
1270
157
  # which was introduced in POSIX.1-2008, forcing us to go higher
1271
158
  if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
1272
- remove_definitions(-D_XOPEN_SOURCE=600)
1273
159
  add_compile_definitions(_XOPEN_SOURCE=700)
160
+ else()
161
+ add_compile_definitions(_XOPEN_SOURCE=600)
1274
162
  endif()
1275
163
 
1276
164
  # Data types, macros and functions related to controlling CPU affinity and
@@ -1306,66 +194,144 @@ endif()
1306
194
 
1307
195
  if (WIN32)
1308
196
  add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
1309
-
1310
- if (BUILD_SHARED_LIBS)
1311
- # TODO: should not use this
1312
- set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
1313
- endif()
1314
197
  endif()
1315
198
 
1316
- #
1317
- # libraries
1318
- #
1319
-
1320
199
  # ggml
1321
200
 
1322
- add_library(ggml
201
+ if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS)
202
+ message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS")
203
+ endif()
204
+
205
+ add_library(ggml-base
1323
206
  ../include/ggml.h
1324
207
  ../include/ggml-alloc.h
1325
208
  ../include/ggml-backend.h
209
+ ../include/ggml-cpp.h
210
+ ../include/ggml-opt.h
1326
211
  ggml.c
1327
212
  ggml-alloc.c
1328
213
  ggml-backend.cpp
214
+ ggml-opt.cpp
215
+ ggml-threading.cpp
216
+ ggml-threading.h
1329
217
  ggml-quants.c
1330
- ggml-quants.h
1331
- ${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
1332
- ${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
1333
- ${GGML_SOURCES_RPC} ${GGML_HEADERS_RPC}
1334
- ${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
1335
- ${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
1336
- ${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
1337
- ${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
1338
- ${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
1339
- ${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
1340
- ${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
1341
- ${GGML_SOURCES_CANN} ${GGML_HEADERS_CANN}
1342
- ggml-aarch64.c ggml-aarch64.h
1343
- )
218
+ ggml-quants.h)
219
+
220
+ target_include_directories(ggml-base PRIVATE .)
1344
221
 
1345
- if (EMSCRIPTEN)
1346
- set_target_properties(ggml PROPERTIES COMPILE_FLAGS "-msimd128")
222
+ add_library(ggml
223
+ ggml-backend-reg.cpp)
224
+
225
+ target_link_libraries(ggml PUBLIC ggml-base)
226
+
227
+ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
228
+ target_link_libraries(ggml PRIVATE dl)
1347
229
  endif()
1348
230
 
1349
- target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC})
1350
- target_include_directories(ggml PUBLIC ../include)
1351
- target_include_directories(ggml PRIVATE . ${GGML_EXTRA_INCLUDES})
1352
- target_link_directories (ggml PRIVATE ${GGML_EXTRA_LIBDIRS})
1353
- target_compile_features (ggml PRIVATE c_std_11) # don't bump
231
+ function(ggml_add_backend_library backend)
232
+ if (GGML_BACKEND_DL)
233
+ add_library(${backend} MODULE ${ARGN})
234
+ # write the shared library to the output directory
235
+ set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
236
+ target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
237
+ else()
238
+ add_library(${backend} ${ARGN})
239
+ target_link_libraries(ggml PUBLIC ${backend})
240
+ install(TARGETS ${backend} LIBRARY)
241
+ endif()
242
+
243
+ target_link_libraries(${backend} PRIVATE ggml-base)
244
+ target_include_directories(${backend} PRIVATE ..)
245
+
246
+ if (${BUILD_SHARED_LIBS})
247
+ target_compile_definitions(${backend} PRIVATE GGML_BACKEND_BUILD)
248
+ target_compile_definitions(${backend} PUBLIC GGML_BACKEND_SHARED)
249
+ endif()
250
+ endfunction()
251
+
252
+ function(ggml_add_backend backend)
253
+ string(TOUPPER "GGML_${backend}" backend_id)
254
+ if (${backend_id})
255
+ string(TOLOWER "ggml-${backend}" backend_target)
256
+ add_subdirectory(${backend_target})
257
+ message(STATUS "Including ${backend} backend")
258
+ if (NOT GGML_BACKEND_DL)
259
+ string(TOUPPER "GGML_USE_${backend}" backend_use)
260
+ target_compile_definitions(ggml PUBLIC ${backend_use})
261
+ endif()
262
+ endif()
263
+ endfunction()
264
+
265
+ function(ggml_add_cpu_backend_variant tag_name)
266
+ set(GGML_CPU_TAG_NAME ${tag_name})
267
+ # other: OPENMP LLAMAFILE CPU_HBM
268
+ foreach (feat NATIVE
269
+ AVX AVX2 AVX_VNNI FMA F16C
270
+ AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
271
+ AMX_TILE AMX_INT8 AMX_BF16)
272
+ set(GGML_${feat} OFF)
273
+ endforeach()
274
+
275
+ foreach (feat ${ARGN})
276
+ set(GGML_${feat} ON)
277
+ endforeach()
278
+
279
+ ggml_add_cpu_backend_variant_impl(${tag_name})
280
+ endfunction()
1354
281
 
1355
- list(APPEND GGML_EXTRA_LIBS_PRIVATE Threads::Threads)
282
+ ggml_add_backend(CPU)
283
+
284
+ if (GGML_CPU_ALL_VARIANTS)
285
+ if (NOT GGML_BACKEND_DL)
286
+ message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
287
+ endif()
288
+ ggml_add_cpu_backend_variant(sandybridge AVX)
289
+ ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 FMA)
290
+ ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
291
+ ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
292
+ if (NOT MSVC)
293
+ # MSVC doesn't support AVX-VNNI or AMX
294
+ ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
295
+ ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
296
+ endif()
297
+ else ()
298
+ ggml_add_cpu_backend_variant_impl("")
299
+ endif()
300
+
301
+ ggml_add_backend(BLAS)
302
+ ggml_add_backend(CANN)
303
+ ggml_add_backend(CUDA)
304
+ ggml_add_backend(HIP)
305
+ ggml_add_backend(Kompute)
306
+ ggml_add_backend(METAL)
307
+ ggml_add_backend(MUSA)
308
+ ggml_add_backend(RPC)
309
+ ggml_add_backend(SYCL)
310
+ ggml_add_backend(Vulkan)
311
+ ggml_add_backend(OpenCL)
312
+
313
+ foreach (target ggml-base ggml)
314
+ target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
315
+ target_compile_features (${target} PRIVATE c_std_11 cxx_std_17) # don't bump
316
+ endforeach()
317
+
318
+ target_link_libraries(ggml-base PRIVATE Threads::Threads)
1356
319
 
1357
320
  find_library(MATH_LIBRARY m)
1358
321
  if (MATH_LIBRARY)
1359
- if (NOT WIN32 OR NOT GGML_SYCL)
1360
- list(APPEND GGML_EXTRA_LIBS_PRIVATE m)
322
+ if (NOT WIN32 OR NOT DEFINED ENV{ONEAPI_ROOT})
323
+ target_link_libraries(ggml-base PRIVATE m)
1361
324
  endif()
1362
325
  endif()
1363
326
 
1364
- list(REMOVE_DUPLICATES GGML_EXTRA_LIBS_PRIVATE)
1365
- list(REMOVE_DUPLICATES GGML_EXTRA_LIBS_PUBLIC)
1366
- target_link_libraries(ggml PRIVATE ${GGML_EXTRA_LIBS_PRIVATE} PUBLIC ${GGML_EXTRA_LIBS_PUBLIC})
327
+ if (CMAKE_SYSTEM_NAME MATCHES "Android")
328
+ target_link_libraries(ggml-base PRIVATE dl)
329
+ endif()
1367
330
 
1368
331
  if (BUILD_SHARED_LIBS)
1369
- set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
1370
- target_compile_definitions(ggml PRIVATE GGML_SHARED GGML_BUILD)
332
+ foreach (target ggml-base ggml)
333
+ set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
334
+ target_compile_definitions(${target} PRIVATE GGML_BUILD)
335
+ target_compile_definitions(${target} PUBLIC GGML_SHARED)
336
+ endforeach()
1371
337
  endif()