@fugood/llama.node 0.2.2 → 0.3.0

This diff shows the changes between publicly released versions of the package as they appear in the public registry, and is provided for informational purposes only.
Files changed (320)
  1. package/CMakeLists.txt +5 -2
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +8 -1
  17. package/package.json +1 -1
  18. package/patches/llama.patch +12 -12
  19. package/src/DetokenizeWorker.cpp +1 -1
  20. package/src/LlamaContext.cpp +33 -1
  21. package/src/LlamaContext.h +1 -0
  22. package/src/LoadSessionWorker.cpp +1 -0
  23. package/src/llama.cpp/.github/workflows/bench.yml +310 -0
  24. package/src/llama.cpp/.github/workflows/build.yml +1315 -0
  25. package/src/llama.cpp/.github/workflows/close-issue.yml +23 -0
  26. package/src/llama.cpp/.github/workflows/docker.yml +116 -0
  27. package/src/llama.cpp/.github/workflows/editorconfig.yml +27 -0
  28. package/src/llama.cpp/.github/workflows/gguf-publish.yml +44 -0
  29. package/src/llama.cpp/.github/workflows/labeler.yml +17 -0
  30. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +65 -0
  31. package/src/llama.cpp/.github/workflows/nix-ci.yml +72 -0
  32. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +22 -0
  33. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +36 -0
  34. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +35 -0
  35. package/src/llama.cpp/.github/workflows/python-lint.yml +23 -0
  36. package/src/llama.cpp/.github/workflows/python-type-check.yml +38 -0
  37. package/src/llama.cpp/.github/workflows/server.yml +183 -0
  38. package/src/llama.cpp/CMakeLists.txt +91 -1245
  39. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +1 -1
  40. package/src/llama.cpp/cmake/build-info.cmake +58 -0
  41. package/src/llama.cpp/cmake/git-vars.cmake +22 -0
  42. package/src/llama.cpp/common/CMakeLists.txt +4 -3
  43. package/src/llama.cpp/common/build-info.cpp.in +4 -0
  44. package/src/llama.cpp/common/common.cpp +1116 -877
  45. package/src/llama.cpp/common/common.h +191 -77
  46. package/src/llama.cpp/common/grammar-parser.cpp +118 -31
  47. package/src/llama.cpp/common/json-schema-to-grammar.cpp +346 -65
  48. package/src/llama.cpp/common/log.h +1 -1
  49. package/src/llama.cpp/common/ngram-cache.h +10 -3
  50. package/src/llama.cpp/common/sampling.cpp +19 -10
  51. package/src/llama.cpp/docs/build.md +353 -0
  52. package/src/llama.cpp/examples/CMakeLists.txt +22 -22
  53. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +1 -1
  54. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +6 -6
  55. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  56. package/src/llama.cpp/examples/batched/batched.cpp +52 -55
  57. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  58. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +20 -72
  59. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +1 -1
  60. package/src/llama.cpp/examples/chat-13B.bat +57 -0
  61. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/{finetune → cvector-generator}/CMakeLists.txt +2 -2
  63. package/src/llama.cpp/examples/cvector-generator/completions.txt +582 -0
  64. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +503 -0
  65. package/src/llama.cpp/examples/cvector-generator/mean.hpp +48 -0
  66. package/src/llama.cpp/examples/cvector-generator/negative.txt +4 -0
  67. package/src/llama.cpp/examples/cvector-generator/pca.hpp +325 -0
  68. package/src/llama.cpp/examples/cvector-generator/positive.txt +4 -0
  69. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +35 -0
  70. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  71. package/src/llama.cpp/examples/embedding/embedding.cpp +94 -46
  72. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +2 -2
  73. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +4 -6
  74. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  75. package/src/llama.cpp/examples/export-lora/export-lora.cpp +344 -386
  76. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +2 -2
  77. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +30 -25
  78. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/gguf/gguf.cpp +5 -0
  80. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +15 -0
  81. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +46 -0
  82. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +295 -0
  83. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +52 -0
  84. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +221 -0
  85. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +24 -0
  86. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +42 -0
  87. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +7093 -0
  88. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +693 -0
  89. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  90. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +3 -3
  91. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  92. package/src/llama.cpp/examples/gritlm/gritlm.cpp +6 -2
  93. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  94. package/src/llama.cpp/examples/imatrix/imatrix.cpp +137 -176
  95. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  96. package/src/llama.cpp/examples/infill/infill.cpp +38 -153
  97. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +175 -94
  98. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +65 -0
  99. package/src/llama.cpp/examples/llama.android/build.gradle.kts +6 -0
  100. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +68 -0
  101. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +11 -7
  102. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +2 -2
  103. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +18 -0
  104. package/src/llama.cpp/examples/llava/CMakeLists.txt +6 -5
  105. package/src/llama.cpp/examples/llava/android/build_64.sh +8 -0
  106. package/src/llama.cpp/examples/llava/clip.cpp +23 -14
  107. package/src/llama.cpp/examples/llava/llava-cli.cpp +8 -6
  108. package/src/llama.cpp/examples/llava/requirements.txt +3 -2
  109. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  110. package/src/llama.cpp/examples/lookahead/lookahead.cpp +2 -1
  111. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  112. package/src/llama.cpp/examples/lookup/lookup-create.cpp +2 -0
  113. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  114. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -2
  115. package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
  116. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  117. package/src/llama.cpp/examples/main/main.cpp +98 -75
  118. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +4 -5
  119. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  120. package/src/llama.cpp/examples/parallel/parallel.cpp +2 -1
  121. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  122. package/src/llama.cpp/examples/passkey/passkey.cpp +23 -43
  123. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  124. package/src/llama.cpp/examples/perplexity/perplexity.cpp +13 -10
  125. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  126. package/src/llama.cpp/examples/quantize/quantize.cpp +37 -34
  127. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  128. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
  129. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  130. package/src/llama.cpp/examples/retrieval/retrieval.cpp +26 -77
  131. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  132. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +14 -7
  133. package/src/llama.cpp/examples/server/CMakeLists.txt +26 -2
  134. package/src/llama.cpp/examples/server/server.cpp +274 -671
  135. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  136. package/src/llama.cpp/examples/server/utils.hpp +28 -29
  137. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  138. package/src/llama.cpp/examples/simple/simple.cpp +21 -29
  139. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  140. package/src/llama.cpp/examples/speculative/speculative.cpp +2 -1
  141. package/src/llama.cpp/examples/sycl/CMakeLists.txt +1 -1
  142. package/src/llama.cpp/examples/sycl/build.sh +23 -0
  143. package/src/llama.cpp/examples/sycl/run-llama2.sh +36 -0
  144. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +33 -0
  145. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +9 -0
  146. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  147. package/src/llama.cpp/examples/tokenize/tokenize.cpp +16 -2
  148. package/src/llama.cpp/ggml/CMakeLists.txt +253 -0
  149. package/src/llama.cpp/{cmake → ggml/cmake}/FindSIMD.cmake +6 -6
  150. package/src/llama.cpp/{ggml-backend.h → ggml/include/ggml-backend.h} +22 -17
  151. package/src/llama.cpp/ggml/include/ggml-blas.h +23 -0
  152. package/src/llama.cpp/ggml/include/ggml-cann.h +125 -0
  153. package/src/llama.cpp/{ggml-cuda.h → ggml/include/ggml-cuda.h} +3 -0
  154. package/src/llama.cpp/{ggml-metal.h → ggml/include/ggml-metal.h} +1 -2
  155. package/src/llama.cpp/{ggml-sycl.h → ggml/include/ggml-sycl.h} +3 -10
  156. package/src/llama.cpp/{ggml.h → ggml/include/ggml.h} +80 -85
  157. package/src/llama.cpp/ggml/src/CMakeLists.txt +1329 -0
  158. package/src/llama.cpp/ggml/src/ggml-aarch64.c +2193 -0
  159. package/src/llama.cpp/ggml/src/ggml-aarch64.h +39 -0
  160. package/src/llama.cpp/{ggml-alloc.c → ggml/src/ggml-alloc.c} +100 -49
  161. package/src/llama.cpp/{ggml-backend-impl.h → ggml/src/ggml-backend-impl.h} +20 -8
  162. package/src/llama.cpp/{ggml-backend.c → ggml/src/ggml-backend.c} +307 -167
  163. package/src/llama.cpp/ggml/src/ggml-blas.cpp +367 -0
  164. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +198 -0
  165. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +230 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2944 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/common.h +282 -0
  169. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +32 -0
  170. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +17 -0
  171. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +223 -0
  172. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +186 -0
  173. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +180 -0
  174. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +193 -0
  175. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  176. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +208 -0
  177. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +206 -0
  178. package/src/llama.cpp/ggml/src/ggml-cann.cpp +2023 -0
  179. package/src/llama.cpp/{ggml-common.h → ggml/src/ggml-common.h} +41 -7
  180. package/src/llama.cpp/{ggml-impl.h → ggml/src/ggml-impl.h} +113 -9
  181. package/src/llama.cpp/{ggml-kompute.cpp → ggml/src/ggml-kompute.cpp} +33 -18
  182. package/src/llama.cpp/{ggml-quants.c → ggml/src/ggml-quants.c} +1460 -940
  183. package/src/llama.cpp/{ggml-quants.h → ggml/src/ggml-quants.h} +19 -20
  184. package/src/llama.cpp/{ggml-rpc.cpp → ggml/src/ggml-rpc.cpp} +95 -72
  185. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +27 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +53 -0
  187. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +355 -0
  188. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +195 -0
  189. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +21 -0
  190. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +547 -0
  191. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +27 -0
  192. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +698 -0
  193. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  194. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  195. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +3011 -0
  196. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  197. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  198. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1027 -0
  199. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  200. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +374 -0
  201. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +35 -0
  202. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +66 -0
  203. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +275 -0
  204. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +22 -0
  205. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +251 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +24 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1140 -0
  208. package/src/llama.cpp/ggml/src/ggml-sycl.cpp +5314 -0
  209. package/src/llama.cpp/{ggml-vulkan.cpp → ggml/src/ggml-vulkan.cpp} +1781 -1868
  210. package/src/llama.cpp/{ggml.c → ggml/src/ggml.c} +1245 -2087
  211. package/src/llama.cpp/{sgemm.cpp → ggml/src/llamafile/sgemm.cpp} +21 -24
  212. package/src/llama.cpp/{sgemm.h → ggml/src/llamafile/sgemm.h} +1 -1
  213. package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +5 -0
  214. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +552 -0
  215. package/src/llama.cpp/{llama.h → include/llama.h} +175 -100
  216. package/src/llama.cpp/models/.editorconfig +1 -0
  217. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  219. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  220. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
  221. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
  222. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  223. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
  224. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
  225. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  226. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
  227. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
  228. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  229. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
  230. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
  231. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  232. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
  233. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
  234. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
  236. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
  237. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  239. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
  240. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
  241. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  242. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
  243. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  245. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
  246. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  248. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
  249. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
  256. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  257. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
  258. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
  259. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  260. package/src/llama.cpp/requirements/requirements-all.txt +12 -0
  261. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
  262. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
  263. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
  264. package/src/llama.cpp/requirements/{requirements-convert.txt → requirements-convert_legacy_llama.txt} +1 -1
  265. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
  266. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  267. package/src/llama.cpp/requirements/requirements-pydantic.txt +3 -0
  268. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
  269. package/src/llama.cpp/requirements.txt +5 -4
  270. package/src/llama.cpp/scripts/build-info.sh +30 -0
  271. package/src/llama.cpp/scripts/install-oneapi.bat +19 -0
  272. package/src/llama.cpp/src/CMakeLists.txt +33 -0
  273. package/src/llama.cpp/src/llama-grammar.cpp +539 -0
  274. package/src/llama.cpp/src/llama-grammar.h +39 -0
  275. package/src/llama.cpp/src/llama-impl.h +26 -0
  276. package/src/llama.cpp/src/llama-sampling.cpp +635 -0
  277. package/src/llama.cpp/src/llama-sampling.h +56 -0
  278. package/src/llama.cpp/src/llama-vocab.cpp +1721 -0
  279. package/src/llama.cpp/src/llama-vocab.h +130 -0
  280. package/src/llama.cpp/{llama.cpp → src/llama.cpp} +5979 -5260
  281. package/src/llama.cpp/{unicode-data.cpp → src/unicode-data.cpp} +851 -802
  282. package/src/llama.cpp/{unicode.cpp → src/unicode.cpp} +52 -30
  283. package/src/llama.cpp/{unicode.h → src/unicode.h} +5 -1
  284. package/src/llama.cpp/tests/CMakeLists.txt +19 -20
  285. package/src/llama.cpp/tests/test-backend-ops.cpp +245 -67
  286. package/src/llama.cpp/tests/test-chat-template.cpp +57 -3
  287. package/src/llama.cpp/tests/test-double-float.cpp +2 -2
  288. package/src/llama.cpp/tests/test-grad0.cpp +2 -2
  289. package/src/llama.cpp/tests/test-grammar-integration.cpp +978 -31
  290. package/src/llama.cpp/tests/test-grammar-parser.cpp +423 -158
  291. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +508 -135
  292. package/src/llama.cpp/tests/test-llama-grammar.cpp +15 -9
  293. package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -1
  294. package/src/llama.cpp/tests/test-quantize-perf.cpp +1 -1
  295. package/src/llama.cpp/tests/test-rope.cpp +3 -4
  296. package/src/llama.cpp/tests/test-sampling.cpp +5 -5
  297. package/src/llama.cpp/tests/test-tokenizer-0.cpp +6 -6
  298. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +20 -15
  299. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +22 -11
  300. package/bin/darwin/arm64/default.metallib +0 -0
  301. package/bin/darwin/x64/default.metallib +0 -0
  302. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +0 -5
  303. package/src/llama.cpp/examples/beam-search/beam-search.cpp +0 -188
  304. package/src/llama.cpp/examples/finetune/finetune.cpp +0 -1862
  305. package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +0 -55
  306. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -5
  307. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -1253
  308. package/src/llama.cpp/ggml-opencl.cpp +0 -2305
  309. package/src/llama.cpp/ggml-opencl.h +0 -36
  310. package/src/llama.cpp/ggml-sycl.cpp +0 -17340
  311. package/src/llama.cpp/ggml-vulkan-shaders.hpp +0 -81211
  312. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +0 -2
  313. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -2
  314. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -1
  315. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -24
  316. package/src/llama.cpp/{ggml-alloc.h → ggml/include/ggml-alloc.h} +0 -0
  317. package/src/llama.cpp/{ggml-kompute.h → ggml/include/ggml-kompute.h} +0 -0
  318. package/src/llama.cpp/{ggml-rpc.h → ggml/include/ggml-rpc.h} +0 -0
  319. package/src/llama.cpp/{ggml-vulkan.h → ggml/include/ggml-vulkan.h} +0 -0
  320. package/src/llama.cpp/{unicode-data.h → src/unicode-data.h} +0 -0
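
The largest single addition in this release is the new ggml/src/CMakeLists.txt (item 157 above, +1329 lines); the portion of its diff included in this section follows. Its backend switches (GGML_METAL, GGML_CUDA, GGML_VULKAN, GGML_BLAS, and so on) are ordinary CMake cache options, so a consuming project such as package/CMakeLists.txt can opt into a backend before adding the vendored sources. A minimal sketch, assuming the vendored tree sits at src/llama.cpp and that setting the options before add_subdirectory is sufficient (the option names are taken from the diff below; the project layout and target names are hypothetical):

    # Hedged sketch of a consumer build enabling the Metal backend.
    # Option names come from the new ggml CMake files; paths and names are assumed.
    cmake_minimum_required(VERSION 3.18)
    project(llama-node-consumer CXX)

    set(GGML_METAL               ON  CACHE BOOL "enable the Metal backend"              FORCE)
    set(GGML_METAL_EMBED_LIBRARY ON  CACHE BOOL "embed ggml-metal.metal into the binary" FORCE)
    set(GGML_BLAS                OFF CACHE BOOL "leave the BLAS backend off"             FORCE)

    add_subdirectory(src/llama.cpp)  # assumed location of the vendored llama.cpp sources
    # target_link_libraries(llama-node PRIVATE llama)  # link the resulting library target (name assumed)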
package/src/llama.cpp/ggml/src/CMakeLists.txt
@@ -0,0 +1,1329 @@
1
+ include(CheckCXXCompilerFlag)
2
+
3
+ unset(GGML_CDEF_PUBLIC)
4
+
5
+ add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})
6
+
7
+ # enable libstdc++ assertions for debug builds
8
+ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
9
+ add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
10
+ endif()
11
+
12
+ if (NOT MSVC)
13
+ if (GGML_SANITIZE_THREAD)
14
+ add_compile_options(-fsanitize=thread)
15
+ link_libraries (-fsanitize=thread)
16
+ endif()
17
+
18
+ if (GGML_SANITIZE_ADDRESS)
19
+ add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
20
+ link_libraries (-fsanitize=address)
21
+ endif()
22
+
23
+ if (GGML_SANITIZE_UNDEFINED)
24
+ add_compile_options(-fsanitize=undefined)
25
+ link_libraries (-fsanitize=undefined)
26
+ endif()
27
+ endif()
28
+
29
+ if (APPLE AND GGML_ACCELERATE)
30
+ find_library(ACCELERATE_FRAMEWORK Accelerate)
31
+ if (ACCELERATE_FRAMEWORK)
32
+ message(STATUS "Accelerate framework found")
33
+
34
+ add_compile_definitions(GGML_USE_ACCELERATE)
35
+ add_compile_definitions(ACCELERATE_NEW_LAPACK)
36
+ add_compile_definitions(ACCELERATE_LAPACK_ILP64)
37
+
38
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
39
+ else()
40
+ message(WARNING "Accelerate framework not found")
41
+ endif()
42
+ endif()
43
+
44
+ if (GGML_METAL)
45
+ find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
46
+ find_library(METAL_FRAMEWORK Metal REQUIRED)
47
+ find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
48
+
49
+ message(STATUS "Metal framework found")
50
+ set(GGML_HEADERS_METAL ../include/ggml-metal.h)
51
+ set(GGML_SOURCES_METAL ggml-metal.m)
52
+
53
+ list(APPEND GGML_CDEF_PUBLIC GGML_USE_METAL)
54
+ if (GGML_METAL_NDEBUG)
55
+ add_compile_definitions(GGML_METAL_NDEBUG)
56
+ endif()
57
+
58
+ # copy ggml-common.h and ggml-metal.metal to bin directory
59
+ configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY)
60
+ configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
61
+
62
+ if (GGML_METAL_EMBED_LIBRARY)
63
+ enable_language(ASM)
64
+
65
+ add_compile_definitions(GGML_METAL_EMBED_LIBRARY)
66
+
67
+ set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h")
68
+ set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
69
+
70
+ file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated")
71
+
72
+ # merge ggml-common.h and ggml-metal.metal into a single file
73
+ set(METALLIB_EMBED_ASM "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.s")
74
+ set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal")
75
+
76
+ add_custom_command(
77
+ OUTPUT ${METALLIB_EMBED_ASM}
78
+ COMMAND echo "Embedding Metal library"
79
+ COMMAND sed -e '/\#include \"ggml-common.h\"/r ${METALLIB_COMMON}' -e '/\#include \"ggml-common.h\"/d' < ${METALLIB_SOURCE} > ${METALLIB_SOURCE_EMBED}
80
+ COMMAND echo ".section __DATA,__ggml_metallib" > ${METALLIB_EMBED_ASM}
81
+ COMMAND echo ".globl _ggml_metallib_start" >> ${METALLIB_EMBED_ASM}
82
+ COMMAND echo "_ggml_metallib_start:" >> ${METALLIB_EMBED_ASM}
83
+ COMMAND echo ".incbin \\\"${METALLIB_SOURCE_EMBED}\\\"" >> ${METALLIB_EMBED_ASM}
84
+ COMMAND echo ".globl _ggml_metallib_end" >> ${METALLIB_EMBED_ASM}
85
+ COMMAND echo "_ggml_metallib_end:" >> ${METALLIB_EMBED_ASM}
86
+ DEPENDS ggml-metal.metal ggml-common.h
87
+ COMMENT "Generate assembly for embedded Metal library"
88
+ )
89
+
90
+ set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${METALLIB_EMBED_ASM})
91
+ else()
92
+ if (GGML_METAL_SHADER_DEBUG)
93
+ # custom command to do the following:
94
+ # xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air
95
+ # xcrun -sdk macosx metallib ggml-metal.air -o default.metallib
96
+ #
97
+ # note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works
98
+ # disabling fast math is needed in order to pass tests/test-backend-ops
99
+ # note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1
100
+ # note: unfortunately, we have to call it default.metallib instead of ggml.metallib
101
+ # ref: https://github.com/ggerganov/whisper.cpp/issues/1720
102
+ set(XC_FLAGS -fno-fast-math -fno-inline -g)
103
+ else()
104
+ set(XC_FLAGS -O3)
105
+ endif()
106
+
107
+ # Append macOS metal versioning flags
108
+ if (GGML_METAL_MACOSX_VERSION_MIN)
109
+ message(STATUS "Adding -mmacosx-version-min=${GGML_METAL_MACOSX_VERSION_MIN} flag to metal compilation")
110
+ list (APPEND XC_FLAGS -mmacosx-version-min=${GGML_METAL_MACOSX_VERSION_MIN})
111
+ endif()
112
+
113
+ if (GGML_METAL_STD)
114
+ message(STATUS "Adding -std=${GGML_METAL_STD} flag to metal compilation")
115
+ list (APPEND XC_FLAGS -std=${GGML_METAL_STD})
116
+ endif()
117
+
118
+ add_custom_command(
119
+ OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
120
+ COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
121
+ COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
122
+ COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
123
+ COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h
124
+ COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal
125
+ DEPENDS ggml-metal.metal ggml-common.h
126
+ COMMENT "Compiling Metal kernels"
127
+ )
128
+
129
+ add_custom_target(
130
+ ggml-metal ALL
131
+ DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
132
+ )
133
+ endif() # GGML_METAL_EMBED_LIBRARY
134
+
135
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS}
136
+ ${FOUNDATION_LIBRARY}
137
+ ${METAL_FRAMEWORK}
138
+ ${METALKIT_FRAMEWORK}
139
+ )
140
+ endif()
141
+
142
+ if (GGML_MUSA)
143
+ set(CMAKE_C_COMPILER clang)
144
+ set(CMAKE_C_EXTENSIONS OFF)
145
+ set(CMAKE_CXX_COMPILER clang++)
146
+ set(CMAKE_CXX_EXTENSIONS OFF)
147
+
148
+ set(GGML_CUDA ON)
149
+
150
+ list(APPEND GGML_CDEF_PUBLIC GGML_USE_MUSA)
151
+ endif()
152
+
153
+ if (GGML_OPENMP)
154
+ find_package(OpenMP)
155
+ if (OpenMP_FOUND)
156
+ message(STATUS "OpenMP found")
157
+
158
+ add_compile_definitions(GGML_USE_OPENMP)
159
+
160
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
161
+
162
+ if (GGML_MUSA)
163
+ set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} "/usr/lib/llvm-10/include/openmp")
164
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} "/usr/lib/llvm-10/lib/libomp.so")
165
+ endif()
166
+ else()
167
+ message(WARNING "OpenMP not found")
168
+ endif()
169
+ endif()
170
+
171
+ if (GGML_BLAS)
172
+ if (GGML_STATIC)
173
+ set(BLA_STATIC ON)
174
+ endif()
175
+ #if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
176
+ # set(BLA_SIZEOF_INTEGER 8)
177
+ #endif()
178
+
179
+ set(BLA_VENDOR ${GGML_BLAS_VENDOR})
180
+ find_package(BLAS)
181
+
182
+ if (BLAS_FOUND)
183
+ message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
184
+
185
+ if (("${BLAS_INCLUDE_DIRS}" STREQUAL "") AND NOT (${GGML_BLAS_VENDOR} MATCHES "Apple"))
186
+ # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
187
+ # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
188
+ find_package(PkgConfig REQUIRED)
189
+ if (${GGML_BLAS_VENDOR} MATCHES "Generic")
190
+ pkg_check_modules(DepBLAS REQUIRED blas)
191
+ elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS")
192
+ # As of openblas v0.3.22, the 64-bit is named openblas64.pc
193
+ pkg_check_modules(DepBLAS openblas64)
194
+ if (NOT DepBLAS_FOUND)
195
+ pkg_check_modules(DepBLAS REQUIRED openblas)
196
+ endif()
197
+ elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME")
198
+ pkg_check_modules(DepBLAS REQUIRED blis)
199
+ elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS")
200
+ pkg_check_modules(DepBLAS REQUIRED blas-atlas)
201
+ elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS")
202
+ pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
203
+ elseif (${GGML_BLAS_VENDOR} MATCHES "Intel")
204
+ # all Intel* libraries share the same include path
205
+ pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
206
+ elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC")
207
+ # this doesn't provide pkg-config
208
+ # suggest to assign BLAS_INCLUDE_DIRS on your own
209
+ if ("${NVHPC_VERSION}" STREQUAL "")
210
+ message(WARNING "Better to set NVHPC_VERSION")
211
+ else()
212
+ set(DepBLAS_FOUND ON)
213
+ set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
214
+ endif()
215
+ endif()
216
+ if (DepBLAS_FOUND)
217
+ set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
218
+ else()
219
+ message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
220
+ " detected by pkgconfig, trying to find cblas.h from possible paths...")
221
+ find_path(BLAS_INCLUDE_DIRS
222
+ NAMES cblas.h
223
+ HINTS
224
+ /usr/include
225
+ /usr/local/include
226
+ /usr/include/openblas
227
+ /opt/homebrew/opt/openblas/include
228
+ /usr/local/opt/openblas/include
229
+ /usr/include/x86_64-linux-gnu/openblas/include
230
+ )
231
+ endif()
232
+ endif()
233
+
234
+ message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
235
+
236
+ add_compile_options(${BLAS_LINKER_FLAGS})
237
+
238
+ list(APPEND GGML_CDEF_PUBLIC GGML_USE_BLAS)
239
+
240
+ if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel"))
241
+ add_compile_definitions(GGML_BLAS_USE_MKL)
242
+ endif()
243
+
244
+ set(GGML_HEADERS_BLAS ../include/ggml-blas.h)
245
+ set(GGML_SOURCES_BLAS ggml-blas.cpp)
246
+
247
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${BLAS_LIBRARIES})
248
+ set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
249
+ else()
250
+ message(WARNING "BLAS not found, please refer to "
251
+ "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
252
+ " to set correct GGML_BLAS_VENDOR")
253
+ endif()
254
+ endif()
255
+
256
+ if (GGML_LLAMAFILE)
257
+ message(STATUS "Using llamafile")
258
+
259
+ add_compile_definitions(GGML_USE_LLAMAFILE)
260
+
261
+ set(GGML_HEADERS_LLAMAFILE llamafile/sgemm.h)
262
+ set(GGML_SOURCES_LLAMAFILE llamafile/sgemm.cpp)
263
+ endif()
264
+
265
+ if (GGML_CUDA)
266
+ cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES
267
+
268
+ if (GGML_MUSA)
269
+ list(APPEND CMAKE_MODULE_PATH "/usr/local/musa/cmake/")
270
+ find_package(MUSAToolkit)
271
+ set(CUDAToolkit_FOUND ${MUSAToolkit_FOUND})
272
+ else()
273
+ find_package(CUDAToolkit)
274
+ endif()
275
+
276
+ if (CUDAToolkit_FOUND)
277
+ message(STATUS "CUDA found")
278
+
279
+ if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
280
+ # 52 == lowest CUDA 12 standard
281
+ # 60 == FP16 CUDA intrinsics
282
+ # 61 == integer CUDA intrinsics
283
+ # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
284
+ if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
285
+ set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75")
286
+ else()
287
+ set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75")
288
+ #set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
289
+ endif()
290
+ endif()
291
+ message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
292
+
293
+ if (GGML_MUSA)
294
+ set(CMAKE_CUDA_COMPILER ${MUSAToolkit_MCC_EXECUTABLE})
295
+ else()
296
+ enable_language(CUDA)
297
+ endif()
298
+
299
+ file(GLOB GGML_HEADERS_CUDA "ggml-cuda/*.cuh")
300
+ list(APPEND GGML_HEADERS_CUDA "../include/ggml-cuda.h")
301
+
302
+ file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu")
303
+ list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu")
304
+ file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu")
305
+ list(APPEND GGML_SOURCES_CUDA ${SRCS})
306
+ file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
307
+ list(APPEND GGML_SOURCES_CUDA ${SRCS})
308
+
309
+ if (GGML_CUDA_FA_ALL_QUANTS)
310
+ file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu")
311
+ list(APPEND GGML_SOURCES_CUDA ${SRCS})
312
+ add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
313
+ else()
314
+ file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
315
+ list(APPEND GGML_SOURCES_CUDA ${SRCS})
316
+ file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
317
+ list(APPEND GGML_SOURCES_CUDA ${SRCS})
318
+ file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
319
+ list(APPEND GGML_SOURCES_CUDA ${SRCS})
320
+ endif()
321
+
322
+ list(APPEND GGML_CDEF_PUBLIC GGML_USE_CUDA)
323
+
324
+ add_compile_definitions(GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X})
325
+ add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y})
326
+ add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER})
327
+ add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
328
+
329
+ if (GGML_CUDA_USE_GRAPHS)
330
+ add_compile_definitions(GGML_CUDA_USE_GRAPHS)
331
+ endif()
332
+
333
+ if (GGML_CUDA_FORCE_DMMV)
334
+ add_compile_definitions(GGML_CUDA_FORCE_DMMV)
335
+ endif()
336
+
337
+ if (GGML_CUDA_FORCE_MMQ)
338
+ add_compile_definitions(GGML_CUDA_FORCE_MMQ)
339
+ endif()
340
+
341
+ if (GGML_CUDA_FORCE_CUBLAS)
342
+ add_compile_definitions(GGML_CUDA_FORCE_CUBLAS)
343
+ endif()
344
+
345
+ if (GGML_CUDA_NO_VMM)
346
+ add_compile_definitions(GGML_CUDA_NO_VMM)
347
+ endif()
348
+
349
+ if (DEFINED GGML_CUDA_DMMV_Y)
350
+ add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_DMMV_Y}) # for backwards compatibility
351
+ endif()
352
+
353
+ if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
354
+ add_compile_definitions(GGML_CUDA_F16)
355
+ endif()
356
+
357
+ if (GGML_CUDA_NO_PEER_COPY)
358
+ add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
359
+ endif()
360
+
361
+ if (GGML_MUSA)
362
+ set_source_files_properties(${GGML_SOURCES_CUDA} PROPERTIES LANGUAGE CXX)
363
+ foreach(SOURCE ${GGML_SOURCES_CUDA})
364
+ set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_22")
365
+ endforeach()
366
+ endif()
367
+
368
+ if (GGML_STATIC)
369
+ if (WIN32)
370
+ # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
371
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
372
+ else ()
373
+ if (GGML_MUSA)
374
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} MUSA::musart_static MUSA::mublas_static)
375
+ else()
376
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
377
+ endif()
378
+ endif()
379
+ else()
380
+ if (GGML_MUSA)
381
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} MUSA::musart MUSA::mublas)
382
+ else()
383
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
384
+ endif()
385
+ endif()
386
+
387
+ if (GGML_CUDA_NO_VMM)
388
+ # No VMM requested, no need to link directly with the cuda driver lib (libcuda.so)
389
+ else()
390
+ if (GGML_MUSA)
391
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} MUSA::musa_driver) # required by muDeviceGetAttribute(), muMemGetAllocationGranularity(...), ...
392
+ else()
393
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ...
394
+ endif()
395
+ endif()
396
+ else()
397
+ message(WARNING "CUDA not found")
398
+ endif()
399
+ endif()
400
+
401
+ if (GGML_HIPBLAS)
402
+ if (NOT EXISTS $ENV{ROCM_PATH})
403
+ if (NOT EXISTS /opt/rocm)
404
+ set(ROCM_PATH /usr)
405
+ else()
406
+ set(ROCM_PATH /opt/rocm)
407
+ endif()
408
+ else()
409
+ set(ROCM_PATH $ENV{ROCM_PATH})
410
+ endif()
411
+
412
+ list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
413
+ list(APPEND CMAKE_PREFIX_PATH "${ROCM_PATH}/lib64/cmake")
414
+
415
+ # CMake on Windows doesn't support the HIP language yet
416
+ if (WIN32)
417
+ set(CXX_IS_HIPCC TRUE)
418
+ else()
419
+ string(REGEX MATCH "hipcc(\.bat)?$" CXX_IS_HIPCC "${CMAKE_CXX_COMPILER}")
420
+ endif()
421
+
422
+ if (CXX_IS_HIPCC)
423
+ if (LINUX)
424
+ if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
425
+ message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
426
+ endif()
427
+
428
+ message(WARNING "Setting hipcc as the C++ compiler is legacy behavior."
429
+ " Prefer setting the HIP compiler directly. See README for details.")
430
+ endif()
431
+ else()
432
+ # Forward AMDGPU_TARGETS to CMAKE_HIP_ARCHITECTURES.
433
+ if (AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES)
434
+ set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS})
435
+ endif()
436
+ cmake_minimum_required(VERSION 3.21)
437
+ enable_language(HIP)
438
+ endif()
439
+
440
+ find_package(hip REQUIRED)
441
+ find_package(hipblas REQUIRED)
442
+ find_package(rocblas REQUIRED)
443
+
444
+ message(STATUS "HIP and hipBLAS found")
445
+
446
+ file(GLOB GGML_HEADERS_ROCM "ggml-cuda/*.cuh")
447
+ list(APPEND GGML_HEADERS_ROCM "../include/ggml-cuda.h")
448
+
449
+ file(GLOB GGML_SOURCES_ROCM "ggml-cuda/*.cu")
450
+ list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu")
451
+ file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu")
452
+ list(APPEND GGML_SOURCES_ROCM ${SRCS})
453
+ file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
454
+ list(APPEND GGML_SOURCES_ROCM ${SRCS})
455
+
456
+ if (GGML_CUDA_FA_ALL_QUANTS)
457
+ file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu")
458
+ list(APPEND GGML_SOURCES_ROCM ${SRCS})
459
+ add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
460
+ else()
461
+ file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
462
+ list(APPEND GGML_SOURCES_ROCM ${SRCS})
463
+ file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
464
+ list(APPEND GGML_SOURCES_ROCM ${SRCS})
465
+ file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
466
+ list(APPEND GGML_SOURCES_ROCM ${SRCS})
467
+ endif()
468
+
469
+ list(APPEND GGML_CDEF_PUBLIC GGML_USE_CUDA)
470
+
471
+ add_compile_definitions(GGML_USE_HIPBLAS)
472
+ add_compile_definitions(GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X})
473
+ add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y})
474
+ add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER})
475
+
476
+ if (GGML_HIP_UMA)
477
+ add_compile_definitions(GGML_HIP_UMA)
478
+ endif()
479
+
480
+ if (GGML_CUDA_FORCE_DMMV)
481
+ add_compile_definitions(GGML_CUDA_FORCE_DMMV)
482
+ endif()
483
+
484
+ if (GGML_CUDA_FORCE_MMQ)
485
+ add_compile_definitions(GGML_CUDA_FORCE_MMQ)
486
+ endif()
487
+
488
+ if (GGML_CUDA_FORCE_CUBLAS)
489
+ add_compile_definitions(GGML_CUDA_FORCE_CUBLAS)
490
+ endif()
491
+
492
+ if (GGML_CUDA_NO_PEER_COPY)
493
+ add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
494
+ endif()
495
+
496
+ if (CXX_IS_HIPCC)
497
+ set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
498
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} hip::device)
499
+ else()
500
+ set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP)
501
+ endif()
502
+
503
+ if (GGML_STATIC)
504
+ message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
505
+ endif()
506
+
507
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} PUBLIC hip::host roc::rocblas roc::hipblas)
508
+ endif()
509
+
510
+ if (GGML_SYCL)
511
+ if (NOT GGML_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$")
512
+ message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA")
513
+ endif()
514
+
515
+ check_cxx_compiler_flag("-fsycl" SUPPORTS_SYCL)
516
+ if ( DEFINED ENV{ONEAPI_ROOT})
517
+ message(STATUS "Using oneAPI Release SYCL compiler (icpx).")
518
+ elseif(SUPPORTS_SYCL)
519
+ message(WARNING "Using open-source SYCL compiler (clang++). Didn't detect ENV {ONEAPI_ROOT}.
520
+ If you expected the oneAPI Release compiler, please install oneAPI & source it, like:
521
+ source /opt/intel/oneapi/setvars.sh")
522
+ else()
523
+ message(FATAL_ERROR, "C++ compiler lacks SYCL support.")
524
+ endif()
525
+ message(STATUS "SYCL found")
526
+ #todo: AOT
527
+
528
+ list(APPEND GGML_CDEF_PUBLIC GGML_USE_SYCL)
529
+
530
+ if (GGML_SYCL_F16)
531
+ add_compile_definitions(GGML_SYCL_F16)
532
+ endif()
533
+
534
+ if (GGML_CUDA_FORCE_MMQ)
535
+ add_compile_definitions(GGML_SYCL_FORCE_MMQ)
536
+ endif()
537
+
538
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing -fsycl")
539
+
540
+ if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
541
+ add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
542
+ else()
543
+ add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
544
+ endif()
545
+
546
+ file(GLOB GGML_HEADERS_SYCL "ggml-sycl/*.hpp")
547
+ list(APPEND GGML_HEADERS_SYCL "../include/ggml-sycl.h")
548
+
549
+ file(GLOB GGML_SOURCES_SYCL "ggml-sycl/*.cpp")
550
+ list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")
551
+
552
+ if (WIN32)
553
+ find_package(IntelSYCL REQUIRED)
554
+ find_package(MKL REQUIRED)
555
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
556
+ else()
557
+ if (GGML_SYCL_TARGET STREQUAL "INTEL")
558
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
559
+ elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
560
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
561
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} -fsycl pthread m dl onemkl)
562
+ endif()
563
+ endif()
564
+ endif()
565
+
566
+ if (GGML_RPC)
567
+ message(STATUS "RPC found")
568
+
569
+ list(APPEND GGML_CDEF_PUBLIC GGML_USE_RPC)
570
+
571
+ if (WIN32)
572
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ws2_32)
573
+ endif()
574
+
575
+ set(GGML_HEADERS_RPC ../include/ggml-rpc.h)
576
+ set(GGML_SOURCES_RPC ggml-rpc.cpp)
577
+ endif()
578
+
579
+ if (GGML_VULKAN)
580
+ find_package(Vulkan COMPONENTS glslc REQUIRED)
581
+
582
+ if (Vulkan_FOUND)
583
+ message(STATUS "Vulkan found")
584
+
585
+ list(APPEND GGML_CDEF_PUBLIC GGML_USE_VULKAN)
586
+
587
+ # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build
588
+ # Posssibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector
589
+ if (MSVC AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
590
+ add_compile_definitions(_ITERATOR_DEBUG_LEVEL=0)
591
+ endif()
592
+
593
+ if (GGML_VULKAN_CHECK_RESULTS)
594
+ add_compile_definitions(GGML_VULKAN_CHECK_RESULTS)
595
+ endif()
596
+
597
+ if (GGML_VULKAN_DEBUG)
598
+ add_compile_definitions(GGML_VULKAN_DEBUG)
599
+ endif()
600
+
601
+ if (GGML_VULKAN_MEMORY_DEBUG)
602
+ add_compile_definitions(GGML_VULKAN_MEMORY_DEBUG)
603
+ endif()
604
+
605
+ if (GGML_VULKAN_VALIDATE)
606
+ add_compile_definitions(GGML_VULKAN_VALIDATE)
607
+ endif()
608
+
609
+ if (GGML_VULKAN_RUN_TESTS)
610
+ add_compile_definitions(GGML_VULKAN_RUN_TESTS)
611
+ endif()
612
+
613
+ add_subdirectory(vulkan-shaders)
614
+
615
+ set (_ggml_vk_genshaders_cmd vulkan-shaders-gen)
616
+ set (_ggml_vk_header ${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.hpp)
617
+ set (_ggml_vk_source ${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.cpp)
618
+ set (_ggml_vk_input_dir ${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders)
619
+ set (_ggml_vk_output_dir ${CMAKE_CURRENT_BINARY_DIR}/vulkan-shaders.spv)
620
+
621
+ file(GLOB _ggml_vk_shader_deps "${_ggml_vk_input_dir}/*.comp")
622
+
623
+ add_custom_command(
624
+ OUTPUT ${_ggml_vk_header}
625
+ ${_ggml_vk_source}
626
+
627
+ COMMAND ${_ggml_vk_genshaders_cmd}
628
+ --glslc ${Vulkan_GLSLC_EXECUTABLE}
629
+ --input-dir ${_ggml_vk_input_dir}
630
+ --output-dir ${_ggml_vk_output_dir}
631
+ --target-hpp ${_ggml_vk_header}
632
+ --target-cpp ${_ggml_vk_source}
633
+ --no-clean
634
+
635
+ DEPENDS ${_ggml_vk_shader_deps}
636
+ COMMENT "Generate vulkan shaders"
637
+ )
638
+
639
+ set(GGML_HEADERS_VULKAN ${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml-vulkan.h ${_ggml_vk_header})
640
+ set(GGML_SOURCES_VULKAN ggml-vulkan.cpp ${_ggml_vk_source})
641
+
642
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} Vulkan::Vulkan)
643
+ set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CMAKE_CURRENT_BINARY_DIR})
644
+ else()
645
+ message(WARNING "Vulkan not found")
646
+ endif()
647
+ endif()
648
+
649
+ if (GGML_KOMPUTE)
650
+ add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
651
+
652
+ find_package(Vulkan COMPONENTS glslc REQUIRED)
653
+ find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc)
654
+
655
+ if (NOT glslc_executable)
656
+ message(FATAL_ERROR "glslc not found")
657
+ endif()
658
+
659
+ function(compile_shader)
660
+ set(options)
661
+ set(oneValueArgs)
662
+ set(multiValueArgs SOURCES)
663
+ cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
664
+ foreach(source ${compile_shader_SOURCES})
665
+ get_filename_component(filename ${source} NAME)
666
+ set(spv_file ${filename}.spv)
667
+ add_custom_command(
668
+ OUTPUT ${spv_file}
669
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source}
670
+ ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp
671
+ ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp
672
+ ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp
673
+ ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp
674
+ COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source}
675
+ COMMENT "Compiling ${source} to ${spv_file}"
676
+ )
677
+
678
+ get_filename_component(RAW_FILE_NAME ${spv_file} NAME)
679
+ set(FILE_NAME "shader${RAW_FILE_NAME}")
680
+ string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME})
681
+ string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE)
682
+ string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}")
683
+ set(OUTPUT_HEADER_FILE "${HEADER_FILE}")
684
+ message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}")
685
+ if(CMAKE_GENERATOR MATCHES "Visual Studio")
686
+ add_custom_command(
687
+ OUTPUT ${OUTPUT_HEADER_FILE}
688
+ COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
689
+ COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
690
+ COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
691
+ COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
692
+ COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
693
+ COMMAND ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
694
+ COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
695
+ COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
696
+ DEPENDS ${spv_file} xxd
697
+ COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd"
698
+ )
699
+ else()
700
+ add_custom_command(
701
+ OUTPUT ${OUTPUT_HEADER_FILE}
702
+ COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
703
+ COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
704
+ COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
705
+ COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
706
+ COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
707
+ COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
708
+ COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
709
+ COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
710
+ DEPENDS ${spv_file} xxd
711
+ COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd"
712
+ )
713
+ endif()
714
+ endforeach()
715
+ endfunction()
716
+
717
+ if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
718
+ message(STATUS "Kompute found")
719
+ set(KOMPUTE_OPT_LOG_LEVEL Error CACHE STRING "Kompute log level")
720
+ add_subdirectory(kompute)
721
+
722
+ # Compile our shaders
723
+ compile_shader(SOURCES
724
+ kompute-shaders/op_scale.comp
725
+ kompute-shaders/op_scale_8.comp
726
+ kompute-shaders/op_add.comp
727
+ kompute-shaders/op_addrow.comp
728
+ kompute-shaders/op_mul.comp
729
+ kompute-shaders/op_silu.comp
730
+ kompute-shaders/op_relu.comp
731
+ kompute-shaders/op_gelu.comp
732
+ kompute-shaders/op_softmax.comp
733
+ kompute-shaders/op_norm.comp
734
+ kompute-shaders/op_rmsnorm.comp
735
+ kompute-shaders/op_diagmask.comp
736
+ kompute-shaders/op_mul_mat_mat_f32.comp
737
+ kompute-shaders/op_mul_mat_f16.comp
738
+ kompute-shaders/op_mul_mat_q8_0.comp
739
+ kompute-shaders/op_mul_mat_q4_0.comp
740
+ kompute-shaders/op_mul_mat_q4_1.comp
741
+ kompute-shaders/op_mul_mat_q6_k.comp
742
+ kompute-shaders/op_getrows_f32.comp
743
+ kompute-shaders/op_getrows_f16.comp
744
+ kompute-shaders/op_getrows_q4_0.comp
745
+ kompute-shaders/op_getrows_q4_1.comp
746
+ kompute-shaders/op_getrows_q6_k.comp
747
+ kompute-shaders/op_rope_f16.comp
748
+ kompute-shaders/op_rope_f32.comp
749
+ kompute-shaders/op_cpy_f16_f16.comp
750
+ kompute-shaders/op_cpy_f16_f32.comp
751
+ kompute-shaders/op_cpy_f32_f16.comp
752
+ kompute-shaders/op_cpy_f32_f32.comp
753
+ )
754
+
755
+ # Create a custom target for our generated shaders
756
+ add_custom_target(generated_shaders DEPENDS
757
+ shaderop_scale.h
758
+ shaderop_scale_8.h
759
+ shaderop_add.h
760
+ shaderop_addrow.h
761
+ shaderop_mul.h
762
+ shaderop_silu.h
763
+ shaderop_relu.h
764
+ shaderop_gelu.h
765
+ shaderop_softmax.h
766
+ shaderop_norm.h
767
+ shaderop_rmsnorm.h
768
+ shaderop_diagmask.h
769
+ shaderop_mul_mat_mat_f32.h
770
+ shaderop_mul_mat_f16.h
771
+ shaderop_mul_mat_q8_0.h
772
+ shaderop_mul_mat_q4_0.h
773
+ shaderop_mul_mat_q4_1.h
774
+ shaderop_mul_mat_q6_k.h
775
+ shaderop_getrows_f32.h
776
+ shaderop_getrows_f16.h
777
+ shaderop_getrows_q4_0.h
778
+ shaderop_getrows_q4_1.h
779
+ shaderop_getrows_q6_k.h
780
+ shaderop_rope_f16.h
781
+ shaderop_rope_f32.h
782
+ shaderop_cpy_f16_f16.h
783
+ shaderop_cpy_f16_f32.h
784
+ shaderop_cpy_f32_f16.h
785
+ shaderop_cpy_f32_f32.h
786
+ )
787
+
788
+ # Create a custom command that depends on the generated_shaders
789
+ add_custom_command(
790
+ OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
791
+ COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
792
+ DEPENDS generated_shaders
793
+ COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp"
794
+ )
795
+
796
+ # Add the stamp to the main sources to ensure dependency tracking
797
+ set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
798
+ set(GGML_HEADERS_KOMPUTE ../include/ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
799
+
800
+ list(APPEND GGML_CDEF_PUBLIC GGML_USE_KOMPUTE)
801
+
802
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} kompute)
803
+ set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CMAKE_CURRENT_BINARY_DIR})
804
+ else()
805
+ message(WARNING "Kompute not found")
806
+ endif()
807
+ endif()
808
+
809
+ if (GGML_CPU_HBM)
810
+ find_library(memkind memkind REQUIRED)
811
+
812
+ message(STATUS "Using memkind for CPU HBM")
813
+
814
+ add_compile_definitions(GGML_USE_CPU_HBM)
815
+
816
+ target_link_libraries(ggml PUBLIC memkind)
817
+ endif()
818
+
819
+ if (GGML_CANN)
820
+ if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
821
+ set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME})
822
+ message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
823
+ endif()
824
+
825
+ if (CANN_INSTALL_DIR)
826
+ # Only Support Linux.
827
+ if (GGML_CANN)
828
+ if (NOT UNIX)
829
+ set(GGML_CANN OFF)
830
+ message(WARNING "CANN: CANN toolkit supports unix but not ${CMAKE_SYSTEM_NAME}. Turning off GGML_CANN")
831
+ endif()
832
+ endif()
833
+
834
+ # Supported platforms: x86-64, arm64
835
+ if (GGML_CANN)
836
+ if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
837
+ elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
838
+ else()
839
+ set(GGML_CANN OFF)
840
+ message(WARNING "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}. Turning off GGML_CANN")
841
+ endif()
842
+ endif()
843
+
844
+ # Set header and libs
845
+ if(GGML_CANN)
846
+ set(CANN_INCLUDE_DIRS
847
+ ${CANN_INSTALL_DIR}/include
848
+ ${CANN_INSTALL_DIR}/include/aclnn
849
+ ${CANN_INSTALL_DIR}/acllib/include
850
+ )
851
+
852
+ # TODO: find libs
853
+ link_directories(
854
+ ${CANN_INSTALL_DIR}/lib64
855
+ )
856
+
857
+ add_subdirectory(ggml-cann/kernels)
858
+ list(APPEND CANN_LIBRARIES
859
+ ascendcl
860
+ nnopbase
861
+ opapi
862
+ acl_op_compiler
863
+ ascendc_kernels
864
+ )
865
+
866
+ set(GGML_HEADERS_CANN "../include/ggml-cann.h")
867
+ file(GLOB GGML_SOURCES_CANN "ggml-cann/*.cpp")
868
+ list(APPEND GGML_SOURCES_CANN "ggml-cann.cpp")
869
+
870
+ message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
871
+ message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
872
+
873
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${CANN_LIBRARIES} )
874
+ set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CANN_INCLUDE_DIRS})
875
+ list(APPEND GGML_CDEF_PUBLIC GGML_USE_CANN)
876
+ endif()
877
+ else()
878
+ set(GGML_CANN OFF)
879
+ message(WARNING "CANN: Can't find CANN_INSTALL_DIR, do you forget to source set_var.sh. Turning off GGML_CANN")
880
+ endif()
881
+
882
+ if(NOT GGML_CANN)
883
+ message(WARNING "CANN: GGML_CANN is turned OFF, see above for details.")
884
+ endif()
885
+ endif()
886
+
887
+ function(get_flags CCID CCVER)
888
+ set(C_FLAGS "")
889
+ set(CXX_FLAGS "")
890
+
891
+ if (CCID MATCHES "Clang")
892
+ set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return)
893
+ set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
894
+
895
+ if (
896
+ (CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
897
+ (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
898
+ )
899
+ list(APPEND C_FLAGS -Wdouble-promotion)
900
+ endif()
901
+ elseif (CCID STREQUAL "GNU")
902
+ set(C_FLAGS -Wdouble-promotion)
903
+ set(CXX_FLAGS -Wno-array-bounds)
904
+
905
+ if (NOT GGML_MUSA)
906
+ if (CCVER VERSION_GREATER_EQUAL 7.1.0)
907
+ list(APPEND CXX_FLAGS -Wno-format-truncation)
908
+ endif()
909
+ endif()
910
+ if (CCVER VERSION_GREATER_EQUAL 8.1.0)
911
+ list(APPEND CXX_FLAGS -Wextra-semi)
912
+ endif()
913
+ endif()
914
+
915
+ set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
916
+ set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
917
+ endfunction()
+
+ if (GGML_FATAL_WARNINGS)
+     if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+         list(APPEND C_FLAGS   -Werror)
+         list(APPEND CXX_FLAGS -Werror)
+     elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+         add_compile_options(/WX)
+     endif()
+ endif()
+
+ if (GGML_ALL_WARNINGS)
+     if (NOT MSVC)
+         list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
+         list(APPEND C_FLAGS       -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
+                                   -Werror=implicit-int -Werror=implicit-function-declaration)
+         list(APPEND CXX_FLAGS     -Wmissing-declarations -Wmissing-noreturn)
+
+         list(APPEND C_FLAGS   ${WARNING_FLAGS})
+         list(APPEND CXX_FLAGS ${WARNING_FLAGS})
+
+         get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
+
+         add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
+                             "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
+     else()
+         # todo : msvc
+         set(C_FLAGS   "")
+         set(CXX_FLAGS "")
+     endif()
+ endif()
+
+ set(CUDA_CXX_FLAGS "")
+
+ if (GGML_CUDA)
+     set(CUDA_FLAGS -use_fast_math)
+
+     if (GGML_FATAL_WARNINGS)
+         list(APPEND CUDA_FLAGS -Werror all-warnings)
+     endif()
+
+     if (GGML_ALL_WARNINGS AND NOT MSVC)
+         set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
+         if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
+             list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
+         endif()
+
+         execute_process(
+             COMMAND ${NVCC_CMD} -Xcompiler --version
+             OUTPUT_VARIABLE CUDA_CCFULLVER
+             ERROR_QUIET
+         )
+
+         if (NOT CUDA_CCFULLVER MATCHES clang)
+             set(CUDA_CCID "GNU")
+             execute_process(
+                 COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
+                 OUTPUT_VARIABLE CUDA_CCVER
+                 ERROR_QUIET
+             )
+         else()
+             if (CUDA_CCFULLVER MATCHES Apple)
+                 set(CUDA_CCID "AppleClang")
+             else()
+                 set(CUDA_CCID "Clang")
+             endif()
+             string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER})
+         endif()
+
+         message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
+
+         get_flags(${CUDA_CCID} ${CUDA_CCVER})
+         list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later
+     endif()
+
+     if (NOT MSVC)
+         list(APPEND CUDA_CXX_FLAGS -Wno-pedantic)
+     endif()
+ endif()
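The probe above shells out to nvcc (adding -ccbin when CMAKE_CUDA_HOST_COMPILER is set) to identify which host compiler drives CUDA compilation, then reuses get_flags() for it. If the host compiler should be pinned rather than detected, a hedged sketch of doing so; the g++-12 path is an example value, not something this build requires:

    # Example only: pin the host compiler nvcc uses so the detected
    # CUDA_CCID/CUDA_CCVER describe the compiler that is actually invoked.
    # This must be set before the CUDA language is enabled (e.g. via
    # -DCMAKE_CUDA_HOST_COMPILER=... at configure time).
    set(CMAKE_CUDA_HOST_COMPILER "/usr/bin/g++-12" CACHE FILEPATH "Host compiler for nvcc")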
+
+ if (GGML_LTO)
+     include(CheckIPOSupported)
+     check_ipo_supported(RESULT result OUTPUT output)
+     if (result)
+         set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
+     else()
+         message(WARNING "IPO is not supported: ${output}")
+     endif()
+ endif()
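check_ipo_supported() is the portable gate for LTO; the diff enables it globally through CMAKE_INTERPROCEDURAL_OPTIMIZATION. As an alternative sketch (not what this file does), the same result can be scoped to a single target, provided it runs after the add_library(ggml ...) call further down:

    # Sketch: enable IPO only for the ggml target when the toolchain supports it.
    include(CheckIPOSupported)
    check_ipo_supported(RESULT ipo_ok OUTPUT ipo_msg)
    if (ipo_ok)
        set_target_properties(ggml PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)
    else()
        message(WARNING "IPO is not supported: ${ipo_msg}")
    endif()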
+
+ if (GGML_CCACHE)
+     find_program(GGML_CCACHE_FOUND ccache)
+
+     if (GGML_CCACHE_FOUND)
+         # TODO: should not be set globally
+         set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
+         set(ENV{CCACHE_SLOPPINESS} time_macros)
+         message(STATUS "ccache found, compilation results will be cached. Disable with GGML_CCACHE=OFF.")
+     else()
+         message(STATUS "Warning: ccache not found - consider installing it for faster compilation or disable this warning with GGML_CCACHE=OFF")
+     endif ()
+ endif()
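The TODO above notes that RULE_LAUNCH_COMPILE should not be set globally. A sketch of the per-target alternative using the standard <LANG>_COMPILER_LAUNCHER target properties, again assuming it runs after the ggml target exists; this is not part of the diff:

    # Sketch: scope the ccache launcher to the ggml target instead of the whole build.
    find_program(GGML_CCACHE_FOUND ccache)
    if (GGML_CCACHE_FOUND)
        set_target_properties(ggml PROPERTIES
            C_COMPILER_LAUNCHER   ccache
            CXX_COMPILER_LAUNCHER ccache)
    endif()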
+
+ # this version of Apple ld64 is buggy
+ execute_process(
+     COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v
+     ERROR_VARIABLE output
+     OUTPUT_QUIET
+ )
+
+ if (output MATCHES "dyld-1015\.7")
+     add_compile_definitions(HAVE_BUGGY_APPLE_LINKER)
+ endif()
+
+ # architecture specific
+ # TODO: probably these flags need to be tweaked on some architectures
+ # feel free to update the Makefile for your architecture and send a pull request or issue
+ message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
+ if (MSVC)
+     string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR)
+     message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}")
+ else ()
+     set(CMAKE_GENERATOR_PLATFORM_LWR "")
+ endif ()
+
+ if (NOT MSVC)
+     if (GGML_STATIC)
+         add_link_options(-static)
+         if (MINGW)
+             add_link_options(-static-libgcc -static-libstdc++)
+         endif()
+     endif()
+     if (GGML_GPROF)
+         add_compile_options(-pg)
+     endif()
+ endif()
+
+ set(ARCH_FLAGS "")
+
+ if (CMAKE_OSX_ARCHITECTURES      STREQUAL "arm64" OR
+     CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
+     (NOT CMAKE_OSX_ARCHITECTURES      AND
+      NOT CMAKE_GENERATOR_PLATFORM_LWR AND
+          CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
+
+     message(STATUS "ARM detected")
+
+     if (MSVC)
+         add_compile_definitions(__aarch64__) # MSVC defines _M_ARM64 instead
+         add_compile_definitions(__ARM_NEON)
+         add_compile_definitions(__ARM_FEATURE_FMA)
+
+         set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
+         string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")
+
+         check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
+         if (GGML_COMPILER_SUPPORT_DOTPROD)
+             add_compile_definitions(__ARM_FEATURE_DOTPROD)
+         endif ()
+
+         check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
+
+         if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
+             add_compile_definitions(__ARM_FEATURE_MATMUL_INT8)
+         endif ()
+
+         check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
+         if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
+             add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+         endif ()
+
+         set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV})
+     else()
+         check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
+         if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
+             list(APPEND ARCH_FLAGS -mfp16-format=ieee)
+         endif()
+         if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
+             # Raspberry Pi 1, Zero
+             list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
+         endif()
+         if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
+             if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
+                 # Android armeabi-v7a
+                 list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
+             else()
+                 # Raspberry Pi 2
+                 list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
+             endif()
+         endif()
+         if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
+             # Android arm64-v8a
+             # Raspberry Pi 3, 4, Zero 2 (32-bit)
+             list(APPEND ARCH_FLAGS -mno-unaligned-access)
+         endif()
+         if (GGML_SVE)
+             list(APPEND ARCH_FLAGS -march=armv8.6-a+sve)
+         endif()
+     endif()
+ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
+         (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
+              CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$"))
+     message(STATUS "x86 detected")
+     if (MSVC)
+         # instruction set detection for MSVC only
+         if (GGML_NATIVE)
+             # TODO: improve, should not reference files from the parent folder
+             include(../cmake/FindSIMD.cmake)
+         endif ()
+         if (GGML_AVX512)
+             list(APPEND ARCH_FLAGS /arch:AVX512)
+             # MSVC has no compile-time flags for enabling specific AVX512
+             # extensions, nor does it define the corresponding macros.
+             # Define them manually.
+             if (GGML_AVX512_VBMI)
+                 add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
+                 add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
+             endif()
+             if (GGML_AVX512_VNNI)
+                 add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
+                 add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
+             endif()
+             if (GGML_AVX512_BF16)
+                 add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
+                 add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
+             endif()
+         elseif (GGML_AVX2)
+             list(APPEND ARCH_FLAGS /arch:AVX2)
+         elseif (GGML_AVX)
+             list(APPEND ARCH_FLAGS /arch:AVX)
+         endif()
+     else()
+         if (GGML_NATIVE)
+             list(APPEND ARCH_FLAGS -march=native)
+         endif()
+         if (GGML_F16C)
+             list(APPEND ARCH_FLAGS -mf16c)
+         endif()
+         if (GGML_FMA)
+             list(APPEND ARCH_FLAGS -mfma)
+         endif()
+         if (GGML_AVX)
+             list(APPEND ARCH_FLAGS -mavx)
+         endif()
+         if (GGML_AVX2)
+             list(APPEND ARCH_FLAGS -mavx2)
+         endif()
+         if (GGML_AVX512)
+             list(APPEND ARCH_FLAGS -mavx512f)
+             list(APPEND ARCH_FLAGS -mavx512bw)
+         endif()
+         if (GGML_AVX512_VBMI)
+             list(APPEND ARCH_FLAGS -mavx512vbmi)
+         endif()
+         if (GGML_AVX512_VNNI)
+             list(APPEND ARCH_FLAGS -mavx512vnni)
+         endif()
+         if (GGML_AVX512_BF16)
+             list(APPEND ARCH_FLAGS -mavx512bf16)
+         endif()
+     endif()
+ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
+     message(STATUS "PowerPC detected")
+     if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
+         list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
+     else()
+         list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
+         #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
+     endif()
+ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
+     message(STATUS "loongarch64 detected")
+
+     list(APPEND ARCH_FLAGS -march=loongarch64)
+     if (GGML_LASX)
+         list(APPEND ARCH_FLAGS -mlasx)
+     endif()
+     if (GGML_LSX)
+         list(APPEND ARCH_FLAGS -mlsx)
+     endif()
+ else()
+     message(STATUS "Unknown architecture")
+ endif()
+
+ add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>")
+ add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>")
+
+ if (GGML_CUDA)
+     list(APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS})
+     list(JOIN   CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED)  # pass host compiler flags as a single argument
+
+     if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
+         list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
+     endif()
+
+     add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
+ endif()
+
+ if (MINGW)
+     # Target Windows 8 for PrefetchVirtualMemory
+     add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
+ endif()
+
+ #
+ # POSIX conformance
+ #
+
+ # clock_gettime came in POSIX.1b (1993)
+ # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
+ # posix_memalign came in POSIX.1-2001 / SUSv3
+ # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
+ add_compile_definitions(_XOPEN_SOURCE=600)
+
+ # Somehow in OpenBSD whenever POSIX conformance is specified
+ # some string functions rely on locale_t availability,
+ # which was introduced in POSIX.1-2008, forcing us to go higher
+ if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
+     remove_definitions(-D_XOPEN_SOURCE=600)
+     add_compile_definitions(_XOPEN_SOURCE=700)
+ endif()
+
+ # Data types, macros and functions related to controlling CPU affinity and
+ # some memory allocation are available on Linux through GNU extensions in libc
+ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+     add_compile_definitions(_GNU_SOURCE)
+ endif()
+
+ # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
+ # and on macOS its availability depends on enabling Darwin extensions
+ # similarly on DragonFly, enabling BSD extensions is necessary
+ if (
+     CMAKE_SYSTEM_NAME MATCHES "Darwin" OR
+     CMAKE_SYSTEM_NAME MATCHES "iOS" OR
+     CMAKE_SYSTEM_NAME MATCHES "tvOS" OR
+     CMAKE_SYSTEM_NAME MATCHES "DragonFly"
+ )
+     add_compile_definitions(_DARWIN_C_SOURCE)
+ endif()
+
+ # alloca is a non-standard interface that is not visible on BSDs when
+ # POSIX conformance is specified, but not all of them provide a clean way
+ # to enable it in such cases
+ if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
+     add_compile_definitions(__BSD_VISIBLE)
+ endif()
+ if (CMAKE_SYSTEM_NAME MATCHES "NetBSD")
+     add_compile_definitions(_NETBSD_SOURCE)
+ endif()
+ if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
+     add_compile_definitions(_BSD_SOURCE)
+ endif()
+
+ if (WIN32)
+     add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
+
+     if (BUILD_SHARED_LIBS)
+         # TODO: should not use this
+         set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
+     endif()
+ endif()
+
+ #
+ # libraries
+ #
+
+ # ggml
+
+ add_library(ggml
+             ../include/ggml.h
+             ../include/ggml-alloc.h
+             ../include/ggml-backend.h
+             ggml.c
+             ggml-alloc.c
+             ggml-backend.c
+             ggml-quants.c
+             ggml-quants.h
+             ${GGML_SOURCES_CUDA}      ${GGML_HEADERS_CUDA}
+             ${GGML_SOURCES_METAL}     ${GGML_HEADERS_METAL}
+             ${GGML_SOURCES_RPC}       ${GGML_HEADERS_RPC}
+             ${GGML_SOURCES_EXTRA}     ${GGML_HEADERS_EXTRA}
+             ${GGML_SOURCES_SYCL}      ${GGML_HEADERS_SYCL}
+             ${GGML_SOURCES_KOMPUTE}   ${GGML_HEADERS_KOMPUTE}
+             ${GGML_SOURCES_VULKAN}    ${GGML_HEADERS_VULKAN}
+             ${GGML_SOURCES_ROCM}      ${GGML_HEADERS_ROCM}
+             ${GGML_SOURCES_BLAS}      ${GGML_HEADERS_BLAS}
+             ${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
+             ${GGML_SOURCES_CANN}      ${GGML_HEADERS_CANN}
+             ggml-aarch64.c ggml-aarch64.h
+             )
+
+ if (EMSCRIPTEN)
+     set_target_properties(ggml PROPERTIES COMPILE_FLAGS "-msimd128")
+ endif()
+
+ target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC})
+ target_include_directories(ggml PUBLIC ../include)
+ target_include_directories(ggml PRIVATE . ${GGML_EXTRA_INCLUDES})
+ target_link_directories(ggml PRIVATE ${GGML_EXTRA_LIBDIRS})
+ target_compile_features (ggml PRIVATE c_std_11) # don't bump
+
+ target_link_libraries(ggml PRIVATE Threads::Threads ${GGML_EXTRA_LIBS})
+
+ find_library(MATH_LIBRARY m)
+ if (MATH_LIBRARY)
+     if (NOT WIN32 OR NOT GGML_SYCL)
+         target_link_libraries(ggml PRIVATE ${MATH_LIBRARY})
+     endif()
+ endif()
+
+ if (BUILD_SHARED_LIBS)
+     set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
+     target_compile_definitions(ggml PRIVATE GGML_SHARED GGML_BUILD)
+ endif()
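With the ggml target defined, its public compile definitions (GGML_CDEF_PUBLIC, e.g. GGML_USE_CANN) and the ../include directory propagate to consumers through the PUBLIC keywords above. A minimal consumer sketch; my_tool is a hypothetical target, not part of this package:

    # Any target in the same build tree can link ggml and inherit its public
    # include path and GGML_USE_* definitions transitively.
    add_executable(my_tool my_tool.c)
    target_link_libraries(my_tool PRIVATE ggml)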