@fugood/llama.node 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (320) hide show
  1. package/CMakeLists.txt +5 -2
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +8 -1
  17. package/package.json +1 -1
  18. package/patches/llama.patch +12 -12
  19. package/src/DetokenizeWorker.cpp +1 -1
  20. package/src/LlamaContext.cpp +33 -1
  21. package/src/LlamaContext.h +1 -0
  22. package/src/LoadSessionWorker.cpp +1 -0
  23. package/src/llama.cpp/.github/workflows/bench.yml +310 -0
  24. package/src/llama.cpp/.github/workflows/build.yml +1315 -0
  25. package/src/llama.cpp/.github/workflows/close-issue.yml +23 -0
  26. package/src/llama.cpp/.github/workflows/docker.yml +116 -0
  27. package/src/llama.cpp/.github/workflows/editorconfig.yml +27 -0
  28. package/src/llama.cpp/.github/workflows/gguf-publish.yml +44 -0
  29. package/src/llama.cpp/.github/workflows/labeler.yml +17 -0
  30. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +65 -0
  31. package/src/llama.cpp/.github/workflows/nix-ci.yml +72 -0
  32. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +22 -0
  33. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +36 -0
  34. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +35 -0
  35. package/src/llama.cpp/.github/workflows/python-lint.yml +23 -0
  36. package/src/llama.cpp/.github/workflows/python-type-check.yml +38 -0
  37. package/src/llama.cpp/.github/workflows/server.yml +183 -0
  38. package/src/llama.cpp/CMakeLists.txt +91 -1245
  39. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +1 -1
  40. package/src/llama.cpp/cmake/build-info.cmake +58 -0
  41. package/src/llama.cpp/cmake/git-vars.cmake +22 -0
  42. package/src/llama.cpp/common/CMakeLists.txt +4 -3
  43. package/src/llama.cpp/common/build-info.cpp.in +4 -0
  44. package/src/llama.cpp/common/common.cpp +1116 -877
  45. package/src/llama.cpp/common/common.h +191 -77
  46. package/src/llama.cpp/common/grammar-parser.cpp +118 -31
  47. package/src/llama.cpp/common/json-schema-to-grammar.cpp +346 -65
  48. package/src/llama.cpp/common/log.h +1 -1
  49. package/src/llama.cpp/common/ngram-cache.h +10 -3
  50. package/src/llama.cpp/common/sampling.cpp +19 -10
  51. package/src/llama.cpp/docs/build.md +353 -0
  52. package/src/llama.cpp/examples/CMakeLists.txt +22 -22
  53. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +1 -1
  54. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +6 -6
  55. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  56. package/src/llama.cpp/examples/batched/batched.cpp +52 -55
  57. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  58. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +20 -72
  59. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +1 -1
  60. package/src/llama.cpp/examples/chat-13B.bat +57 -0
  61. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/{finetune → cvector-generator}/CMakeLists.txt +2 -2
  63. package/src/llama.cpp/examples/cvector-generator/completions.txt +582 -0
  64. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +503 -0
  65. package/src/llama.cpp/examples/cvector-generator/mean.hpp +48 -0
  66. package/src/llama.cpp/examples/cvector-generator/negative.txt +4 -0
  67. package/src/llama.cpp/examples/cvector-generator/pca.hpp +325 -0
  68. package/src/llama.cpp/examples/cvector-generator/positive.txt +4 -0
  69. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +35 -0
  70. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  71. package/src/llama.cpp/examples/embedding/embedding.cpp +94 -46
  72. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +2 -2
  73. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +4 -6
  74. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  75. package/src/llama.cpp/examples/export-lora/export-lora.cpp +344 -386
  76. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +2 -2
  77. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +30 -25
  78. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/gguf/gguf.cpp +5 -0
  80. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +15 -0
  81. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +46 -0
  82. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +295 -0
  83. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +52 -0
  84. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +221 -0
  85. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +24 -0
  86. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +42 -0
  87. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +7093 -0
  88. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +693 -0
  89. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  90. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +3 -3
  91. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  92. package/src/llama.cpp/examples/gritlm/gritlm.cpp +6 -2
  93. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  94. package/src/llama.cpp/examples/imatrix/imatrix.cpp +137 -176
  95. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  96. package/src/llama.cpp/examples/infill/infill.cpp +38 -153
  97. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +175 -94
  98. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +65 -0
  99. package/src/llama.cpp/examples/llama.android/build.gradle.kts +6 -0
  100. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +68 -0
  101. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +11 -7
  102. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +2 -2
  103. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +18 -0
  104. package/src/llama.cpp/examples/llava/CMakeLists.txt +6 -5
  105. package/src/llama.cpp/examples/llava/android/build_64.sh +8 -0
  106. package/src/llama.cpp/examples/llava/clip.cpp +23 -14
  107. package/src/llama.cpp/examples/llava/llava-cli.cpp +8 -6
  108. package/src/llama.cpp/examples/llava/requirements.txt +3 -2
  109. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  110. package/src/llama.cpp/examples/lookahead/lookahead.cpp +2 -1
  111. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  112. package/src/llama.cpp/examples/lookup/lookup-create.cpp +2 -0
  113. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  114. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -2
  115. package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
  116. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  117. package/src/llama.cpp/examples/main/main.cpp +98 -75
  118. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +4 -5
  119. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  120. package/src/llama.cpp/examples/parallel/parallel.cpp +2 -1
  121. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  122. package/src/llama.cpp/examples/passkey/passkey.cpp +23 -43
  123. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  124. package/src/llama.cpp/examples/perplexity/perplexity.cpp +13 -10
  125. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  126. package/src/llama.cpp/examples/quantize/quantize.cpp +37 -34
  127. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  128. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
  129. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  130. package/src/llama.cpp/examples/retrieval/retrieval.cpp +26 -77
  131. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  132. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +14 -7
  133. package/src/llama.cpp/examples/server/CMakeLists.txt +26 -2
  134. package/src/llama.cpp/examples/server/server.cpp +274 -671
  135. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  136. package/src/llama.cpp/examples/server/utils.hpp +28 -29
  137. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  138. package/src/llama.cpp/examples/simple/simple.cpp +21 -29
  139. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  140. package/src/llama.cpp/examples/speculative/speculative.cpp +2 -1
  141. package/src/llama.cpp/examples/sycl/CMakeLists.txt +1 -1
  142. package/src/llama.cpp/examples/sycl/build.sh +23 -0
  143. package/src/llama.cpp/examples/sycl/run-llama2.sh +36 -0
  144. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +33 -0
  145. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +9 -0
  146. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  147. package/src/llama.cpp/examples/tokenize/tokenize.cpp +16 -2
  148. package/src/llama.cpp/ggml/CMakeLists.txt +253 -0
  149. package/src/llama.cpp/{cmake → ggml/cmake}/FindSIMD.cmake +6 -6
  150. package/src/llama.cpp/{ggml-backend.h → ggml/include/ggml-backend.h} +22 -17
  151. package/src/llama.cpp/ggml/include/ggml-blas.h +23 -0
  152. package/src/llama.cpp/ggml/include/ggml-cann.h +125 -0
  153. package/src/llama.cpp/{ggml-cuda.h → ggml/include/ggml-cuda.h} +3 -0
  154. package/src/llama.cpp/{ggml-metal.h → ggml/include/ggml-metal.h} +1 -2
  155. package/src/llama.cpp/{ggml-sycl.h → ggml/include/ggml-sycl.h} +3 -10
  156. package/src/llama.cpp/{ggml.h → ggml/include/ggml.h} +80 -85
  157. package/src/llama.cpp/ggml/src/CMakeLists.txt +1329 -0
  158. package/src/llama.cpp/ggml/src/ggml-aarch64.c +2193 -0
  159. package/src/llama.cpp/ggml/src/ggml-aarch64.h +39 -0
  160. package/src/llama.cpp/{ggml-alloc.c → ggml/src/ggml-alloc.c} +100 -49
  161. package/src/llama.cpp/{ggml-backend-impl.h → ggml/src/ggml-backend-impl.h} +20 -8
  162. package/src/llama.cpp/{ggml-backend.c → ggml/src/ggml-backend.c} +307 -167
  163. package/src/llama.cpp/ggml/src/ggml-blas.cpp +367 -0
  164. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +198 -0
  165. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +230 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2944 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/common.h +282 -0
  169. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +32 -0
  170. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +17 -0
  171. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +223 -0
  172. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +186 -0
  173. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +180 -0
  174. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +193 -0
  175. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  176. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +208 -0
  177. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +206 -0
  178. package/src/llama.cpp/ggml/src/ggml-cann.cpp +2023 -0
  179. package/src/llama.cpp/{ggml-common.h → ggml/src/ggml-common.h} +41 -7
  180. package/src/llama.cpp/{ggml-impl.h → ggml/src/ggml-impl.h} +113 -9
  181. package/src/llama.cpp/{ggml-kompute.cpp → ggml/src/ggml-kompute.cpp} +33 -18
  182. package/src/llama.cpp/{ggml-quants.c → ggml/src/ggml-quants.c} +1460 -940
  183. package/src/llama.cpp/{ggml-quants.h → ggml/src/ggml-quants.h} +19 -20
  184. package/src/llama.cpp/{ggml-rpc.cpp → ggml/src/ggml-rpc.cpp} +95 -72
  185. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +27 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +53 -0
  187. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +355 -0
  188. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +195 -0
  189. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +21 -0
  190. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +547 -0
  191. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +27 -0
  192. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +698 -0
  193. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  194. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  195. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +3011 -0
  196. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  197. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  198. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1027 -0
  199. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  200. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +374 -0
  201. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +35 -0
  202. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +66 -0
  203. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +275 -0
  204. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +22 -0
  205. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +251 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +24 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1140 -0
  208. package/src/llama.cpp/ggml/src/ggml-sycl.cpp +5314 -0
  209. package/src/llama.cpp/{ggml-vulkan.cpp → ggml/src/ggml-vulkan.cpp} +1781 -1868
  210. package/src/llama.cpp/{ggml.c → ggml/src/ggml.c} +1245 -2087
  211. package/src/llama.cpp/{sgemm.cpp → ggml/src/llamafile/sgemm.cpp} +21 -24
  212. package/src/llama.cpp/{sgemm.h → ggml/src/llamafile/sgemm.h} +1 -1
  213. package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +5 -0
  214. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +552 -0
  215. package/src/llama.cpp/{llama.h → include/llama.h} +175 -100
  216. package/src/llama.cpp/models/.editorconfig +1 -0
  217. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  219. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  220. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
  221. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
  222. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  223. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
  224. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
  225. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  226. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
  227. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
  228. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  229. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
  230. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
  231. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  232. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
  233. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
  234. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
  236. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
  237. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  239. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
  240. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
  241. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  242. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
  243. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  245. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
  246. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  248. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
  249. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
  256. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  257. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
  258. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
  259. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  260. package/src/llama.cpp/requirements/requirements-all.txt +12 -0
  261. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
  262. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
  263. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
  264. package/src/llama.cpp/requirements/{requirements-convert.txt → requirements-convert_legacy_llama.txt} +1 -1
  265. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
  266. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  267. package/src/llama.cpp/requirements/requirements-pydantic.txt +3 -0
  268. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
  269. package/src/llama.cpp/requirements.txt +5 -4
  270. package/src/llama.cpp/scripts/build-info.sh +30 -0
  271. package/src/llama.cpp/scripts/install-oneapi.bat +19 -0
  272. package/src/llama.cpp/src/CMakeLists.txt +33 -0
  273. package/src/llama.cpp/src/llama-grammar.cpp +539 -0
  274. package/src/llama.cpp/src/llama-grammar.h +39 -0
  275. package/src/llama.cpp/src/llama-impl.h +26 -0
  276. package/src/llama.cpp/src/llama-sampling.cpp +635 -0
  277. package/src/llama.cpp/src/llama-sampling.h +56 -0
  278. package/src/llama.cpp/src/llama-vocab.cpp +1721 -0
  279. package/src/llama.cpp/src/llama-vocab.h +130 -0
  280. package/src/llama.cpp/{llama.cpp → src/llama.cpp} +5979 -5260
  281. package/src/llama.cpp/{unicode-data.cpp → src/unicode-data.cpp} +851 -802
  282. package/src/llama.cpp/{unicode.cpp → src/unicode.cpp} +52 -30
  283. package/src/llama.cpp/{unicode.h → src/unicode.h} +5 -1
  284. package/src/llama.cpp/tests/CMakeLists.txt +19 -20
  285. package/src/llama.cpp/tests/test-backend-ops.cpp +245 -67
  286. package/src/llama.cpp/tests/test-chat-template.cpp +57 -3
  287. package/src/llama.cpp/tests/test-double-float.cpp +2 -2
  288. package/src/llama.cpp/tests/test-grad0.cpp +2 -2
  289. package/src/llama.cpp/tests/test-grammar-integration.cpp +978 -31
  290. package/src/llama.cpp/tests/test-grammar-parser.cpp +423 -158
  291. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +508 -135
  292. package/src/llama.cpp/tests/test-llama-grammar.cpp +15 -9
  293. package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -1
  294. package/src/llama.cpp/tests/test-quantize-perf.cpp +1 -1
  295. package/src/llama.cpp/tests/test-rope.cpp +3 -4
  296. package/src/llama.cpp/tests/test-sampling.cpp +5 -5
  297. package/src/llama.cpp/tests/test-tokenizer-0.cpp +6 -6
  298. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +20 -15
  299. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +22 -11
  300. package/bin/darwin/arm64/default.metallib +0 -0
  301. package/bin/darwin/x64/default.metallib +0 -0
  302. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +0 -5
  303. package/src/llama.cpp/examples/beam-search/beam-search.cpp +0 -188
  304. package/src/llama.cpp/examples/finetune/finetune.cpp +0 -1862
  305. package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +0 -55
  306. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -5
  307. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -1253
  308. package/src/llama.cpp/ggml-opencl.cpp +0 -2305
  309. package/src/llama.cpp/ggml-opencl.h +0 -36
  310. package/src/llama.cpp/ggml-sycl.cpp +0 -17340
  311. package/src/llama.cpp/ggml-vulkan-shaders.hpp +0 -81211
  312. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +0 -2
  313. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -2
  314. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -1
  315. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -24
  316. /package/src/llama.cpp/{ggml-alloc.h → ggml/include/ggml-alloc.h} +0 -0
  317. /package/src/llama.cpp/{ggml-kompute.h → ggml/include/ggml-kompute.h} +0 -0
  318. /package/src/llama.cpp/{ggml-rpc.h → ggml/include/ggml-rpc.h} +0 -0
  319. /package/src/llama.cpp/{ggml-vulkan.h → ggml/include/ggml-vulkan.h} +0 -0
  320. /package/src/llama.cpp/{unicode-data.h → src/unicode-data.h} +0 -0
@@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target
2
2
  project("llama.cpp" C CXX)
3
3
  include(CheckIncludeFileCXX)
4
4
 
5
+ #set(CMAKE_WARN_DEPRECATED YES)
6
+ set(CMAKE_WARN_UNUSED_CLI YES)
7
+
5
8
  set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
6
9
 
7
10
  if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
@@ -9,11 +12,16 @@ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
9
12
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
10
13
  endif()
11
14
 
15
+ # Add path to modules
16
+ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
17
+
12
18
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
13
19
 
14
20
  if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
15
21
  set(LLAMA_STANDALONE ON)
16
22
 
23
+ include(git-vars)
24
+
17
25
  # configure project version
18
26
  # TODO
19
27
  else()
@@ -32,1285 +40,132 @@ else()
32
40
  endif()
33
41
  endif()
34
42
 
43
+ option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
35
44
 
36
- #
37
- # Option list
38
- #
39
-
40
- if (APPLE)
41
- set(LLAMA_METAL_DEFAULT ON)
42
- else()
43
- set(LLAMA_METAL_DEFAULT OFF)
45
+ if (WIN32)
46
+ add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
44
47
  endif()
45
48
 
46
- set(LLAMA_LLAMAFILE_DEFAULT ON)
47
-
48
- # general
49
- option(BUILD_SHARED_LIBS "build shared libraries" OFF)
50
- option(LLAMA_STATIC "llama: static link libraries" OFF)
51
- option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
52
- option(LLAMA_LTO "llama: enable link time optimization" OFF)
53
- option(LLAMA_CCACHE "llama: use ccache if available" ON)
49
+ #
50
+ # option list
51
+ #
54
52
 
55
53
  # debug
56
- option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON)
57
- option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
58
- option(LLAMA_GPROF "llama: enable gprof" OFF)
54
+ option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON)
55
+ option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
59
56
 
60
57
  # build
61
- option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF)
58
+ option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF)
62
59
 
63
60
  # sanitizers
64
- option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF)
65
- option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
66
- option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
67
-
68
- # instruction set specific
69
- if (LLAMA_NATIVE)
70
- set(INS_ENB OFF)
71
- else()
72
- set(INS_ENB ON)
73
- endif()
61
+ option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF)
62
+ option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
63
+ option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
74
64
 
75
- option(LLAMA_SVE "llama: enable SVE" OFF)
76
- option(LLAMA_AVX "llama: enable AVX" ${INS_ENB})
77
- option(LLAMA_AVX2 "llama: enable AVX2" ${INS_ENB})
78
- option(LLAMA_AVX512 "llama: enable AVX512" OFF)
79
- option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
80
- option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
81
- option(LLAMA_AVX512_BF16 "llama: enable AVX512-BF16" OFF)
82
- option(LLAMA_FMA "llama: enable FMA" ${INS_ENB})
83
- # in MSVC F16C is implied with AVX2/AVX512
84
- if (NOT MSVC)
85
- option(LLAMA_F16C "llama: enable F16C" ${INS_ENB})
86
- endif()
87
-
88
- if (WIN32)
89
- set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version")
90
- endif()
65
+ # extra artifacts
66
+ option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
67
+ option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
68
+ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
91
69
 
92
70
  # 3rd party libs
93
- option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
94
- option(LLAMA_BLAS "llama: use BLAS" OFF)
95
- option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ${LLAMA_LLAMAFILE_DEFAULT})
96
- set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
97
- option(LLAMA_CUDA "llama: use CUDA" OFF)
98
- option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
99
- option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
100
- option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
101
- set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
102
- set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
103
- option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF)
104
- set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
105
- set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
106
- "llama: max. batch size for using peer access")
107
- option(LLAMA_CUDA_NO_PEER_COPY "llama: do not use peer to peer copies" OFF)
108
- option(LLAMA_CUDA_NO_VMM "llama: do not try to use CUDA VMM" OFF)
109
-
110
- option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
111
- option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
112
- option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
113
- option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
114
- option(LLAMA_VULKAN "llama: use Vulkan" OFF)
115
- option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF)
116
- option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF)
117
- option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF)
118
- option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF)
119
- option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
120
- option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF)
121
- option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF)
122
- option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF)
123
- set(LLAMA_METAL_MACOSX_VERSION_MIN "" CACHE STRING
124
- "llama: metal minimum macOS version")
125
- set(LLAMA_METAL_STD "" CACHE STRING "llama: metal standard version (-std flag)")
126
- option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
127
- option(LLAMA_RPC "llama: use RPC" OFF)
128
- option(LLAMA_SYCL "llama: use SYCL" OFF)
129
- option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
130
- set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device")
131
- option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF)
132
- set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeline parallelism")
133
-
134
- option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
135
- option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
136
- option(LLAMA_BUILD_SERVER "llama: build server example" ON)
137
- option(LLAMA_LASX "llama: enable lasx" ON)
138
- option(LLAMA_LSX "llama: enable lsx" ON)
139
-
140
- # add perf arguments
141
- option(LLAMA_PERF "llama: enable perf" OFF)
71
+ option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
142
72
 
143
73
  # Required for relocatable CMake package
144
- include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
145
-
146
- #
147
- # Compile flags
148
- #
149
-
150
- if (LLAMA_SYCL)
151
- set(CMAKE_CXX_STANDARD 17)
152
- else()
153
- set(CMAKE_CXX_STANDARD 11)
154
- endif()
155
-
156
- set(CMAKE_CXX_STANDARD_REQUIRED true)
157
- set(CMAKE_C_STANDARD 11)
158
- set(CMAKE_C_STANDARD_REQUIRED true)
159
- set(THREADS_PREFER_PTHREAD_FLAG ON)
160
-
161
- find_package(Threads REQUIRED)
162
- include(CheckCXXCompilerFlag)
163
-
164
- add_compile_definitions(GGML_SCHED_MAX_COPIES=${LLAMA_SCHED_MAX_COPIES})
165
-
166
- # enable libstdc++ assertions for debug builds
167
- if (CMAKE_SYSTEM_NAME MATCHES "Linux")
168
- add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
169
- endif()
170
-
171
- if (NOT MSVC)
172
- if (LLAMA_SANITIZE_THREAD)
173
- add_compile_options(-fsanitize=thread)
174
- link_libraries (-fsanitize=thread)
175
- endif()
176
-
177
- if (LLAMA_SANITIZE_ADDRESS)
178
- add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
179
- link_libraries (-fsanitize=address)
180
- endif()
181
-
182
- if (LLAMA_SANITIZE_UNDEFINED)
183
- add_compile_options(-fsanitize=undefined)
184
- link_libraries (-fsanitize=undefined)
185
- endif()
186
- endif()
187
-
188
- if (APPLE AND LLAMA_ACCELERATE)
189
- find_library(ACCELERATE_FRAMEWORK Accelerate)
190
- if (ACCELERATE_FRAMEWORK)
191
- message(STATUS "Accelerate framework found")
192
-
193
- add_compile_definitions(GGML_USE_ACCELERATE)
194
- add_compile_definitions(ACCELERATE_NEW_LAPACK)
195
- add_compile_definitions(ACCELERATE_LAPACK_ILP64)
196
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
197
- else()
198
- message(WARNING "Accelerate framework not found")
199
- endif()
200
- endif()
201
-
202
- if (LLAMA_METAL)
203
- find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
204
- find_library(METAL_FRAMEWORK Metal REQUIRED)
205
- find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
206
-
207
- message(STATUS "Metal framework found")
208
- set(GGML_HEADERS_METAL ggml-metal.h)
209
- set(GGML_SOURCES_METAL ggml-metal.m)
210
-
211
- add_compile_definitions(GGML_USE_METAL)
212
- if (LLAMA_METAL_NDEBUG)
213
- add_compile_definitions(GGML_METAL_NDEBUG)
214
- endif()
215
-
216
- # copy ggml-common.h and ggml-metal.metal to bin directory
217
- configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY)
218
- configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
219
-
220
- if (LLAMA_METAL_EMBED_LIBRARY)
221
- enable_language(ASM)
222
- add_compile_definitions(GGML_METAL_EMBED_LIBRARY)
223
-
224
- set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h")
225
- set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
226
-
227
- file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated")
228
-
229
- # merge ggml-common.h and ggml-metal.metal into a single file
230
- set(METALLIB_EMBED_ASM "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.s")
231
- set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal")
232
-
233
- add_custom_command(
234
- OUTPUT ${METALLIB_EMBED_ASM}
235
- COMMAND echo "Embedding Metal library"
236
- COMMAND sed -e '/\#include \"ggml-common.h\"/r ${METALLIB_COMMON}' -e '/\#include \"ggml-common.h\"/d' < ${METALLIB_SOURCE} > ${METALLIB_SOURCE_EMBED}
237
- COMMAND echo ".section __DATA,__ggml_metallib" > ${METALLIB_EMBED_ASM}
238
- COMMAND echo ".globl _ggml_metallib_start" >> ${METALLIB_EMBED_ASM}
239
- COMMAND echo "_ggml_metallib_start:" >> ${METALLIB_EMBED_ASM}
240
- COMMAND echo ".incbin \\\"${METALLIB_SOURCE_EMBED}\\\"" >> ${METALLIB_EMBED_ASM}
241
- COMMAND echo ".globl _ggml_metallib_end" >> ${METALLIB_EMBED_ASM}
242
- COMMAND echo "_ggml_metallib_end:" >> ${METALLIB_EMBED_ASM}
243
- DEPENDS ggml-metal.metal ggml-common.h
244
- COMMENT "Generate assembly for embedded Metal library"
245
- )
246
-
247
- set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${METALLIB_EMBED_ASM})
248
- else()
249
- if (LLAMA_METAL_SHADER_DEBUG)
250
- # custom command to do the following:
251
- # xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air
252
- # xcrun -sdk macosx metallib ggml-metal.air -o default.metallib
253
- #
254
- # note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works
255
- # disabling fast math is needed in order to pass tests/test-backend-ops
256
- # note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1
257
- # note: unfortunately, we have to call it default.metallib instead of ggml.metallib
258
- # ref: https://github.com/ggerganov/whisper.cpp/issues/1720
259
- set(XC_FLAGS -fno-fast-math -fno-inline -g)
260
- else()
261
- set(XC_FLAGS -O3)
262
- endif()
263
-
264
- # Append macOS metal versioning flags
265
- if (LLAMA_METAL_MACOSX_VERSION_MIN)
266
- message(STATUS "Adding -mmacosx-version-min=${LLAMA_METAL_MACOSX_VERSION_MIN} flag to metal compilation")
267
- list(APPEND XC_FLAGS -mmacosx-version-min=${LLAMA_METAL_MACOSX_VERSION_MIN})
268
- endif()
269
- if (LLAMA_METAL_STD)
270
- message(STATUS "Adding -std=${LLAMA_METAL_STD} flag to metal compilation")
271
- list(APPEND XC_FLAGS -std=${LLAMA_METAL_STD})
272
- endif()
273
-
274
- add_custom_command(
275
- OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
276
- COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
277
- COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
278
- COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
279
- COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h
280
- COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal
281
- DEPENDS ggml-metal.metal ggml-common.h
282
- COMMENT "Compiling Metal kernels"
283
- )
284
-
285
- add_custom_target(
286
- ggml-metal ALL
287
- DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
288
- )
289
- endif() # LLAMA_METAL_EMBED_LIBRARY
290
-
291
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
292
- ${FOUNDATION_LIBRARY}
293
- ${METAL_FRAMEWORK}
294
- ${METALKIT_FRAMEWORK}
295
- )
296
- endif()
297
-
298
- if (LLAMA_BLAS)
299
- if (LLAMA_STATIC)
300
- set(BLA_STATIC ON)
301
- endif()
302
- if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
303
- set(BLA_SIZEOF_INTEGER 8)
304
- endif()
305
-
306
- set(BLA_VENDOR ${LLAMA_BLAS_VENDOR})
307
- find_package(BLAS)
308
-
309
- if (BLAS_FOUND)
310
- message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
311
-
312
- if ("${BLAS_INCLUDE_DIRS}" STREQUAL "")
313
- # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
314
- # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
315
- find_package(PkgConfig REQUIRED)
316
- if (${LLAMA_BLAS_VENDOR} MATCHES "Generic")
317
- pkg_check_modules(DepBLAS REQUIRED blas)
318
- elseif (${LLAMA_BLAS_VENDOR} MATCHES "OpenBLAS")
319
- # As of openblas v0.3.22, the 64-bit is named openblas64.pc
320
- pkg_check_modules(DepBLAS openblas64)
321
- if (NOT DepBLAS_FOUND)
322
- pkg_check_modules(DepBLAS REQUIRED openblas)
323
- endif()
324
- elseif (${LLAMA_BLAS_VENDOR} MATCHES "FLAME")
325
- pkg_check_modules(DepBLAS REQUIRED blis)
326
- elseif (${LLAMA_BLAS_VENDOR} MATCHES "ATLAS")
327
- pkg_check_modules(DepBLAS REQUIRED blas-atlas)
328
- elseif (${LLAMA_BLAS_VENDOR} MATCHES "FlexiBLAS")
329
- pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
330
- elseif (${LLAMA_BLAS_VENDOR} MATCHES "Intel")
331
- # all Intel* libraries share the same include path
332
- pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
333
- elseif (${LLAMA_BLAS_VENDOR} MATCHES "NVHPC")
334
- # this doesn't provide pkg-config
335
- # suggest to assign BLAS_INCLUDE_DIRS on your own
336
- if ("${NVHPC_VERSION}" STREQUAL "")
337
- message(WARNING "Better to set NVHPC_VERSION")
338
- else()
339
- set(DepBLAS_FOUND ON)
340
- set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
341
- endif()
342
- endif()
343
- if (DepBLAS_FOUND)
344
- set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
345
- else()
346
- message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
347
- " detected by pkgconfig, trying to find cblas.h from possible paths...")
348
- find_path(BLAS_INCLUDE_DIRS
349
- NAMES cblas.h
350
- HINTS
351
- /usr/include
352
- /usr/local/include
353
- /usr/include/openblas
354
- /opt/homebrew/opt/openblas/include
355
- /usr/local/opt/openblas/include
356
- /usr/include/x86_64-linux-gnu/openblas/include
357
- )
358
- endif()
359
- endif()
360
-
361
- message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
362
-
363
- add_compile_options(${BLAS_LINKER_FLAGS})
364
-
365
- add_compile_definitions(GGML_USE_OPENBLAS)
366
-
367
- if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
368
- add_compile_definitions(GGML_BLAS_USE_MKL)
369
- endif()
370
-
371
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES})
372
- set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
373
- else()
374
- message(WARNING "BLAS not found, please refer to "
375
- "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
376
- " to set correct LLAMA_BLAS_VENDOR")
377
- endif()
378
- endif()
379
-
380
- if (LLAMA_LLAMAFILE)
381
- add_compile_definitions(GGML_USE_LLAMAFILE)
382
-
383
- set(GGML_HEADERS_LLAMAFILE sgemm.h)
384
- set(GGML_SOURCES_LLAMAFILE sgemm.cpp)
385
- endif()
386
-
387
- if (LLAMA_CUBLAS)
388
- message(WARNING "LLAMA_CUBLAS is deprecated and will be removed in the future.\nUse LLAMA_CUDA instead")
389
- set(LLAMA_CUDA ON)
390
- endif()
391
-
392
- if (LLAMA_CUDA)
393
- cmake_minimum_required(VERSION 3.17)
394
-
395
- find_package(CUDAToolkit)
396
- if (CUDAToolkit_FOUND)
397
- message(STATUS "CUDA found")
398
-
399
- enable_language(CUDA)
400
-
401
- set(GGML_HEADERS_CUDA ggml-cuda.h)
402
-
403
- file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu")
404
- list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu")
405
-
406
- add_compile_definitions(GGML_USE_CUDA)
407
- add_compile_definitions(GGML_CUDA_USE_GRAPHS)
408
- if (LLAMA_CUDA_FORCE_DMMV)
409
- add_compile_definitions(GGML_CUDA_FORCE_DMMV)
410
- endif()
411
- if (LLAMA_CUDA_FORCE_MMQ)
412
- add_compile_definitions(GGML_CUDA_FORCE_MMQ)
413
- endif()
414
- if (LLAMA_CUDA_NO_VMM)
415
- add_compile_definitions(GGML_CUDA_NO_VMM)
416
- endif()
417
- add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
418
- add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
419
- if (DEFINED LLAMA_CUDA_DMMV_Y)
420
- add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
421
- endif()
422
- if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
423
- add_compile_definitions(GGML_CUDA_F16)
424
- endif()
425
- add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
426
- add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${LLAMA_CUDA_PEER_MAX_BATCH_SIZE})
427
- if (LLAMA_CUDA_NO_PEER_COPY)
428
- add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
429
- endif()
430
-
431
- if (LLAMA_STATIC)
432
- if (WIN32)
433
- # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
434
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
435
- else ()
436
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
437
- endif()
438
- else()
439
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
440
- endif()
441
-
442
- if (LLAMA_CUDA_NO_VMM)
443
- # No VMM requested, no need to link directly with the cuda driver lib (libcuda.so)
444
- else()
445
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ...
446
- endif()
447
-
448
- if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
449
- # 52 == lowest CUDA 12 standard
450
- # 60 == f16 CUDA intrinsics
451
- # 61 == integer CUDA intrinsics
452
- # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
453
- if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
454
- set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
455
- else()
456
- set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics
457
- #set(CMAKE_CUDA_ARCHITECTURES "") # use this to compile much faster, but only F16 models work
458
- endif()
459
- endif()
460
- message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
461
-
462
- else()
463
- message(WARNING "CUDA not found")
464
- endif()
465
- endif()
466
-
467
- if (LLAMA_RPC)
468
- add_compile_definitions(GGML_USE_RPC)
469
-
470
- if (WIN32)
471
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ws2_32)
472
- endif()
473
-
474
- set(GGML_HEADERS_RPC ggml-rpc.h)
475
- set(GGML_SOURCES_RPC ggml-rpc.cpp)
476
- endif()
477
-
478
- if (LLAMA_CLBLAST)
479
- find_package(CLBlast)
480
- if (CLBlast_FOUND)
481
- message(STATUS "CLBlast found")
482
-
483
- set(GGML_HEADERS_OPENCL ggml-opencl.h)
484
- set(GGML_SOURCES_OPENCL ggml-opencl.cpp)
485
-
486
- add_compile_definitions(GGML_USE_CLBLAST)
487
-
488
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} clblast)
489
- else()
490
- message(WARNING "CLBlast not found")
491
- endif()
492
- endif()
493
-
494
- if (LLAMA_VULKAN)
495
- find_package(Vulkan)
496
- if (Vulkan_FOUND)
497
- message(STATUS "Vulkan found")
498
-
499
- set(GGML_HEADERS_VULKAN ggml-vulkan.h)
500
- set(GGML_SOURCES_VULKAN ggml-vulkan.cpp)
501
-
502
- add_compile_definitions(GGML_USE_VULKAN)
503
-
504
- # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build
505
- # Possibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector
506
- if (MSVC AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
507
- add_compile_definitions(_ITERATOR_DEBUG_LEVEL=0)
508
- endif()
509
-
510
- if (LLAMA_VULKAN_CHECK_RESULTS)
511
- add_compile_definitions(GGML_VULKAN_CHECK_RESULTS)
512
- endif()
513
-
514
- if (LLAMA_VULKAN_DEBUG)
515
- add_compile_definitions(GGML_VULKAN_DEBUG)
516
- endif()
517
-
518
- if (LLAMA_VULKAN_VALIDATE)
519
- add_compile_definitions(GGML_VULKAN_VALIDATE)
520
- endif()
521
-
522
- if (LLAMA_VULKAN_RUN_TESTS)
523
- add_compile_definitions(GGML_VULKAN_RUN_TESTS)
524
- endif()
525
-
526
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} Vulkan::Vulkan)
527
- else()
528
- message(WARNING "Vulkan not found")
529
- endif()
530
- endif()
531
-
532
- if (LLAMA_HIPBLAS)
533
- if (NOT "$ENV{ROCM_PATH}" STREQUAL "") # a bare path is never a true constant for if(); compare as a string so any non-empty ROCM_PATH is honored
534
- set(ROCM_PATH $ENV{ROCM_PATH})
535
- else()
536
- set(ROCM_PATH /opt/rocm) # fallback when the environment does not provide a path
537
- endif()
538
- list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
539
-
540
- # CMake on Windows doesn't support the HIP language yet
541
- if(WIN32)
542
- set(CXX_IS_HIPCC TRUE)
543
- else()
544
- string(REGEX MATCH "hipcc(\\.bat)?$" CXX_IS_HIPCC "${CMAKE_CXX_COMPILER}") # "\\." reaches the regex engine as "\.", i.e. a literal dot before "bat"
545
- endif()
546
-
547
- if(CXX_IS_HIPCC)
548
- if(LINUX)
549
- if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
550
- message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
551
- endif()
552
-
553
- message(WARNING "Setting hipcc as the C++ compiler is legacy behavior."
554
- " Prefer setting the HIP compiler directly. See README for details.")
555
- endif()
556
- else()
557
- # Forward AMDGPU_TARGETS to CMAKE_HIP_ARCHITECTURES.
558
- if(AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES)
559
- set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS})
560
- endif()
561
- cmake_minimum_required(VERSION 3.21)
562
- enable_language(HIP)
563
- endif()
564
- find_package(hip REQUIRED)
565
- find_package(hipblas REQUIRED)
566
- find_package(rocblas REQUIRED)
567
-
568
- message(STATUS "HIP and hipBLAS found")
569
-
570
- set(GGML_HEADERS_ROCM ggml-cuda.h)
571
-
572
- file(GLOB GGML_SOURCES_ROCM "ggml-cuda/*.cu")
573
- list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu")
574
-
575
- add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUDA)
576
-
577
- if (LLAMA_HIP_UMA)
578
- add_compile_definitions(GGML_HIP_UMA)
579
- endif()
580
-
581
- if (LLAMA_CUDA_FORCE_DMMV)
582
- add_compile_definitions(GGML_CUDA_FORCE_DMMV)
583
- endif()
584
-
585
- if (LLAMA_CUDA_FORCE_MMQ)
586
- add_compile_definitions(GGML_CUDA_FORCE_MMQ)
587
- endif()
588
-
589
- if (LLAMA_CUDA_NO_PEER_COPY)
590
- add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
591
- endif()
592
-
593
- add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
594
- add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
595
- add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
596
-
597
- if (CXX_IS_HIPCC)
598
- set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
599
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device)
600
- else()
601
- set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP)
602
- endif()
74
+ include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
603
75
 
604
- if (LLAMA_STATIC)
605
- message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
606
- endif()
76
+ # override ggml options
77
+ set(GGML_SANITIZE_THREAD ${LLAMA_SANITIZE_THREAD})
78
+ set(GGML_SANITIZE_ADDRESS ${LLAMA_SANITIZE_ADDRESS})
79
+ set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED})
80
+ set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
81
+ set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
607
82
 
608
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} PUBLIC hip::host roc::rocblas roc::hipblas)
83
+ # change the default for these ggml options
84
+ if (NOT DEFINED GGML_LLAMAFILE)
85
+ set(GGML_LLAMAFILE ON)
609
86
  endif()
610
87
 
611
- if (LLAMA_SYCL)
612
- if (NOT LLAMA_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$")
613
- message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA")
614
- endif()
615
-
616
- if ( NOT DEFINED ENV{ONEAPI_ROOT})
617
- message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
618
- endif()
619
- #todo: AOT
620
-
621
- find_package(IntelSYCL REQUIRED)
622
-
623
- message(STATUS "SYCL found")
624
-
625
- add_compile_definitions(GGML_USE_SYCL)
626
-
627
- if (LLAMA_SYCL_F16)
628
- add_compile_definitions(GGML_SYCL_F16)
629
- endif()
630
-
631
- add_compile_options(-I./) #include DPCT
632
- add_compile_options(-I/${SYCL_INCLUDE_DIR})
633
-
634
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
635
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
636
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
637
- if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
638
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
639
- endif()
640
-
641
- set(GGML_HEADERS_SYCL ggml-sycl.h)
642
- set(GGML_SOURCES_SYCL ggml-sycl.cpp)
643
-
644
- if (WIN32)
645
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
646
- else()
647
- if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
648
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
649
- elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
650
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl pthread m dl onemkl)
651
- endif()
652
- endif()
88
+ if (NOT DEFINED GGML_CUDA_USE_GRAPHS)
89
+ set(GGML_CUDA_USE_GRAPHS ON)
653
90
  endif()
654
91
 
655
- if (LLAMA_KOMPUTE)
656
- add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
657
- find_package(Vulkan COMPONENTS glslc REQUIRED)
658
- find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc)
659
- if (NOT glslc_executable)
660
- message(FATAL_ERROR "glslc not found")
92
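+ # transition helpers: llama_option_depr(TYPE OLD NEW) prints a TYPE-level deprecation message and sets NEW to ON in the caller's scope when OLD is enabled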
93
+ function (llama_option_depr TYPE OLD NEW)
94
+ if (${OLD})
95
+ message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
96
+ set(${NEW} ON PARENT_SCOPE)
661
97
  endif()
662
-
663
- function(compile_shader)
664
- set(options)
665
- set(oneValueArgs)
666
- set(multiValueArgs SOURCES)
667
- cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
668
- foreach(source ${compile_shader_SOURCES})
669
- get_filename_component(filename ${source} NAME)
670
- set(spv_file ${filename}.spv)
671
- add_custom_command(
672
- OUTPUT ${spv_file}
673
- DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source}
674
- ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp
675
- ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp
676
- ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp
677
- ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp
678
- COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source}
679
- COMMENT "Compiling ${source} to ${spv_file}"
680
- )
681
-
682
- get_filename_component(RAW_FILE_NAME ${spv_file} NAME)
683
- set(FILE_NAME "shader${RAW_FILE_NAME}")
684
- string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME})
685
- string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE)
686
- string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}")
687
- set(OUTPUT_HEADER_FILE "${HEADER_FILE}")
688
- message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}")
689
- if(CMAKE_GENERATOR MATCHES "Visual Studio")
690
- add_custom_command(
691
- OUTPUT ${OUTPUT_HEADER_FILE}
692
- COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
693
- COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
694
- COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
695
- COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
696
- COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
697
- COMMAND ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
698
- COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
699
- COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
700
- DEPENDS ${spv_file} xxd
701
- COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd"
702
- )
703
- else()
704
- add_custom_command(
705
- OUTPUT ${OUTPUT_HEADER_FILE}
706
- COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
707
- COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
708
- COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
709
- COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
710
- COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
711
- COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
712
- COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
713
- COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
714
- DEPENDS ${spv_file} xxd
715
- COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd"
716
- )
717
- endif()
718
- endforeach()
719
- endfunction()
720
-
721
- if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
722
- message(STATUS "Kompute found")
723
- set(KOMPUTE_OPT_LOG_LEVEL Error CACHE STRING "Kompute log level")
724
- add_subdirectory(kompute)
725
-
726
- # Compile our shaders
727
- compile_shader(SOURCES
728
- kompute-shaders/op_scale.comp
729
- kompute-shaders/op_scale_8.comp
730
- kompute-shaders/op_add.comp
731
- kompute-shaders/op_addrow.comp
732
- kompute-shaders/op_mul.comp
733
- kompute-shaders/op_silu.comp
734
- kompute-shaders/op_relu.comp
735
- kompute-shaders/op_gelu.comp
736
- kompute-shaders/op_softmax.comp
737
- kompute-shaders/op_norm.comp
738
- kompute-shaders/op_rmsnorm.comp
739
- kompute-shaders/op_diagmask.comp
740
- kompute-shaders/op_mul_mat_mat_f32.comp
741
- kompute-shaders/op_mul_mat_f16.comp
742
- kompute-shaders/op_mul_mat_q8_0.comp
743
- kompute-shaders/op_mul_mat_q4_0.comp
744
- kompute-shaders/op_mul_mat_q4_1.comp
745
- kompute-shaders/op_mul_mat_q6_k.comp
746
- kompute-shaders/op_getrows_f16.comp
747
- kompute-shaders/op_getrows_q4_0.comp
748
- kompute-shaders/op_getrows_q4_1.comp
749
- kompute-shaders/op_getrows_q6_k.comp
750
- kompute-shaders/op_rope_f16.comp
751
- kompute-shaders/op_rope_f32.comp
752
- kompute-shaders/op_cpy_f16_f16.comp
753
- kompute-shaders/op_cpy_f16_f32.comp
754
- kompute-shaders/op_cpy_f32_f16.comp
755
- kompute-shaders/op_cpy_f32_f32.comp
756
- )
757
-
758
- # Create a custom target for our generated shaders
759
- add_custom_target(generated_shaders DEPENDS
760
- shaderop_scale.h
761
- shaderop_scale_8.h
762
- shaderop_add.h
763
- shaderop_addrow.h
764
- shaderop_mul.h
765
- shaderop_silu.h
766
- shaderop_relu.h
767
- shaderop_gelu.h
768
- shaderop_softmax.h
769
- shaderop_norm.h
770
- shaderop_rmsnorm.h
771
- shaderop_diagmask.h
772
- shaderop_mul_mat_mat_f32.h
773
- shaderop_mul_mat_f16.h
774
- shaderop_mul_mat_q8_0.h
775
- shaderop_mul_mat_q4_0.h
776
- shaderop_mul_mat_q4_1.h
777
- shaderop_mul_mat_q6_k.h
778
- shaderop_getrows_f16.h
779
- shaderop_getrows_q4_0.h
780
- shaderop_getrows_q4_1.h
781
- shaderop_getrows_q6_k.h
782
- shaderop_rope_f16.h
783
- shaderop_rope_f32.h
784
- shaderop_cpy_f16_f16.h
785
- shaderop_cpy_f16_f32.h
786
- shaderop_cpy_f32_f16.h
787
- shaderop_cpy_f32_f32.h
788
- )
789
-
790
- # Create a custom command that depends on the generated_shaders
791
- add_custom_command(
792
- OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
793
- COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
794
- DEPENDS generated_shaders
795
- COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp"
796
- )
797
-
798
- # Add the stamp to the main sources to ensure dependency tracking
799
- set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
800
- set(GGML_HEADERS_KOMPUTE ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
801
-
802
- add_compile_definitions(GGML_USE_KOMPUTE)
803
-
804
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute)
805
- set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR})
806
- else()
807
- message(WARNING "Kompute not found")
808
- endif()
809
- endif()
810
-
811
- if (LLAMA_CPU_HBM)
812
- find_library(memkind memkind REQUIRED)
813
-
814
- add_compile_definitions(GGML_USE_CPU_HBM)
815
-
816
- target_link_libraries(ggml PUBLIC memkind)
817
- endif()
818
-
819
- if (LLAMA_PERF)
820
- add_compile_definitions(GGML_PERF)
821
- endif()
822
-
823
- function(get_flags CCID CCVER)
824
- set(C_FLAGS "")
825
- set(CXX_FLAGS "")
826
-
827
- if (CCID MATCHES "Clang")
828
- set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return)
829
- set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
830
-
831
- if (
832
- (CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
833
- (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
834
- )
835
- list(APPEND C_FLAGS -Wdouble-promotion)
836
- endif()
837
- elseif (CCID STREQUAL "GNU")
838
- set(C_FLAGS -Wdouble-promotion)
839
- set(CXX_FLAGS -Wno-array-bounds)
840
-
841
- if (CCVER VERSION_GREATER_EQUAL 7.1.0)
842
- list(APPEND CXX_FLAGS -Wno-format-truncation)
843
- endif()
844
- if (CCVER VERSION_GREATER_EQUAL 8.1.0)
845
- list(APPEND CXX_FLAGS -Wextra-semi)
846
- endif()
847
- endif()
848
-
849
- set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
850
- set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
851
98
  endfunction()
852
99
 
853
- if (LLAMA_FATAL_WARNINGS)
854
- if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
855
- list(APPEND C_FLAGS -Werror)
856
- list(APPEND CXX_FLAGS -Werror)
857
- elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
858
- add_compile_options(/WX)
859
- endif()
860
- endif()
861
-
862
- if (LLAMA_ALL_WARNINGS)
863
- if (NOT MSVC)
864
- list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
865
- list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
866
- -Werror=implicit-int -Werror=implicit-function-declaration)
867
- list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
868
-
869
- list(APPEND C_FLAGS ${WARNING_FLAGS})
870
- list(APPEND CXX_FLAGS ${WARNING_FLAGS})
871
-
872
- get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
873
-
874
- add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
875
- "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
876
- else()
877
- # todo : msvc
878
- set(C_FLAGS "")
879
- set(CXX_FLAGS "")
880
- endif()
881
- endif()
882
-
883
- set(CUDA_CXX_FLAGS "")
884
-
885
- if (LLAMA_CUDA)
886
- set(CUDA_FLAGS -use_fast_math)
887
-
888
- if (LLAMA_FATAL_WARNINGS)
889
- list(APPEND CUDA_FLAGS -Werror all-warnings)
890
- endif()
891
-
892
- if (LLAMA_ALL_WARNINGS AND NOT MSVC)
893
- set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
894
- if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
895
- list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
896
- endif()
897
-
898
- execute_process(
899
- COMMAND ${NVCC_CMD} -Xcompiler --version
900
- OUTPUT_VARIABLE CUDA_CCFULLVER
901
- ERROR_QUIET
902
- )
903
-
904
- if (NOT CUDA_CCFULLVER MATCHES clang)
905
- set(CUDA_CCID "GNU")
906
- execute_process(
907
- COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
908
- OUTPUT_VARIABLE CUDA_CCVER
909
- ERROR_QUIET
910
- )
911
- else()
912
- if (CUDA_CCFULLVER MATCHES Apple)
913
- set(CUDA_CCID "AppleClang")
914
- else()
915
- set(CUDA_CCID "Clang")
916
- endif()
917
- string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER})
918
- endif()
919
-
920
- message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
921
-
922
- get_flags(${CUDA_CCID} ${CUDA_CCVER})
923
- list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later
924
- endif()
925
-
926
- if (NOT MSVC)
927
- list(APPEND CUDA_CXX_FLAGS -Wno-pedantic)
928
- endif()
929
- endif()
930
-
931
- if (WIN32)
932
- add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
933
-
934
- if (BUILD_SHARED_LIBS)
935
- set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
936
- endif()
937
- endif()
938
-
939
- if (LLAMA_LTO)
940
- include(CheckIPOSupported)
941
- check_ipo_supported(RESULT result OUTPUT output)
942
- if (result)
943
- set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
944
- else()
945
- message(WARNING "IPO is not supported: ${output}")
946
- endif()
947
- endif()
948
-
949
- if (LLAMA_CCACHE)
950
- find_program(LLAMA_CCACHE_FOUND ccache)
951
- if (LLAMA_CCACHE_FOUND)
952
- set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
953
- set(ENV{CCACHE_SLOPPINESS} time_macros)
954
- message(STATUS "ccache found, compilation results will be cached. Disable with LLAMA_CCACHE=OFF.")
955
- else()
956
- message(STATUS "Warning: ccache not found - consider installing it for faster compilation or disable this warning with LLAMA_CCACHE=OFF")
957
- endif ()
958
- endif()
959
-
960
- # this version of Apple ld64 is buggy
961
- execute_process(
962
- COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v
963
- ERROR_VARIABLE output
964
- OUTPUT_QUIET
965
- )
966
-
967
- if (output MATCHES "dyld-1015\\.7") # "\\." keeps the dot literal; a single backslash is consumed by CMake's string parser and the regex would see "."
968
- add_compile_definitions(HAVE_BUGGY_APPLE_LINKER)
969
- endif()
970
-
971
- # Architecture specific
972
- # TODO: probably these flags need to be tweaked on some architectures
973
- # feel free to update the Makefile for your architecture and send a pull request or issue
974
- message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
975
- if (MSVC)
976
- string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR)
977
- message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}")
978
- else ()
979
- set(CMAKE_GENERATOR_PLATFORM_LWR "")
980
- endif ()
981
-
982
- if (NOT MSVC)
983
- if (LLAMA_STATIC)
984
- add_link_options(-static)
985
- if (MINGW)
986
- add_link_options(-static-libgcc -static-libstdc++)
987
- endif()
988
- endif()
989
- if (LLAMA_GPROF)
990
- add_compile_options(-pg)
991
- endif()
992
- endif()
993
-
994
- set(ARCH_FLAGS "")
995
-
996
- if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
997
- (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
998
- CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
999
- message(STATUS "ARM detected")
1000
- if (MSVC)
1001
- add_compile_definitions(__aarch64__) # MSVC defines _M_ARM64 instead
1002
- add_compile_definitions(__ARM_NEON)
1003
- add_compile_definitions(__ARM_FEATURE_FMA)
1004
-
1005
- set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
1006
- string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")
1007
- check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
1008
- if (GGML_COMPILER_SUPPORT_DOTPROD)
1009
- add_compile_definitions(__ARM_FEATURE_DOTPROD)
1010
- endif ()
1011
- check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8) # probe the int8 matrix-multiply intrinsic itself rather than the float multiply-accumulate vmlaq_f32
1012
- if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
1013
- add_compile_definitions(__ARM_FEATURE_MATMUL_INT8)
1014
- endif ()
1015
-
1016
- check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
1017
- if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
1018
- add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
1019
- endif ()
1020
- set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV})
1021
- else()
1022
- check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
1023
- if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
1024
- list(APPEND ARCH_FLAGS -mfp16-format=ieee)
1025
- endif()
1026
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
1027
- # Raspberry Pi 1, Zero
1028
- list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
1029
- endif()
1030
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
1031
- if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
1032
- # Android armeabi-v7a
1033
- list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
1034
- else()
1035
- # Raspberry Pi 2
1036
- list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
1037
- endif()
1038
- endif()
1039
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
1040
- # Android arm64-v8a
1041
- # Raspberry Pi 3, 4, Zero 2 (32-bit)
1042
- list(APPEND ARCH_FLAGS -mno-unaligned-access)
1043
- endif()
1044
- if (LLAMA_SVE)
1045
- list(APPEND ARCH_FLAGS -march=armv8.6-a+sve)
1046
- endif()
1047
- endif()
1048
- elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
1049
- (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
1050
- CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$"))
1051
- message(STATUS "x86 detected")
1052
- if (MSVC)
1053
- # instruction set detection for MSVC only
1054
- if (LLAMA_NATIVE)
1055
- include(cmake/FindSIMD.cmake)
1056
- endif ()
1057
- if (LLAMA_AVX512)
1058
- list(APPEND ARCH_FLAGS /arch:AVX512)
1059
- # MSVC has no compile-time flags enabling specific
1060
- # AVX512 extensions, neither it defines the
1061
- # macros corresponding to the extensions.
1062
- # Do it manually.
1063
- if (LLAMA_AVX512_VBMI)
1064
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
1065
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
1066
- endif()
1067
- if (LLAMA_AVX512_VNNI)
1068
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
1069
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
1070
- endif()
1071
- if (LLAMA_AVX512_BF16)
1072
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
1073
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
1074
- endif()
1075
- elseif (LLAMA_AVX2)
1076
- list(APPEND ARCH_FLAGS /arch:AVX2)
1077
- elseif (LLAMA_AVX)
1078
- list(APPEND ARCH_FLAGS /arch:AVX)
1079
- endif()
1080
- else()
1081
- if (LLAMA_NATIVE)
1082
- list(APPEND ARCH_FLAGS -march=native)
1083
- endif()
1084
- if (LLAMA_F16C)
1085
- list(APPEND ARCH_FLAGS -mf16c)
1086
- endif()
1087
- if (LLAMA_FMA)
1088
- list(APPEND ARCH_FLAGS -mfma)
1089
- endif()
1090
- if (LLAMA_AVX)
1091
- list(APPEND ARCH_FLAGS -mavx)
1092
- endif()
1093
- if (LLAMA_AVX2)
1094
- list(APPEND ARCH_FLAGS -mavx2)
1095
- endif()
1096
- if (LLAMA_AVX512)
1097
- list(APPEND ARCH_FLAGS -mavx512f)
1098
- list(APPEND ARCH_FLAGS -mavx512bw)
1099
- endif()
1100
- if (LLAMA_AVX512_VBMI)
1101
- list(APPEND ARCH_FLAGS -mavx512vbmi)
1102
- endif()
1103
- if (LLAMA_AVX512_VNNI)
1104
- list(APPEND ARCH_FLAGS -mavx512vnni)
1105
- endif()
1106
- if (LLAMA_AVX512_BF16)
1107
- list(APPEND ARCH_FLAGS -mavx512bf16)
1108
- endif()
1109
- endif()
1110
- elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
1111
- message(STATUS "PowerPC detected")
1112
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
1113
- list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
1114
- else()
1115
- list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
1116
- #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
1117
- endif()
1118
- elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
1119
- message(STATUS "loongarch64 detected")
1120
-
1121
- list(APPEND ARCH_FLAGS -march=loongarch64)
1122
- if (LLAMA_LASX)
1123
- list(APPEND ARCH_FLAGS -mlasx)
1124
- endif()
1125
- if (LLAMA_LSX)
1126
- list(APPEND ARCH_FLAGS -mlsx)
1127
- endif()
1128
-
1129
- else()
1130
- message(STATUS "Unknown architecture")
1131
- endif()
1132
-
1133
- add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>")
1134
- add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>")
1135
-
1136
- if (LLAMA_CUDA)
1137
- list(APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS})
1138
- list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument
1139
- if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
1140
- list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
1141
- endif()
1142
- add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
1143
- endif()
1144
-
1145
- if (MINGW)
1146
- # Target Windows 8 for PrefetchVirtualMemory
1147
- add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER})
1148
- endif()
100
+ llama_option_depr(FATAL_ERROR LLAMA_CUBLAS GGML_CUDA)
101
+ llama_option_depr(WARNING LLAMA_CUDA GGML_CUDA)
102
+ llama_option_depr(WARNING LLAMA_KOMPUTE GGML_KOMPUTE)
103
+ llama_option_depr(WARNING LLAMA_METAL GGML_METAL)
104
+ llama_option_depr(WARNING LLAMA_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
105
+ llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE)
106
+ llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
107
+ llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
108
+ llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
109
+ llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
1149
110
 
1150
111
  #
1151
- # POSIX conformance
112
+ # build the library
1152
113
  #
1153
114
 
1154
- # clock_gettime came in POSIX.1b (1993)
1155
- # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
1156
- # posix_memalign came in POSIX.1-2001 / SUSv3
1157
- # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
1158
- add_compile_definitions(_XOPEN_SOURCE=600)
1159
-
1160
- # Somehow in OpenBSD whenever POSIX conformance is specified
1161
- # some string functions rely on locale_t availability,
1162
- # which was introduced in POSIX.1-2008, forcing us to go higher
1163
- if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
1164
- remove_definitions(-D_XOPEN_SOURCE=600)
1165
- add_compile_definitions(_XOPEN_SOURCE=700)
1166
- endif()
1167
-
1168
- # Data types, macros and functions related to controlling CPU affinity and
1169
- # some memory allocation are available on Linux through GNU extensions in libc
1170
- if (CMAKE_SYSTEM_NAME MATCHES "Linux")
1171
- add_compile_definitions(_GNU_SOURCE)
1172
- endif()
1173
-
1174
- # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
1175
- # and on macOS its availability depends on enabling Darwin extensions
1176
- # similarly on DragonFly, enabling BSD extensions is necessary
1177
- if (
1178
- CMAKE_SYSTEM_NAME MATCHES "Darwin" OR
1179
- CMAKE_SYSTEM_NAME MATCHES "iOS" OR
1180
- CMAKE_SYSTEM_NAME MATCHES "tvOS" OR
1181
- CMAKE_SYSTEM_NAME MATCHES "DragonFly"
1182
- )
1183
- add_compile_definitions(_DARWIN_C_SOURCE)
1184
- endif()
1185
-
1186
- # alloca is a non-standard interface that is not visible on BSDs when
1187
- # POSIX conformance is specified, but not all of them provide a clean way
1188
- # to enable it in such cases
1189
- if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
1190
- add_compile_definitions(__BSD_VISIBLE)
1191
- endif()
1192
- if (CMAKE_SYSTEM_NAME MATCHES "NetBSD")
1193
- add_compile_definitions(_NETBSD_SOURCE)
1194
- endif()
1195
- if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
1196
- add_compile_definitions(_BSD_SOURCE)
115
+ if (NOT TARGET ggml)
116
+ add_subdirectory(ggml)
117
+ # ... otherwise assume ggml is added by a parent CMakeLists.txt
1197
118
  endif()
119
+ add_subdirectory(src)
1198
120
 
1199
121
  #
1200
- # libraries
122
+ # install
1201
123
  #
1202
124
 
1203
- # ggml
1204
-
1205
- add_library(ggml OBJECT
1206
- ggml.c
1207
- ggml.h
1208
- ggml-alloc.c
1209
- ggml-alloc.h
1210
- ggml-backend.c
1211
- ggml-backend.h
1212
- ggml-quants.c
1213
- ggml-quants.h
1214
- ${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
1215
- ${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
1216
- ${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
1217
- ${GGML_SOURCES_RPC} ${GGML_HEADERS_RPC}
1218
- ${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
1219
- ${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
1220
- ${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
1221
- ${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
1222
- ${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
1223
- ${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
1224
- )
1225
-
1226
- target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
1227
- target_compile_features (ggml PUBLIC c_std_11) # don't bump
1228
-
1229
- target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
1230
-
1231
- add_library(ggml_static STATIC $<TARGET_OBJECTS:ggml>)
1232
-
1233
- if (BUILD_SHARED_LIBS)
1234
- set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
1235
- add_library(ggml_shared SHARED $<TARGET_OBJECTS:ggml>)
1236
- target_link_libraries(ggml_shared PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
1237
- install(TARGETS ggml_shared LIBRARY)
1238
- endif()
1239
-
1240
- # llama
1241
-
1242
- add_library(llama
1243
- llama.cpp
1244
- llama.h
1245
- unicode.h
1246
- unicode.cpp
1247
- unicode-data.cpp
1248
- )
1249
-
1250
- target_include_directories(llama PUBLIC .)
1251
- target_compile_features (llama PUBLIC cxx_std_11) # don't bump
125
+ include(GNUInstallDirs)
126
+ include(CMakePackageConfigHelpers)
1252
127
 
1253
- target_link_libraries(llama PRIVATE
1254
- ggml
1255
- ${LLAMA_EXTRA_LIBS}
1256
- )
128
+ set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
129
+ set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
130
+ set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
1257
131
 
1258
- if (BUILD_SHARED_LIBS)
1259
- set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON)
1260
- target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD)
1261
- if (LLAMA_METAL)
1262
- set_target_properties(llama PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
1263
- endif()
1264
- endif()
132
+ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
133
+ set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
134
+ set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
1265
135
 
1266
136
 
137
+ # At the moment some compile definitions are placed within the ggml/src
138
+ # directory but not exported on the `ggml` target. This could be improved by
139
+ # determining _precisely_ which defines are necessary for the llama-config
140
+ # package.
1267
141
  #
1268
- # install
1269
- #
142
+ get_directory_property(GGML_DIR_DEFINES DIRECTORY ggml/src COMPILE_DEFINITIONS)
143
+ get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS)
144
+ set(GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES} ${GGML_DIR_DEFINES})
145
+ get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)
1270
146
 
1271
- include(GNUInstallDirs)
1272
- include(CMakePackageConfigHelpers)
1273
-
1274
- set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR}
1275
- CACHE PATH "Location of header files")
1276
- set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR}
1277
- CACHE PATH "Location of library files")
1278
- set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR}
1279
- CACHE PATH "Location of binary files")
1280
- set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
1281
- set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
1282
- set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
1283
- get_directory_property(LLAMA_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
147
+ set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h)
148
+ install(TARGETS llama LIBRARY PUBLIC_HEADER)
1284
149
 
1285
150
  configure_package_config_file(
1286
- ${CMAKE_CURRENT_SOURCE_DIR}/scripts/LlamaConfig.cmake.in
1287
- ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
1288
- INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama
151
+ ${CMAKE_CURRENT_SOURCE_DIR}/cmake/llama-config.cmake.in
152
+ ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
153
+ INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama
1289
154
  PATH_VARS LLAMA_INCLUDE_INSTALL_DIR
1290
155
  LLAMA_LIB_INSTALL_DIR
1291
156
  LLAMA_BIN_INSTALL_DIR )
1292
157
 
1293
158
  write_basic_package_version_file(
1294
- ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfigVersion.cmake
159
+ ${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake
1295
160
  VERSION ${LLAMA_INSTALL_VERSION}
1296
161
  COMPATIBILITY SameMajorVersion)
1297
162
 
1298
- install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
1299
- ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfigVersion.cmake
1300
- DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama)
1301
-
1302
- set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
1303
- "${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
1304
- "${GGML_HEADERS_METAL}" "${GGML_HEADERS_EXTRA}")
1305
-
1306
- set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
1307
- install(TARGETS ggml PUBLIC_HEADER)
1308
-
1309
- set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/llama.h)
1310
- install(TARGETS llama LIBRARY PUBLIC_HEADER)
163
+ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
164
+ ${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake
165
+ DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama)
1311
166
 
1312
167
  install(
1313
- FILES convert.py
168
+ FILES convert_hf_to_gguf.py
1314
169
  PERMISSIONS
1315
170
  OWNER_READ
1316
171
  OWNER_WRITE
@@ -1320,22 +175,13 @@ install(
1320
175
  WORLD_READ
1321
176
  WORLD_EXECUTE
1322
177
  DESTINATION ${CMAKE_INSTALL_BINDIR})
1323
- if (LLAMA_METAL)
1324
- install(
1325
- FILES ggml-metal.metal
1326
- PERMISSIONS
1327
- OWNER_READ
1328
- OWNER_WRITE
1329
- GROUP_READ
1330
- WORLD_READ
1331
- DESTINATION ${CMAKE_INSTALL_BINDIR})
1332
- if (NOT LLAMA_METAL_EMBED_LIBRARY)
1333
- install(
1334
- FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
1335
- DESTINATION ${CMAKE_INSTALL_BINDIR}
1336
- )
1337
- endif()
1338
- endif()
178
+
179
+ configure_file(cmake/llama.pc.in
180
+ "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
181
+ @ONLY)
182
+
183
+ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
184
+ DESTINATION lib/pkgconfig)
1339
185
 
1340
186
  #
1341
187
  # programs, examples and tests