@fugood/llama.node 0.2.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. package/CMakeLists.txt +6 -3
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +8 -1
  17. package/package.json +3 -3
  18. package/patches/llama.patch +12 -12
  19. package/src/DetokenizeWorker.cpp +1 -1
  20. package/src/LlamaContext.cpp +33 -1
  21. package/src/LlamaContext.h +1 -0
  22. package/src/llama.cpp/.github/workflows/bench.yml +310 -0
  23. package/src/llama.cpp/.github/workflows/build.yml +1315 -0
  24. package/src/llama.cpp/.github/workflows/close-issue.yml +23 -0
  25. package/src/llama.cpp/.github/workflows/docker.yml +116 -0
  26. package/src/llama.cpp/.github/workflows/editorconfig.yml +27 -0
  27. package/src/llama.cpp/.github/workflows/gguf-publish.yml +44 -0
  28. package/src/llama.cpp/.github/workflows/labeler.yml +17 -0
  29. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +65 -0
  30. package/src/llama.cpp/.github/workflows/nix-ci.yml +72 -0
  31. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +22 -0
  32. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +36 -0
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +35 -0
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +23 -0
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +38 -0
  36. package/src/llama.cpp/.github/workflows/server.yml +183 -0
  37. package/src/llama.cpp/CMakeLists.txt +91 -1245
  38. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +1 -1
  39. package/src/llama.cpp/cmake/build-info.cmake +58 -0
  40. package/src/llama.cpp/cmake/git-vars.cmake +22 -0
  41. package/src/llama.cpp/common/CMakeLists.txt +4 -3
  42. package/src/llama.cpp/common/build-info.cpp.in +4 -0
  43. package/src/llama.cpp/common/common.cpp +1116 -877
  44. package/src/llama.cpp/common/common.h +191 -77
  45. package/src/llama.cpp/common/grammar-parser.cpp +118 -31
  46. package/src/llama.cpp/common/json-schema-to-grammar.cpp +346 -65
  47. package/src/llama.cpp/common/log.h +1 -1
  48. package/src/llama.cpp/common/ngram-cache.h +10 -3
  49. package/src/llama.cpp/common/sampling.cpp +19 -10
  50. package/src/llama.cpp/docs/build.md +353 -0
  51. package/src/llama.cpp/examples/CMakeLists.txt +22 -22
  52. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +6 -6
  54. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  55. package/src/llama.cpp/examples/batched/batched.cpp +52 -55
  56. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  57. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +20 -72
  58. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/examples/chat-13B.bat +57 -0
  60. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  61. package/src/llama.cpp/examples/{finetune → cvector-generator}/CMakeLists.txt +2 -2
  62. package/src/llama.cpp/examples/cvector-generator/completions.txt +582 -0
  63. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +503 -0
  64. package/src/llama.cpp/examples/cvector-generator/mean.hpp +48 -0
  65. package/src/llama.cpp/examples/cvector-generator/negative.txt +4 -0
  66. package/src/llama.cpp/examples/cvector-generator/pca.hpp +325 -0
  67. package/src/llama.cpp/examples/cvector-generator/positive.txt +4 -0
  68. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +35 -0
  69. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  70. package/src/llama.cpp/examples/embedding/embedding.cpp +94 -46
  71. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +2 -2
  72. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +4 -6
  73. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/export-lora/export-lora.cpp +344 -386
  75. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +2 -2
  76. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +30 -25
  77. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  78. package/src/llama.cpp/examples/gguf/gguf.cpp +5 -0
  79. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +15 -0
  80. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +46 -0
  81. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +295 -0
  82. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +52 -0
  83. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +221 -0
  84. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +24 -0
  85. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +42 -0
  86. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +7093 -0
  87. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +693 -0
  88. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  89. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +3 -3
  90. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  91. package/src/llama.cpp/examples/gritlm/gritlm.cpp +6 -2
  92. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/imatrix/imatrix.cpp +137 -176
  94. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  95. package/src/llama.cpp/examples/infill/infill.cpp +38 -153
  96. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +175 -94
  97. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +65 -0
  98. package/src/llama.cpp/examples/llama.android/build.gradle.kts +6 -0
  99. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +68 -0
  100. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +11 -7
  101. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +2 -2
  102. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +18 -0
  103. package/src/llama.cpp/examples/llava/CMakeLists.txt +6 -5
  104. package/src/llama.cpp/examples/llava/android/build_64.sh +8 -0
  105. package/src/llama.cpp/examples/llava/clip.cpp +23 -14
  106. package/src/llama.cpp/examples/llava/llava-cli.cpp +8 -6
  107. package/src/llama.cpp/examples/llava/requirements.txt +3 -2
  108. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  109. package/src/llama.cpp/examples/lookahead/lookahead.cpp +2 -1
  110. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  111. package/src/llama.cpp/examples/lookup/lookup-create.cpp +2 -0
  112. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  113. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -2
  114. package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
  115. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  116. package/src/llama.cpp/examples/main/main.cpp +98 -75
  117. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +4 -5
  118. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  119. package/src/llama.cpp/examples/parallel/parallel.cpp +2 -1
  120. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  121. package/src/llama.cpp/examples/passkey/passkey.cpp +23 -43
  122. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  123. package/src/llama.cpp/examples/perplexity/perplexity.cpp +13 -10
  124. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  125. package/src/llama.cpp/examples/quantize/quantize.cpp +37 -34
  126. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  127. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
  128. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  129. package/src/llama.cpp/examples/retrieval/retrieval.cpp +26 -77
  130. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  131. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +14 -7
  132. package/src/llama.cpp/examples/server/CMakeLists.txt +26 -2
  133. package/src/llama.cpp/examples/server/server.cpp +274 -671
  134. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  135. package/src/llama.cpp/examples/server/utils.hpp +28 -29
  136. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  137. package/src/llama.cpp/examples/simple/simple.cpp +21 -29
  138. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  139. package/src/llama.cpp/examples/speculative/speculative.cpp +2 -1
  140. package/src/llama.cpp/examples/sycl/CMakeLists.txt +1 -1
  141. package/src/llama.cpp/examples/sycl/build.sh +23 -0
  142. package/src/llama.cpp/examples/sycl/run-llama2.sh +36 -0
  143. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +33 -0
  144. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +9 -0
  145. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  146. package/src/llama.cpp/examples/tokenize/tokenize.cpp +16 -2
  147. package/src/llama.cpp/ggml/CMakeLists.txt +253 -0
  148. package/src/llama.cpp/{cmake → ggml/cmake}/FindSIMD.cmake +6 -6
  149. package/src/llama.cpp/{ggml-backend.h → ggml/include/ggml-backend.h} +22 -17
  150. package/src/llama.cpp/ggml/include/ggml-blas.h +23 -0
  151. package/src/llama.cpp/ggml/include/ggml-cann.h +125 -0
  152. package/src/llama.cpp/{ggml-cuda.h → ggml/include/ggml-cuda.h} +3 -0
  153. package/src/llama.cpp/{ggml-metal.h → ggml/include/ggml-metal.h} +1 -2
  154. package/src/llama.cpp/{ggml-sycl.h → ggml/include/ggml-sycl.h} +3 -10
  155. package/src/llama.cpp/{ggml.h → ggml/include/ggml.h} +80 -85
  156. package/src/llama.cpp/ggml/src/CMakeLists.txt +1329 -0
  157. package/src/llama.cpp/ggml/src/ggml-aarch64.c +2193 -0
  158. package/src/llama.cpp/ggml/src/ggml-aarch64.h +39 -0
  159. package/src/llama.cpp/{ggml-alloc.c → ggml/src/ggml-alloc.c} +100 -49
  160. package/src/llama.cpp/{ggml-backend-impl.h → ggml/src/ggml-backend-impl.h} +20 -8
  161. package/src/llama.cpp/{ggml-backend.c → ggml/src/ggml-backend.c} +307 -167
  162. package/src/llama.cpp/ggml/src/ggml-blas.cpp +367 -0
  163. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +198 -0
  164. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +230 -0
  165. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2944 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/common.h +282 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +32 -0
  169. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +17 -0
  170. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +223 -0
  171. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +186 -0
  172. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +180 -0
  173. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +193 -0
  174. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  175. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +208 -0
  176. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +206 -0
  177. package/src/llama.cpp/ggml/src/ggml-cann.cpp +2023 -0
  178. package/src/llama.cpp/{ggml-common.h → ggml/src/ggml-common.h} +41 -7
  179. package/src/llama.cpp/{ggml-impl.h → ggml/src/ggml-impl.h} +113 -9
  180. package/src/llama.cpp/{ggml-kompute.cpp → ggml/src/ggml-kompute.cpp} +33 -18
  181. package/src/llama.cpp/{ggml-quants.c → ggml/src/ggml-quants.c} +1460 -940
  182. package/src/llama.cpp/{ggml-quants.h → ggml/src/ggml-quants.h} +19 -20
  183. package/src/llama.cpp/{ggml-rpc.cpp → ggml/src/ggml-rpc.cpp} +95 -72
  184. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +27 -0
  185. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +53 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +355 -0
  187. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +195 -0
  188. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +21 -0
  189. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +547 -0
  190. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +27 -0
  191. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +698 -0
  192. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  193. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  194. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +3011 -0
  195. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  196. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  197. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1027 -0
  198. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  199. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +374 -0
  200. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +35 -0
  201. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +66 -0
  202. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +275 -0
  203. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +22 -0
  204. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +251 -0
  205. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +24 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1140 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl.cpp +5314 -0
  208. package/src/llama.cpp/{ggml-vulkan.cpp → ggml/src/ggml-vulkan.cpp} +1781 -1868
  209. package/src/llama.cpp/{ggml.c → ggml/src/ggml.c} +1245 -2087
  210. package/src/llama.cpp/{sgemm.cpp → ggml/src/llamafile/sgemm.cpp} +21 -24
  211. package/src/llama.cpp/{sgemm.h → ggml/src/llamafile/sgemm.h} +1 -1
  212. package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +5 -0
  213. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +552 -0
  214. package/src/llama.cpp/{llama.h → include/llama.h} +175 -100
  215. package/src/llama.cpp/models/.editorconfig +1 -0
  216. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  217. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  219. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
  220. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
  221. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  222. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
  223. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  225. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
  226. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
  227. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  228. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
  229. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
  230. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  231. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
  232. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
  233. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
  235. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
  236. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  237. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
  239. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
  240. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  241. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
  242. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
  245. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
  248. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
  249. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
  252. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
  255. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  256. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
  257. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
  258. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  259. package/src/llama.cpp/requirements/requirements-all.txt +12 -0
  260. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
  261. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
  262. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
  263. package/src/llama.cpp/requirements/{requirements-convert.txt → requirements-convert_legacy_llama.txt} +1 -1
  264. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
  265. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  266. package/src/llama.cpp/requirements/requirements-pydantic.txt +3 -0
  267. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
  268. package/src/llama.cpp/requirements.txt +5 -4
  269. package/src/llama.cpp/scripts/build-info.sh +30 -0
  270. package/src/llama.cpp/scripts/install-oneapi.bat +19 -0
  271. package/src/llama.cpp/src/CMakeLists.txt +33 -0
  272. package/src/llama.cpp/src/llama-grammar.cpp +539 -0
  273. package/src/llama.cpp/src/llama-grammar.h +39 -0
  274. package/src/llama.cpp/src/llama-impl.h +26 -0
  275. package/src/llama.cpp/src/llama-sampling.cpp +635 -0
  276. package/src/llama.cpp/src/llama-sampling.h +56 -0
  277. package/src/llama.cpp/src/llama-vocab.cpp +1721 -0
  278. package/src/llama.cpp/src/llama-vocab.h +130 -0
  279. package/src/llama.cpp/{llama.cpp → src/llama.cpp} +5979 -5260
  280. package/src/llama.cpp/{unicode-data.cpp → src/unicode-data.cpp} +851 -802
  281. package/src/llama.cpp/{unicode.cpp → src/unicode.cpp} +52 -30
  282. package/src/llama.cpp/{unicode.h → src/unicode.h} +5 -1
  283. package/src/llama.cpp/tests/CMakeLists.txt +19 -20
  284. package/src/llama.cpp/tests/test-backend-ops.cpp +245 -67
  285. package/src/llama.cpp/tests/test-chat-template.cpp +57 -3
  286. package/src/llama.cpp/tests/test-double-float.cpp +2 -2
  287. package/src/llama.cpp/tests/test-grad0.cpp +2 -2
  288. package/src/llama.cpp/tests/test-grammar-integration.cpp +978 -31
  289. package/src/llama.cpp/tests/test-grammar-parser.cpp +423 -158
  290. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +508 -135
  291. package/src/llama.cpp/tests/test-llama-grammar.cpp +15 -9
  292. package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -1
  293. package/src/llama.cpp/tests/test-quantize-perf.cpp +1 -1
  294. package/src/llama.cpp/tests/test-rope.cpp +3 -4
  295. package/src/llama.cpp/tests/test-sampling.cpp +5 -5
  296. package/src/llama.cpp/tests/test-tokenizer-0.cpp +6 -6
  297. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +20 -15
  298. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +22 -11
  299. package/bin/darwin/arm64/default.metallib +0 -0
  300. package/bin/darwin/x64/default.metallib +0 -0
  301. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +0 -5
  302. package/src/llama.cpp/examples/beam-search/beam-search.cpp +0 -188
  303. package/src/llama.cpp/examples/finetune/finetune.cpp +0 -1862
  304. package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +0 -55
  305. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -5
  306. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -1253
  307. package/src/llama.cpp/ggml-opencl.cpp +0 -2305
  308. package/src/llama.cpp/ggml-opencl.h +0 -36
  309. package/src/llama.cpp/ggml-sycl.cpp +0 -17340
  310. package/src/llama.cpp/ggml-vulkan-shaders.hpp +0 -81211
  311. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +0 -2
  312. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -2
  313. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -1
  314. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -24
  315. /package/src/llama.cpp/{ggml-alloc.h → ggml/include/ggml-alloc.h} +0 -0
  316. /package/src/llama.cpp/{ggml-kompute.h → ggml/include/ggml-kompute.h} +0 -0
  317. /package/src/llama.cpp/{ggml-rpc.h → ggml/include/ggml-rpc.h} +0 -0
  318. /package/src/llama.cpp/{ggml-vulkan.h → ggml/include/ggml-vulkan.h} +0 -0
  319. /package/src/llama.cpp/{unicode-data.h → src/unicode-data.h} +0 -0
@@ -0,0 +1,1315 @@
1
+ name: CI
2
+
3
+ on:
4
+ workflow_dispatch: # allows manual triggering
5
+ inputs:
6
+ create_release:
7
+ description: 'Create new release'
8
+ required: true
9
+ type: boolean
10
+ push:
11
+ branches:
12
+ - master
13
+ paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
14
+ pull_request:
15
+ types: [opened, synchronize, reopened]
16
+ paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
17
+
18
+ concurrency:
19
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
20
+ cancel-in-progress: true
21
+
22
+ env:
23
+ BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
24
+ GGML_NLOOP: 3
25
+ GGML_N_THREADS: 1
26
+
27
+ jobs:
28
+ macOS-latest-cmake-arm64:
29
+ runs-on: macos-14
30
+
31
+ steps:
32
+ - name: Clone
33
+ id: checkout
34
+ uses: actions/checkout@v4
35
+ with:
36
+ fetch-depth: 0
37
+
38
+ - name: Dependencies
39
+ id: depends
40
+ continue-on-error: true
41
+ run: |
42
+ brew update
43
+
44
+ - name: Build
45
+ id: cmake_build
46
+ run: |
47
+ sysctl -a
48
+ mkdir build
49
+ cd build
50
+ cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF ..
51
+ cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
52
+
53
+ - name: Test
54
+ id: cmake_test
55
+ run: |
56
+ cd build
57
+ ctest -L 'main|curl' --verbose --timeout 900
58
+
59
+ - name: Determine tag name
60
+ id: tag
61
+ shell: bash
62
+ run: |
63
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
64
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
65
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
66
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
67
+ else
68
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
69
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
70
+ fi
71
+
72
+ - name: Pack artifacts
73
+ id: pack_artifacts
74
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
75
+ run: |
76
+ cp LICENSE ./build/bin/
77
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
78
+
79
+ - name: Upload artifacts
80
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
81
+ uses: actions/upload-artifact@v4
82
+ with:
83
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
84
+ name: llama-bin-macos-arm64.zip
85
+
86
+ macOS-latest-cmake-x64:
87
+ runs-on: macos-12
88
+
89
+ steps:
90
+ - name: Clone
91
+ id: checkout
92
+ uses: actions/checkout@v4
93
+ with:
94
+ fetch-depth: 0
95
+
96
+ - name: Dependencies
97
+ id: depends
98
+ continue-on-error: true
99
+ run: |
100
+ brew update
101
+
102
+ - name: Build
103
+ id: cmake_build
104
+ run: |
105
+ sysctl -a
106
+ # Metal is disabled due to intermittent failures with Github runners not having a GPU:
107
+ # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
108
+ cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
109
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
110
+
111
+ - name: Test
112
+ id: cmake_test
113
+ run: |
114
+ cd build
115
+ ctest -L main --verbose --timeout 900
116
+
117
+ - name: Determine tag name
118
+ id: tag
119
+ shell: bash
120
+ run: |
121
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
122
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
123
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
124
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
125
+ else
126
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
127
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
128
+ fi
129
+
130
+ - name: Pack artifacts
131
+ id: pack_artifacts
132
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
133
+ run: |
134
+ cp LICENSE ./build/bin/
135
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
136
+
137
+ - name: Upload artifacts
138
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
139
+ uses: actions/upload-artifact@v4
140
+ with:
141
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
142
+ name: llama-bin-macos-x64.zip
143
+
144
+ ubuntu-focal-make:
145
+ runs-on: ubuntu-20.04
146
+ env:
147
+ LLAMA_NODE_AVAILABLE: true
148
+ LLAMA_PYTHON_AVAILABLE: true
149
+
150
+ steps:
151
+ - name: Clone
152
+ id: checkout
153
+ uses: actions/checkout@v4
154
+
155
+ - name: Dependencies
156
+ id: depends
157
+ run: |
158
+ sudo apt-get update
159
+ sudo apt-get install build-essential gcc-8
160
+
161
+ - uses: actions/setup-node@v4
162
+ with:
163
+ node-version: "20"
164
+
165
+ - uses: actions/setup-python@v5
166
+ with:
167
+ python-version: "3.11"
168
+
169
+ - name: Build
170
+ id: make_build
171
+ env:
172
+ LLAMA_FATAL_WARNINGS: 1
173
+ run: |
174
+ CC=gcc-8 make -j $(nproc)
175
+
176
+ - name: Test
177
+ id: make_test
178
+ run: |
179
+ CC=gcc-8 make tests -j $(nproc)
180
+ make test -j $(nproc)
181
+
182
+ ubuntu-focal-make-curl:
183
+ runs-on: ubuntu-20.04
184
+
185
+ steps:
186
+ - name: Clone
187
+ id: checkout
188
+ uses: actions/checkout@v4
189
+
190
+ - name: Dependencies
191
+ id: depends
192
+ run: |
193
+ sudo apt-get update
194
+ sudo apt-get install build-essential gcc-8 libcurl4-openssl-dev
195
+
196
+ - name: Build
197
+ id: make_build
198
+ env:
199
+ LLAMA_FATAL_WARNINGS: 1
200
+ LLAMA_CURL: 1
201
+ run: |
202
+ CC=gcc-8 make -j $(nproc)
203
+
204
+ ubuntu-latest-cmake:
205
+ runs-on: ubuntu-latest
206
+
207
+ steps:
208
+ - name: Clone
209
+ id: checkout
210
+ uses: actions/checkout@v4
211
+ with:
212
+ fetch-depth: 0
213
+
214
+ - name: Dependencies
215
+ id: depends
216
+ run: |
217
+ sudo apt-get update
218
+ sudo apt-get install build-essential libcurl4-openssl-dev
219
+
220
+ - name: Build
221
+ id: cmake_build
222
+ run: |
223
+ mkdir build
224
+ cd build
225
+ cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
226
+ cmake --build . --config Release -j $(nproc)
227
+
228
+ - name: Test
229
+ id: cmake_test
230
+ run: |
231
+ cd build
232
+ ctest -L 'main|curl' --verbose --timeout 900
233
+
234
+ - name: Test llama2c conversion
235
+ id: llama2c_test
236
+ run: |
237
+ cd build
238
+ echo "Fetch tokenizer"
239
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
240
+ echo "Fetch llama2c model"
241
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
242
+ ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
243
+ ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
244
+
245
+ - name: Determine tag name
246
+ id: tag
247
+ shell: bash
248
+ run: |
249
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
250
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
251
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
252
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
253
+ else
254
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
255
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
256
+ fi
257
+
258
+ - name: Pack artifacts
259
+ id: pack_artifacts
260
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
261
+ run: |
262
+ cp LICENSE ./build/bin/
263
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
264
+
265
+ - name: Upload artifacts
266
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
267
+ uses: actions/upload-artifact@v4
268
+ with:
269
+ path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
270
+ name: llama-bin-ubuntu-x64.zip
271
+
272
+ ubuntu-latest-cmake-sanitizer:
273
+ runs-on: ubuntu-latest
274
+
275
+ continue-on-error: true
276
+
277
+ strategy:
278
+ matrix:
279
+ sanitizer: [ADDRESS, THREAD, UNDEFINED]
280
+ build_type: [Debug, Release]
281
+
282
+ steps:
283
+ - name: Clone
284
+ id: checkout
285
+ uses: actions/checkout@v4
286
+
287
+ - name: Dependencies
288
+ id: depends
289
+ run: |
290
+ sudo apt-get update
291
+ sudo apt-get install build-essential
292
+
293
+ - name: Build
294
+ id: cmake_build
295
+ if: ${{ matrix.sanitizer != 'THREAD' }}
296
+ run: |
297
+ mkdir build
298
+ cd build
299
+ cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
300
+ cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
301
+
302
+ - name: Build (no OpenMP)
303
+ id: cmake_build_no_openmp
304
+ if: ${{ matrix.sanitizer == 'THREAD' }}
305
+ run: |
306
+ mkdir build
307
+ cd build
308
+ cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DGGML_OPENMP=OFF
309
+ cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
310
+
311
+ - name: Test
312
+ id: cmake_test
313
+ run: |
314
+ cd build
315
+ ctest -L main --verbose --timeout 900
316
+
317
+ ubuntu-latest-cmake-rpc:
318
+ runs-on: ubuntu-latest
319
+
320
+ continue-on-error: true
321
+
322
+ steps:
323
+ - name: Clone
324
+ id: checkout
325
+ uses: actions/checkout@v4
326
+
327
+ - name: Dependencies
328
+ id: depends
329
+ run: |
330
+ sudo apt-get update
331
+ sudo apt-get install build-essential
332
+
333
+ - name: Build
334
+ id: cmake_build
335
+ run: |
336
+ mkdir build
337
+ cd build
338
+ cmake -DGGML_RPC=ON ..
339
+ cmake --build . --config Release -j $(nproc)
340
+
341
+ - name: Test
342
+ id: cmake_test
343
+ run: |
344
+ cd build
345
+ ctest -L main --verbose
346
+
347
+ ubuntu-22-cmake-vulkan:
348
+ runs-on: ubuntu-22.04
349
+
350
+ steps:
351
+ - name: Clone
352
+ id: checkout
353
+ uses: actions/checkout@v4
354
+
355
+ - name: Dependencies
356
+ id: depends
357
+ run: |
358
+ wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
359
+ sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
360
+ sudo apt-get update -y
361
+ sudo apt-get install -y build-essential vulkan-sdk
362
+
363
+ - name: Build
364
+ id: cmake_build
365
+ run: |
366
+ mkdir build
367
+ cd build
368
+ cmake -DGGML_VULKAN=ON ..
369
+ cmake --build . --config Release -j $(nproc)
370
+
371
+ ubuntu-22-cmake-hip:
372
+ runs-on: ubuntu-22.04
373
+ container: rocm/dev-ubuntu-22.04:6.0.2
374
+
375
+ steps:
376
+ - name: Clone
377
+ id: checkout
378
+ uses: actions/checkout@v4
379
+
380
+ - name: Dependencies
381
+ id: depends
382
+ run: |
383
+ sudo apt-get update
384
+ sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
385
+
386
+ - name: Build with native CMake HIP support
387
+ id: cmake_build
388
+ run: |
389
+ cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIPBLAS=ON
390
+ cmake --build build --config Release -j $(nproc)
391
+
392
+ - name: Build with legacy HIP support
393
+ id: cmake_build_legacy_hip
394
+ run: |
395
+ cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIPBLAS=ON
396
+ cmake --build build2 --config Release -j $(nproc)
397
+
398
+ ubuntu-22-cmake-sycl:
399
+ runs-on: ubuntu-22.04
400
+
401
+ continue-on-error: true
402
+
403
+ steps:
404
+ - uses: actions/checkout@v4
405
+
406
+ - name: add oneAPI to apt
407
+ shell: bash
408
+ run: |
409
+ cd /tmp
410
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
411
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
412
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
413
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
414
+
415
+ - name: install oneAPI dpcpp compiler
416
+ shell: bash
417
+ run: |
418
+ sudo apt update
419
+ sudo apt install -y intel-oneapi-compiler-dpcpp-cpp
420
+
421
+ - name: install oneAPI MKL library
422
+ shell: bash
423
+ run: |
424
+ sudo apt install -y intel-oneapi-mkl-devel
425
+
426
+ - name: Clone
427
+ id: checkout
428
+ uses: actions/checkout@v4
429
+
430
+ - name: Build
431
+ id: cmake_build
432
+ run: |
433
+ source /opt/intel/oneapi/setvars.sh
434
+ mkdir build
435
+ cd build
436
+ cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
437
+ cmake --build . --config Release -j $(nproc)
438
+
439
+ ubuntu-22-cmake-sycl-fp16:
440
+ runs-on: ubuntu-22.04
441
+
442
+ continue-on-error: true
443
+
444
+ steps:
445
+ - uses: actions/checkout@v4
446
+
447
+ - name: add oneAPI to apt
448
+ shell: bash
449
+ run: |
450
+ cd /tmp
451
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
452
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
453
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
454
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
455
+
456
+ - name: install oneAPI dpcpp compiler
457
+ shell: bash
458
+ run: |
459
+ sudo apt update
460
+ sudo apt install -y intel-oneapi-compiler-dpcpp-cpp
461
+
462
+ - name: install oneAPI MKL library
463
+ shell: bash
464
+ run: |
465
+ sudo apt install -y intel-oneapi-mkl-devel
466
+
467
+ - name: Clone
468
+ id: checkout
469
+ uses: actions/checkout@v4
470
+
471
+ - name: Build
472
+ id: cmake_build
473
+ run: |
474
+ source /opt/intel/oneapi/setvars.sh
475
+ mkdir build
476
+ cd build
477
+ cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON ..
478
+ cmake --build . --config Release -j $(nproc)
479
+
480
+ # TODO: build with GGML_NO_METAL because test-backend-ops fails on "Apple Paravirtual device" and I don't know
481
+ # how to debug it.
482
+ # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124
483
+ macOS-latest-make:
484
+ runs-on: macos-latest
485
+
486
+ steps:
487
+ - name: Clone
488
+ id: checkout
489
+ uses: actions/checkout@v4
490
+
491
+ - name: Dependencies
492
+ id: depends
493
+ continue-on-error: true
494
+ run: |
495
+ brew update
496
+
497
+ - name: Build
498
+ id: make_build
499
+ env:
500
+ LLAMA_FATAL_WARNINGS: 1
501
+ run: |
502
+ GGML_NO_METAL=1 make -j $(sysctl -n hw.logicalcpu)
503
+
504
+ - name: Test
505
+ id: make_test
506
+ run: |
507
+ GGML_NO_METAL=1 make tests -j $(sysctl -n hw.logicalcpu)
508
+ GGML_NO_METAL=1 make test -j $(sysctl -n hw.logicalcpu)
509
+
510
+ # TODO: build with GGML_METAL=OFF because test-backend-ops fails on "Apple Paravirtual device" and I don't know
511
+ # how to debug it.
512
+ # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
513
+ # would be great if we fix these
514
+ macOS-latest-cmake:
515
+ runs-on: macos-latest
516
+
517
+ steps:
518
+ - name: Clone
519
+ id: checkout
520
+ uses: actions/checkout@v4
521
+
522
+ - name: Dependencies
523
+ id: depends
524
+ continue-on-error: true
525
+ run: |
526
+ brew update
527
+
528
+ - name: Build
529
+ id: cmake_build
530
+ run: |
531
+ sysctl -a
532
+ mkdir build
533
+ cd build
534
+ cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF ..
535
+ cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
536
+
537
+ - name: Test
538
+ id: cmake_test
539
+ run: |
540
+ cd build
541
+ ctest -L main --verbose --timeout 900
542
+
543
+ macOS-latest-cmake-ios:
544
+ runs-on: macos-latest
545
+
546
+ steps:
547
+ - name: Clone
548
+ id: checkout
549
+ uses: actions/checkout@v4
550
+
551
+ - name: Dependencies
552
+ id: depends
553
+ continue-on-error: true
554
+ run: |
555
+ brew update
556
+
557
+ - name: Build
558
+ id: cmake_build
559
+ run: |
560
+ sysctl -a
561
+ mkdir build
562
+ cd build
563
+ cmake -G Xcode .. \
564
+ -DGGML_METAL_EMBED_LIBRARY=ON \
565
+ -DLLAMA_BUILD_EXAMPLES=OFF \
566
+ -DLLAMA_BUILD_TESTS=OFF \
567
+ -DLLAMA_BUILD_SERVER=OFF \
568
+ -DCMAKE_SYSTEM_NAME=iOS \
569
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
570
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
571
+ cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
572
+
573
+ macOS-latest-cmake-tvos:
574
+ runs-on: macos-latest
575
+
576
+ steps:
577
+ - name: Clone
578
+ id: checkout
579
+ uses: actions/checkout@v4
580
+
581
+ - name: Dependencies
582
+ id: depends
583
+ continue-on-error: true
584
+ run: |
585
+ brew update
586
+
587
+ - name: Build
588
+ id: cmake_build
589
+ run: |
590
+ sysctl -a
591
+ mkdir build
592
+ cd build
593
+ cmake -G Xcode .. \
594
+ -DGGML_METAL_EMBED_LIBRARY=ON \
595
+ -DLLAMA_BUILD_EXAMPLES=OFF \
596
+ -DLLAMA_BUILD_TESTS=OFF \
597
+ -DLLAMA_BUILD_SERVER=OFF \
598
+ -DCMAKE_SYSTEM_NAME=tvOS \
599
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
600
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
601
+ cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
602
+
603
+ macOS-latest-swift:
604
+ runs-on: macos-latest
605
+
606
+ strategy:
607
+ matrix:
608
+ destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
609
+
610
+ steps:
611
+ - name: Clone
612
+ id: checkout
613
+ uses: actions/checkout@v4
614
+
615
+ - name: Dependencies
616
+ id: depends
617
+ continue-on-error: true
618
+ run: |
619
+ brew update
620
+
621
+ - name: xcodebuild for swift package
622
+ id: xcodebuild
623
+ run: |
624
+ xcodebuild -scheme llama -destination "${{ matrix.destination }}"
625
+
626
+ - name: Build Swift Example
627
+ id: make_build_swift_example
628
+ run: |
629
+ make swift
630
+
631
+ windows-msys2:
632
+ runs-on: windows-latest
633
+
634
+ strategy:
635
+ fail-fast: false
636
+ matrix:
637
+ include:
638
+ - { sys: UCRT64, env: ucrt-x86_64, build: Release }
639
+ - { sys: CLANG64, env: clang-x86_64, build: Release }
640
+
641
+ steps:
642
+ - name: Clone
643
+ uses: actions/checkout@v4
644
+
645
+ - name: Setup ${{ matrix.sys }}
646
+ uses: msys2/setup-msys2@v2
647
+ with:
648
+ update: true
649
+ msystem: ${{matrix.sys}}
650
+ install: >-
651
+ base-devel
652
+ mingw-w64-${{matrix.env}}-toolchain
653
+ mingw-w64-${{matrix.env}}-cmake
654
+ mingw-w64-${{matrix.env}}-openblas
655
+
656
+ - name: Build using make
657
+ shell: msys2 {0}
658
+ run: |
659
+ make -j $(nproc)
660
+
661
+ - name: Clean after building using make
662
+ shell: msys2 {0}
663
+ run: |
664
+ make clean
665
+
666
+ - name: Build using make w/ OpenBLAS
667
+ shell: msys2 {0}
668
+ run: |
669
+ make GGML_OPENBLAS=1 -j $(nproc)
670
+
671
+ - name: Build using CMake
672
+ shell: msys2 {0}
673
+ run: |
674
+ cmake -B build
675
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
676
+
677
+ - name: Clean after building using CMake
678
+ shell: msys2 {0}
679
+ run: |
680
+ rm -rf build
681
+
682
+ - name: Build using CMake w/ OpenBLAS
683
+ shell: msys2 {0}
684
+ run: |
685
+ cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
686
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
687
+
688
+ windows-latest-cmake:
689
+ runs-on: windows-2019
690
+
691
+ env:
692
+ OPENBLAS_VERSION: 0.3.23
693
+ SDE_VERSION: 9.33.0-2024-01-07
694
+ VULKAN_VERSION: 1.3.261.1
695
+
696
+ strategy:
697
+ matrix:
698
+ include:
699
+ - build: 'rpc-x64'
700
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
701
+ - build: 'noavx-x64'
702
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
703
+ - build: 'avx2-x64'
704
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
705
+ - build: 'avx-x64'
706
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
707
+ - build: 'avx512-x64'
708
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
709
+ - build: 'openblas-x64'
710
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
711
+ - build: 'kompute-x64'
712
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
713
+ - build: 'vulkan-x64'
714
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
715
+ - build: 'llvm-arm64'
716
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
717
+ - build: 'msvc-arm64'
718
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
719
+
720
+ steps:
721
+ - name: Clone
722
+ id: checkout
723
+ uses: actions/checkout@v4
724
+ with:
725
+ fetch-depth: 0
726
+
727
+ - name: Clone Kompute submodule
728
+ id: clone_kompute
729
+ if: ${{ matrix.build == 'kompute-x64' }}
730
+ run: |
731
+ git submodule update --init ggml/src/kompute
732
+
733
+ - name: Download OpenBLAS
734
+ id: get_openblas
735
+ if: ${{ matrix.build == 'openblas-x64' }}
736
+ run: |
737
+ curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
738
+ curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
739
+ mkdir $env:RUNNER_TEMP/openblas
740
+ tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
741
+ $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
742
+ $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
743
+ $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
744
+ & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
745
+
746
+ - name: Install Vulkan SDK
747
+ id: get_vulkan
748
+ if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
749
+ run: |
750
+ curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
751
+ & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
752
+ Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
753
+ Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
754
+
755
+ - name: Install Ninja
756
+ id: install_ninja
757
+ run: |
758
+ choco install ninja
759
+
760
+ - name: Build
761
+ id: cmake_build
762
+ run: |
763
+ cmake -S . -B build ${{ matrix.defines }}
764
+ cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
765
+
766
+ - name: Add libopenblas.dll
767
+ id: add_libopenblas_dll
768
+ if: ${{ matrix.build == 'openblas-x64' }}
769
+ run: |
770
+ cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
771
+ cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
772
+
773
+ - name: Check AVX512F support
774
+ id: check_avx512f
775
+ if: ${{ matrix.build == 'avx512-x64' }}
776
+ continue-on-error: true
777
+ run: |
778
+ cd build
779
+ $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
780
+ $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
781
+ $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
782
+ echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
783
+ & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
784
+ .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
785
+
786
+ - name: Test
787
+ id: cmake_test
788
+ # not all machines have native AVX-512
789
+ if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
790
+ run: |
791
+ cd build
792
+ ctest -L main -C Release --verbose --timeout 900
793
+
794
+ - name: Test (Intel SDE)
794
+ id: cmake_test_sde
795
+ if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F != '1' }} # HAS_AVX512F is only ever set to 1 (on success), so test for "not 1"; use Intel SDE for AVX-512 emulation
796
+ run: |
797
+ curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
798
+ # Windows tar fails on the SDE .tar.xz, so unpack in two 7z passes: .tar.xz -> .tar -> files
799
+ 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
800
+ 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
801
+ $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
802
+ cd build
803
+ $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
804
+ & $sde -future -- ctest -L main -C Release --verbose --timeout 900
806
+
807
+ - name: Determine tag name
808
+ id: tag
809
+ shell: bash
810
+ run: |
811
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
812
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
813
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
814
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
815
+ else
816
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
817
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
818
+ fi
819
+
820
+ - name: Pack artifacts
821
+ id: pack_artifacts
822
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
823
+ run: |
824
+ Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
825
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
826
+
827
+ - name: Upload artifacts
828
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
829
+ uses: actions/upload-artifact@v4
830
+ with:
831
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
832
+ name: llama-bin-win-${{ matrix.build }}.zip
833
+
834
+ windows-latest-cmake-cuda:
835
+ runs-on: windows-2019
836
+
837
+ strategy:
838
+ matrix:
839
+ cuda: ['12.2.0', '11.7.1']
840
+ build: ['cuda']
841
+
842
+ steps:
843
+ - name: Clone
844
+ id: checkout
845
+ uses: actions/checkout@v4
846
+ with:
847
+ fetch-depth: 0
848
+
849
+ - name: Install CUDA toolkit
850
+ id: cuda-toolkit
851
+ uses: Jimver/cuda-toolkit@v0.2.15
852
+ with:
853
+ cuda: ${{ matrix.cuda }}
854
+ method: 'network'
855
+ sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
856
+
857
+ - name: Build
858
+ id: cmake_build
859
+ run: |
860
+ mkdir build
861
+ cd build
862
+ cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON
863
+ cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
864
+
865
+ - name: Determine tag name
866
+ id: tag
867
+ shell: bash
868
+ run: |
869
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
870
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
871
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
872
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
873
+ else
874
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
875
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
876
+ fi
877
+
878
+ - name: Pack artifacts
879
+ id: pack_artifacts
880
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
881
+ run: |
882
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
883
+
884
+ - name: Upload artifacts
885
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
886
+ uses: actions/upload-artifact@v4
887
+ with:
888
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
889
+ name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
890
+
891
+ - name: Copy and pack Cuda runtime
892
+ run: |
893
+ echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
894
+ $dst='.\build\bin\cudart\'
895
+ robocopy "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
896
+ 7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*
897
+
898
+ - name: Upload Cuda runtime
899
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
900
+ uses: actions/upload-artifact@v4
901
+ with:
902
+ path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
903
+ name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
904
+
905
+ windows-latest-cmake-sycl:
906
+ runs-on: windows-latest
907
+
908
+ defaults:
909
+ run:
910
+ shell: bash
911
+
912
+ env:
913
+ WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7dff44ba-e3af-4448-841c-0d616c8da6e7/w_BaseKit_p_2024.1.0.595_offline.exe
914
+ WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel
915
+ ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
916
+ steps:
917
+ - name: Clone
918
+ id: checkout
919
+ uses: actions/checkout@v4
920
+ with:
921
+ fetch-depth: 0
922
+
923
+ - name: Install
924
+ run: scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
925
+
926
+ - name: Build
927
+ id: cmake_build
928
+ run: examples/sycl/win-build-sycl.bat
929
+
930
+ - name: Determine tag name
931
+ id: tag
932
+ shell: bash
933
+ run: |
934
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
935
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
936
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
937
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
938
+ else
939
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
940
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
941
+ fi
942
+
943
+ - name: Pack artifacts
944
+ id: pack_artifacts
945
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
946
+ run: |
947
+ echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"
948
+ cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.4.dll" ./build/bin
949
+ cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
950
+ cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
951
+
952
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/pi_win_proxy_loader.dll" ./build/bin
953
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/pi_level_zero.dll" ./build/bin
954
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl7.dll" ./build/bin
955
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
956
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
957
+ echo "cp oneAPI running time dll files to ./build/bin done"
958
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
959
+
960
+ - name: Upload artifacts
961
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
962
+ uses: actions/upload-artifact@v4
963
+ with:
964
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
965
+ name: llama-bin-win-sycl-x64.zip
966
+
967
+ windows-latest-cmake-hip:
968
+ runs-on: windows-latest
969
+
970
+ steps:
971
+ - name: Clone
972
+ id: checkout
973
+ uses: actions/checkout@v4
974
+
975
+ - name: Install
976
+ id: depends
977
+ run: |
978
+ $ErrorActionPreference = "Stop"
979
+ write-host "Downloading AMD HIP SDK Installer"
980
+ Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
981
+ write-host "Installing AMD HIP SDK"
982
+ Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
983
+ write-host "Completed AMD HIP SDK installation"
984
+
985
+ - name: Verify ROCm
986
+ id: verify
987
+ run: |
988
+ & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
989
+
990
+ - name: Build
991
+ id: cmake_build
992
+ run: |
993
+ $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
994
+ $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
995
+ cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON
996
+ cmake --build build --config Release
997
+
998
+ ios-xcode-build:
999
+ runs-on: macos-latest
1000
+
1001
+ steps:
1002
+ - name: Checkout code
1003
+ uses: actions/checkout@v4
1004
+
1005
+ - name: Build Xcode project
1006
+ run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
1007
+
1008
+ android-build:
1009
+ runs-on: ubuntu-latest
1010
+
1011
+ steps:
1012
+ - name: Clone
1013
+ uses: actions/checkout@v4
1014
+
1015
+ - name: Set up JDK
1016
+ uses: actions/setup-java@v3
1017
+ with:
1018
+ java-version: 17
1019
+ distribution: zulu
1020
+
1021
+ - name: Setup Android SDK
1022
+ uses: android-actions/setup-android@v3
1023
+ with:
1024
+ log-accepted-android-sdk-licenses: false
1025
+
1026
+ - name: Build
1027
+ run: |
1028
+ cd examples/llama.android
1029
+
1030
+ ./gradlew build --no-daemon
1031
+
1032
+ # freeBSD-latest:
1033
+ # runs-on: macos-12
1034
+ # steps:
1035
+ # - name: Clone
1036
+ # uses: actions/checkout@v4
1037
+ #
1038
+ # - name: Build
1039
+ # uses: cross-platform-actions/action@v0.19.0
1040
+ # with:
1041
+ # operating_system: freebsd
1042
+ # version: '13.2'
1043
+ # hypervisor: 'qemu'
1044
+ # run: |
1045
+ # sudo pkg update
1046
+ # sudo pkg install -y gmake automake autoconf pkgconf llvm15 openblas
1047
+ # gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`
1048
+
1049
+ release:
1050
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1051
+
1052
+ runs-on: ubuntu-latest
1053
+
1054
+ needs:
1055
+ - ubuntu-focal-make
1056
+ - ubuntu-latest-cmake
1057
+ - macOS-latest-make
1058
+ - macOS-latest-cmake
1059
+ - windows-latest-cmake
1060
+ - windows-latest-cmake-cuda
1061
+ - macOS-latest-cmake-arm64
1062
+ - macOS-latest-cmake-x64
1063
+
1064
+ steps:
1065
+ - name: Clone
1066
+ id: checkout
1067
+ uses: actions/checkout@v4
1068
+ with:
1069
+ fetch-depth: 0
1070
+
1071
+ - name: Determine tag name
1072
+ id: tag
1073
+ shell: bash
1074
+ run: |
1075
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
1076
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
1077
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
1078
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
1079
+ else
1080
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
1081
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
1082
+ fi
1083
+
1084
+ - name: Download artifacts
1085
+ id: download-artifact
1086
+ uses: actions/download-artifact@v4
1087
+ with:
1088
+ path: ./artifact
1089
+
1090
+ - name: Move artifacts
1091
+ id: move_artifacts
1092
+ run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
1093
+
1094
+ - name: Create release
1095
+ id: create_release
1096
+ uses: anzz1/action-create-release@v1
1097
+ env:
1098
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
1099
+ with:
1100
+ tag_name: ${{ steps.tag.outputs.name }}
1101
+
1102
+ - name: Upload release
1103
+ id: upload_release
1104
+ uses: actions/github-script@v3
1105
+ with:
1106
+ github-token: ${{secrets.GITHUB_TOKEN}}
1107
+ script: |
1108
+ const path = require('path');
1109
+ const fs = require('fs');
1110
+ const release_id = '${{ steps.create_release.outputs.id }}'; // id of the release created by the create_release step
1111
+ for (const file of fs.readdirSync('./artifact/release')) { // synchronous call: no await needed
1112
+ if (path.extname(file) === '.zip') { // only .zip files are attached to the release
1113
+ console.log('uploadReleaseAsset', file);
1114
+ await github.repos.uploadReleaseAsset({
1115
+ owner: context.repo.owner,
1116
+ repo: context.repo.repo,
1117
+ release_id: release_id,
1118
+ name: file,
1119
+ data: fs.readFileSync(`./artifact/release/${file}`)
1120
+ });
1121
+ }
1122
+ }
1123
+
1124
+ # ubuntu-latest-gcc:
1125
+ # runs-on: ubuntu-latest
1126
+ #
1127
+ # strategy:
1128
+ # matrix:
1129
+ # build: [Debug, Release]
1130
+ #
1131
+ # steps:
1132
+ # - name: Clone
1133
+ # uses: actions/checkout@v4
1134
+ #
1135
+ # - name: Dependencies
1136
+ # run: |
1137
+ # sudo apt-get update
1138
+ # sudo apt-get install build-essential
1139
+ # sudo apt-get install cmake
1140
+ #
1141
+ # - name: Configure
1142
+ # run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1143
+ #
1144
+ # - name: Build
1145
+ # run: |
1146
+ # make
1147
+ #
1148
+ # ubuntu-latest-clang:
1149
+ # runs-on: ubuntu-latest
1150
+ #
1151
+ # strategy:
1152
+ # matrix:
1153
+ # build: [Debug, Release]
1154
+ #
1155
+ # steps:
1156
+ # - name: Clone
1157
+ # uses: actions/checkout@v4
1158
+ #
1159
+ # - name: Dependencies
1160
+ # run: |
1161
+ # sudo apt-get update
1162
+ # sudo apt-get install build-essential
1163
+ # sudo apt-get install cmake
1164
+ #
1165
+ # - name: Configure
1166
+ # run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
1167
+ #
1168
+ # - name: Build
1169
+ # run: |
1170
+ # make
1171
+ #
1172
+ # ubuntu-latest-gcc-sanitized:
1173
+ # runs-on: ubuntu-latest
1174
+ #
1175
+ # strategy:
1176
+ # matrix:
1177
+ # sanitizer: [ADDRESS, THREAD, UNDEFINED]
1178
+ #
1179
+ # steps:
1180
+ # - name: Clone
1181
+ # uses: actions/checkout@v4
1182
+ #
1183
+ # - name: Dependencies
1184
+ # run: |
1185
+ # sudo apt-get update
1186
+ # sudo apt-get install build-essential
1187
+ # sudo apt-get install cmake
1188
+ #
1189
+ # - name: Configure
1190
+ # run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON
1191
+ #
1192
+ # - name: Build
1193
+ # run: |
1194
+ # make
1195
+ #
1196
+ # windows:
1197
+ # runs-on: windows-latest
1198
+ #
1199
+ # strategy:
1200
+ # matrix:
1201
+ # build: [Release]
1202
+ # arch: [Win32, x64]
1203
+ # include:
1204
+ # - arch: Win32
1205
+ # s2arc: x86
1206
+ # - arch: x64
1207
+ # s2arc: x64
1208
+ #
1209
+ # steps:
1210
+ # - name: Clone
1211
+ # uses: actions/checkout@v4
1212
+ #
1213
+ # - name: Add msbuild to PATH
1214
+ # uses: microsoft/setup-msbuild@v1
1215
+ #
1216
+ # - name: Configure
1217
+ # run: >
1218
+ # cmake -S . -B ./build -A ${{ matrix.arch }}
1219
+ # -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1220
+ #
1221
+ # - name: Build
1222
+ # run: |
1223
+ # cd ./build
1224
+ # msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
1225
+ #
1226
+ # - name: Upload binaries
1227
+ # uses: actions/upload-artifact@v4
1228
+ # with:
1229
+ # name: llama-bin-${{ matrix.arch }}
1230
+ # path: build/bin/${{ matrix.build }}
1231
+ #
1232
+ # windows-blas:
1233
+ # runs-on: windows-latest
1234
+ #
1235
+ # strategy:
1236
+ # matrix:
1237
+ # build: [Release]
1238
+ # arch: [Win32, x64]
1239
+ # blas: [ON]
1240
+ # include:
1241
+ # - arch: Win32
1242
+ # obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
1243
+ # s2arc: x86
1244
+ # - arch: x64
1245
+ # obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
1246
+ # s2arc: x64
1247
+ #
1248
+ # steps:
1249
+ # - name: Clone
1250
+ # uses: actions/checkout@v4
1251
+ #
1252
+ # - name: Add msbuild to PATH
1253
+ # uses: microsoft/setup-msbuild@v1
1254
+ #
1255
+ # - name: Fetch OpenBLAS
1256
+ # if: matrix.blas == 'ON'
1257
+ # run: |
1258
+ # C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
1259
+ # 7z x blas.zip -oblas -y
1260
+ # copy blas/include/cblas.h .
1261
+ # copy blas/include/openblas_config.h .
1262
+ # echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
1263
+ #
1264
+ # - name: Configure
1265
+ # run: >
1266
+ # cmake -S . -B ./build -A ${{ matrix.arch }}
1267
+ # -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1268
+ # -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }}
1269
+ # -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
1270
+ #
1271
+ # - name: Build
1272
+ # run: |
1273
+ # cd ./build
1274
+ # msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
1275
+ #
1276
+ # - name: Copy libopenblas.dll
1277
+ # if: matrix.blas == 'ON'
1278
+ # run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
1279
+ #
1280
+ # - name: Upload binaries
1281
+ # if: matrix.blas == 'ON'
1282
+ # uses: actions/upload-artifact@v4
1283
+ # with:
1284
+ # name: llama-blas-bin-${{ matrix.arch }}
1285
+ # path: build/bin/${{ matrix.build }}
1286
+ #
1287
+ # emscripten:
1288
+ # runs-on: ubuntu-latest
1289
+ #
1290
+ # strategy:
1291
+ # matrix:
1292
+ # build: [Release]
1293
+ #
1294
+ # steps:
1295
+ # - name: Clone
1296
+ # uses: actions/checkout@v4
1297
+ #
1298
+ # - name: Dependencies
1299
+ # run: |
1300
+ # wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
1301
+ # tar -xvf master.tar.gz
1302
+ # emsdk-master/emsdk update
1303
+ # emsdk-master/emsdk install latest
1304
+ # emsdk-master/emsdk activate latest
1305
+ #
1306
+ # - name: Configure
1307
+ # run: echo "tmp"
1308
+ #
1309
+ # - name: Build
1310
+ # run: |
1311
+ # pushd emsdk-master
1312
+ # source ./emsdk_env.sh
1313
+ # popd
1314
+ # emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1315
+ # make