@fugood/llama.node 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252)
  1. package/CMakeLists.txt +1 -8
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +4 -2
  17. package/src/DetokenizeWorker.cpp +1 -1
  18. package/src/EmbeddingWorker.cpp +2 -2
  19. package/src/LlamaCompletionWorker.cpp +10 -10
  20. package/src/LlamaCompletionWorker.h +2 -2
  21. package/src/LlamaContext.cpp +14 -17
  22. package/src/TokenizeWorker.cpp +1 -1
  23. package/src/common.hpp +5 -4
  24. package/src/llama.cpp/.github/workflows/build.yml +137 -29
  25. package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
  26. package/src/llama.cpp/.github/workflows/docker.yml +46 -34
  27. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
  28. package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
  29. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
  30. package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
  31. package/src/llama.cpp/.github/workflows/server.yml +7 -0
  32. package/src/llama.cpp/CMakeLists.txt +26 -11
  33. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  34. package/src/llama.cpp/common/CMakeLists.txt +10 -10
  35. package/src/llama.cpp/common/arg.cpp +2041 -0
  36. package/src/llama.cpp/common/arg.h +77 -0
  37. package/src/llama.cpp/common/common.cpp +523 -1861
  38. package/src/llama.cpp/common/common.h +234 -106
  39. package/src/llama.cpp/common/console.cpp +3 -0
  40. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  41. package/src/llama.cpp/common/log.cpp +401 -0
  42. package/src/llama.cpp/common/log.h +66 -698
  43. package/src/llama.cpp/common/ngram-cache.cpp +39 -36
  44. package/src/llama.cpp/common/ngram-cache.h +19 -19
  45. package/src/llama.cpp/common/sampling.cpp +356 -350
  46. package/src/llama.cpp/common/sampling.h +62 -139
  47. package/src/llama.cpp/common/stb_image.h +5990 -6398
  48. package/src/llama.cpp/docs/build.md +72 -17
  49. package/src/llama.cpp/examples/CMakeLists.txt +1 -2
  50. package/src/llama.cpp/examples/batched/batched.cpp +49 -65
  51. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
  53. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
  54. package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
  56. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
  57. package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
  58. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
  59. package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
  60. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
  61. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
  62. package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
  63. package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
  64. package/src/llama.cpp/examples/infill/infill.cpp +131 -192
  65. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
  66. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  67. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
  68. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  69. package/src/llama.cpp/examples/llava/clip.cpp +686 -150
  70. package/src/llama.cpp/examples/llava/clip.h +11 -2
  71. package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
  72. package/src/llama.cpp/examples/llava/llava.cpp +146 -26
  73. package/src/llama.cpp/examples/llava/llava.h +2 -3
  74. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
  75. package/src/llama.cpp/examples/llava/requirements.txt +1 -0
  76. package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
  77. package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
  78. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  79. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
  80. package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
  81. package/src/llama.cpp/examples/main/main.cpp +216 -313
  82. package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
  83. package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
  84. package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
  85. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
  87. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
  88. package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
  89. package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
  90. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
  91. package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
  92. package/src/llama.cpp/examples/server/server.cpp +1347 -1531
  93. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
  94. package/src/llama.cpp/examples/server/utils.hpp +396 -107
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  96. package/src/llama.cpp/examples/simple/simple.cpp +132 -106
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
  99. package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
  100. package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
  101. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  102. package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
  103. package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
  104. package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
  105. package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
  106. package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
  107. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  108. package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
  109. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  110. package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
  111. package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
  112. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  113. package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
  114. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  115. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  116. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  117. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  118. package/src/llama.cpp/ggml/include/ggml.h +272 -505
  119. package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
  120. package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
  121. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
  122. package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
  123. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  124. package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
  125. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  126. package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
  127. package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
  128. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
  129. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
  130. package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
  131. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
  132. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
  133. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
  134. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
  135. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
  136. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
  137. package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
  138. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
  139. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
  140. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
  141. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
  142. package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
  143. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
  144. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
  145. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
  146. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
  147. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
  148. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  149. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
  150. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
  151. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
  152. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
  153. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  154. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
  155. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  156. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
  157. package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
  158. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
  159. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
  160. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
  161. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
  162. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
  163. package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
  164. package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
  165. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
  166. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
  167. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
  168. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
  169. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
  170. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
  171. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
  172. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
  173. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
  174. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
  175. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
  176. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
  177. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
  178. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
  179. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
  180. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
  181. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  182. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
  183. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
  184. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
  185. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
  187. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
  188. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  189. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  190. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
  191. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
  192. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
  194. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  195. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  197. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  198. package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
  199. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
  200. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
  201. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
  202. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
  203. package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
  204. package/src/llama.cpp/include/llama.h +296 -285
  205. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  206. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  207. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  208. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  209. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
  210. package/src/llama.cpp/src/CMakeLists.txt +2 -1
  211. package/src/llama.cpp/src/llama-grammar.cpp +721 -122
  212. package/src/llama.cpp/src/llama-grammar.h +120 -15
  213. package/src/llama.cpp/src/llama-impl.h +156 -1
  214. package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
  215. package/src/llama.cpp/src/llama-sampling.h +39 -47
  216. package/src/llama.cpp/src/llama-vocab.cpp +390 -127
  217. package/src/llama.cpp/src/llama-vocab.h +60 -20
  218. package/src/llama.cpp/src/llama.cpp +6215 -3263
  219. package/src/llama.cpp/src/unicode-data.cpp +6 -4
  220. package/src/llama.cpp/src/unicode-data.h +4 -4
  221. package/src/llama.cpp/src/unicode.cpp +15 -7
  222. package/src/llama.cpp/tests/CMakeLists.txt +4 -2
  223. package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
  224. package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
  225. package/src/llama.cpp/tests/test-barrier.cpp +94 -0
  226. package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
  227. package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
  228. package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
  229. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
  230. package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
  231. package/src/llama.cpp/tests/test-log.cpp +39 -0
  232. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  233. package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
  234. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  235. package/src/llama.cpp/tests/test-rope.cpp +2 -1
  236. package/src/llama.cpp/tests/test-sampling.cpp +226 -142
  237. package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
  238. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  239. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  240. package/patches/llama.patch +0 -22
  241. package/src/llama.cpp/.github/workflows/bench.yml +0 -310
  242. package/src/llama.cpp/common/grammar-parser.cpp +0 -536
  243. package/src/llama.cpp/common/grammar-parser.h +0 -29
  244. package/src/llama.cpp/common/train.cpp +0 -1513
  245. package/src/llama.cpp/common/train.h +0 -233
  246. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
  247. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
  248. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
  249. package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
  250. package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
  251. /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  252. /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
@@ -19,10 +19,18 @@ concurrency:
19
19
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
20
20
  cancel-in-progress: true
21
21
 
22
+ # Fine-grained permission
23
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
24
+ permissions:
25
+ contents: write # for creating release
26
+
22
27
  env:
23
28
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
24
29
  GGML_NLOOP: 3
25
30
  GGML_N_THREADS: 1
31
+ LLAMA_LOG_COLORS: 1
32
+ LLAMA_LOG_PREFIX: 1
33
+ LLAMA_LOG_TIMESTAMPS: 1
26
34
 
27
35
  jobs:
28
36
  macOS-latest-cmake-arm64:
@@ -47,7 +55,13 @@ jobs:
47
55
  sysctl -a
48
56
  mkdir build
49
57
  cd build
50
- cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF ..
58
+ cmake .. \
59
+ -DLLAMA_FATAL_WARNINGS=ON \
60
+ -DLLAMA_CURL=ON \
61
+ -DGGML_METAL_USE_BF16=ON \
62
+ -DGGML_METAL_EMBED_LIBRARY=ON \
63
+ -DGGML_RPC=ON \
64
+ -DBUILD_SHARED_LIBS=OFF
51
65
  cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
52
66
 
53
67
  - name: Test
@@ -84,7 +98,7 @@ jobs:
84
98
  name: llama-bin-macos-arm64.zip
85
99
 
86
100
  macOS-latest-cmake-x64:
87
- runs-on: macos-12
101
+ runs-on: macos-13
88
102
 
89
103
  steps:
90
104
  - name: Clone
@@ -105,7 +119,12 @@ jobs:
105
119
  sysctl -a
106
120
  # Metal is disabled due to intermittent failures with Github runners not having a GPU:
107
121
  # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
108
- cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
122
+ cmake -B build \
123
+ -DLLAMA_FATAL_WARNINGS=ON \
124
+ -DLLAMA_CURL=ON \
125
+ -DGGML_METAL=OFF \
126
+ -DGGML_RPC=ON \
127
+ -DBUILD_SHARED_LIBS=OFF
109
128
  cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
110
129
 
111
130
  - name: Test
@@ -222,7 +241,7 @@ jobs:
222
241
  run: |
223
242
  mkdir build
224
243
  cd build
225
- cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
244
+ cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
226
245
  cmake --build . --config Release -j $(nproc)
227
246
 
228
247
  - name: Test
@@ -375,7 +394,7 @@ jobs:
375
394
  steps:
376
395
  - name: Clone
377
396
  id: checkout
378
- uses: actions/checkout@v3
397
+ uses: actions/checkout@v4
379
398
 
380
399
  - name: Dependencies
381
400
  id: depends
@@ -386,22 +405,43 @@ jobs:
386
405
  - name: Build with native CMake HIP support
387
406
  id: cmake_build
388
407
  run: |
389
- cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIPBLAS=ON
408
+ cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIP=ON
390
409
  cmake --build build --config Release -j $(nproc)
391
410
 
392
411
  - name: Build with legacy HIP support
393
412
  id: cmake_build_legacy_hip
394
413
  run: |
395
- cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIPBLAS=ON
414
+ cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIP=ON
396
415
  cmake --build build2 --config Release -j $(nproc)
397
416
 
417
+ ubuntu-22-cmake-musa:
418
+ runs-on: ubuntu-22.04
419
+ container: mthreads/musa:rc3.1.0-devel-ubuntu22.04
420
+
421
+ steps:
422
+ - name: Clone
423
+ id: checkout
424
+ uses: actions/checkout@v4
425
+
426
+ - name: Dependencies
427
+ id: depends
428
+ run: |
429
+ apt-get update
430
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev
431
+
432
+ - name: Build with native CMake MUSA support
433
+ id: cmake_build
434
+ run: |
435
+ cmake -B build -S . -DGGML_MUSA=ON
436
+ cmake --build build --config Release -j $(nproc)
437
+
398
438
  ubuntu-22-cmake-sycl:
399
439
  runs-on: ubuntu-22.04
400
440
 
401
441
  continue-on-error: true
402
442
 
403
443
  steps:
404
- - uses: actions/checkout@v2
444
+ - uses: actions/checkout@v4
405
445
 
406
446
  - name: add oneAPI to apt
407
447
  shell: bash
@@ -442,7 +482,7 @@ jobs:
442
482
  continue-on-error: true
443
483
 
444
484
  steps:
445
- - uses: actions/checkout@v2
485
+ - uses: actions/checkout@v4
446
486
 
447
487
  - name: add oneAPI to apt
448
488
  shell: bash
@@ -546,7 +586,7 @@ jobs:
546
586
  steps:
547
587
  - name: Clone
548
588
  id: checkout
549
- uses: actions/checkout@v1
589
+ uses: actions/checkout@v4
550
590
 
551
591
  - name: Dependencies
552
592
  id: depends
@@ -561,6 +601,7 @@ jobs:
561
601
  mkdir build
562
602
  cd build
563
603
  cmake -G Xcode .. \
604
+ -DGGML_METAL_USE_BF16=ON \
564
605
  -DGGML_METAL_EMBED_LIBRARY=ON \
565
606
  -DLLAMA_BUILD_EXAMPLES=OFF \
566
607
  -DLLAMA_BUILD_TESTS=OFF \
@@ -576,7 +617,7 @@ jobs:
576
617
  steps:
577
618
  - name: Clone
578
619
  id: checkout
579
- uses: actions/checkout@v1
620
+ uses: actions/checkout@v4
580
621
 
581
622
  - name: Dependencies
582
623
  id: depends
@@ -591,6 +632,7 @@ jobs:
591
632
  mkdir build
592
633
  cd build
593
634
  cmake -G Xcode .. \
635
+ -DGGML_METAL_USE_BF16=ON \
594
636
  -DGGML_METAL_EMBED_LIBRARY=ON \
595
637
  -DLLAMA_BUILD_EXAMPLES=OFF \
596
638
  -DLLAMA_BUILD_TESTS=OFF \
@@ -610,7 +652,7 @@ jobs:
610
652
  steps:
611
653
  - name: Clone
612
654
  id: checkout
613
- uses: actions/checkout@v1
655
+ uses: actions/checkout@v4
614
656
 
615
657
  - name: Dependencies
616
658
  id: depends
@@ -696,22 +738,20 @@ jobs:
696
738
  strategy:
697
739
  matrix:
698
740
  include:
699
- - build: 'rpc-x64'
700
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
701
741
  - build: 'noavx-x64'
702
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
742
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
703
743
  - build: 'avx2-x64'
704
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
744
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
705
745
  - build: 'avx-x64'
706
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
746
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
707
747
  - build: 'avx512-x64'
708
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
748
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
709
749
  - build: 'openblas-x64'
710
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
750
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
711
751
  - build: 'kompute-x64'
712
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
752
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
713
753
  - build: 'vulkan-x64'
714
- defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
754
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
715
755
  - build: 'llvm-arm64'
716
756
  defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
717
757
  - build: 'msvc-arm64'
@@ -728,7 +768,7 @@ jobs:
728
768
  id: clone_kompute
729
769
  if: ${{ matrix.build == 'kompute-x64' }}
730
770
  run: |
731
- git submodule update --init ggml/src/kompute
771
+ git submodule update --init ggml/src/ggml-kompute/kompute
732
772
 
733
773
  - name: Download OpenBLAS
734
774
  id: get_openblas
@@ -859,8 +899,9 @@ jobs:
859
899
  run: |
860
900
  mkdir build
861
901
  cd build
862
- cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON
863
- cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
902
+ cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON
903
+ cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1)) -t ggml
904
+ cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
864
905
 
865
906
  - name: Determine tag name
866
907
  id: tag
@@ -910,7 +951,7 @@ jobs:
910
951
  shell: bash
911
952
 
912
953
  env:
913
- WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7dff44ba-e3af-4448-841c-0d616c8da6e7/w_BaseKit_p_2024.1.0.595_offline.exe
954
+ WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
914
955
  WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel
915
956
  ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
916
957
  steps:
@@ -954,6 +995,7 @@ jobs:
954
995
  cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl7.dll" ./build/bin
955
996
  cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
956
997
  cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
998
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
957
999
  echo "cp oneAPI running time dll files to ./build/bin done"
958
1000
  7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
959
1001
 
@@ -965,19 +1007,56 @@ jobs:
965
1007
  name: llama-bin-win-sycl-x64.zip
966
1008
 
967
1009
  windows-latest-cmake-hip:
1010
+ if: ${{ github.event.inputs.create_release != 'true' }}
1011
+ runs-on: windows-latest
1012
+
1013
+ steps:
1014
+ - name: Clone
1015
+ id: checkout
1016
+ uses: actions/checkout@v4
1017
+
1018
+ - name: Install
1019
+ id: depends
1020
+ run: |
1021
+ $ErrorActionPreference = "Stop"
1022
+ write-host "Downloading AMD HIP SDK Installer"
1023
+ Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
1024
+ write-host "Installing AMD HIP SDK"
1025
+ Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
1026
+ write-host "Completed AMD HIP SDK installation"
1027
+
1028
+ - name: Verify ROCm
1029
+ id: verify
1030
+ run: |
1031
+ & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
1032
+
1033
+ - name: Build
1034
+ id: cmake_build
1035
+ run: |
1036
+ $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
1037
+ $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
1038
+ cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
1039
+ cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
1040
+
1041
+ windows-latest-cmake-hip-release:
1042
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
968
1043
  runs-on: windows-latest
969
1044
 
1045
+ strategy:
1046
+ matrix:
1047
+ gpu_target: [gfx1100, gfx1101, gfx1030]
1048
+
970
1049
  steps:
971
1050
  - name: Clone
972
1051
  id: checkout
973
- uses: actions/checkout@v3
1052
+ uses: actions/checkout@v4
974
1053
 
975
1054
  - name: Install
976
1055
  id: depends
977
1056
  run: |
978
1057
  $ErrorActionPreference = "Stop"
979
1058
  write-host "Downloading AMD HIP SDK Installer"
980
- Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
1059
+ Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
981
1060
  write-host "Installing AMD HIP SDK"
982
1061
  Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
983
1062
  write-host "Completed AMD HIP SDK installation"
@@ -992,8 +1071,36 @@ jobs:
992
1071
  run: |
993
1072
  $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
994
1073
  $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
995
- cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON
996
- cmake --build build --config Release
1074
+ cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
1075
+ cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
1076
+ md "build\bin\rocblas\library\"
1077
+ cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
1078
+ cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
1079
+ cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
1080
+
1081
+ - name: Determine tag name
1082
+ id: tag
1083
+ shell: bash
1084
+ run: |
1085
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
1086
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
1087
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
1088
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
1089
+ else
1090
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
1091
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
1092
+ fi
1093
+
1094
+ - name: Pack artifacts
1095
+ id: pack_artifacts
1096
+ run: |
1097
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*
1098
+
1099
+ - name: Upload artifacts
1100
+ uses: actions/upload-artifact@v4
1101
+ with:
1102
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
1103
+ name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
997
1104
 
998
1105
  ios-xcode-build:
999
1106
  runs-on: macos-latest
@@ -1058,6 +1165,7 @@ jobs:
1058
1165
  - macOS-latest-cmake
1059
1166
  - windows-latest-cmake
1060
1167
  - windows-latest-cmake-cuda
1168
+ - windows-latest-cmake-hip-release
1061
1169
  - macOS-latest-cmake-arm64
1062
1170
  - macOS-latest-cmake-x64
1063
1171
 
@@ -3,6 +3,11 @@ on:
3
3
  schedule:
4
4
  - cron: "42 0 * * *"
5
5
 
6
+ # Fine-grained permission
7
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
8
+ permissions:
9
+ issues: write
10
+
6
11
  jobs:
7
12
  close-issues:
8
13
  runs-on: ubuntu-latest
@@ -15,11 +15,17 @@ on:
15
15
  branches:
16
16
  - master
17
17
  paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
18
+ workflow_dispatch: # allows manual triggering, useful for debugging
18
19
 
19
20
  concurrency:
20
21
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
21
22
  cancel-in-progress: true
22
23
 
24
+ # Fine-grant permission
25
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
26
+ permissions:
27
+ packages: write
28
+
23
29
  jobs:
24
30
  push_to_registry:
25
31
  name: Push Docker image to Docker Hub
@@ -37,15 +43,20 @@ jobs:
37
43
  - { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
38
44
  - { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
39
45
  - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
40
- - { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
41
- - { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
42
- # Note: the full-rocm image is failing due to a "no space left on device" error. It is disabled for now to allow the workflow to complete.
46
+ - { tag: "light-musa", dockerfile: ".devops/llama-cli-musa.Dockerfile", platforms: "linux/amd64" }
47
+ - { tag: "server-musa", dockerfile: ".devops/llama-server-musa.Dockerfile", platforms: "linux/amd64" }
48
+ - { tag: "full-musa", dockerfile: ".devops/full-musa.Dockerfile", platforms: "linux/amd64" }
49
+ # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
50
+ #- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
51
+ #- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
43
52
  #- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
44
53
  - { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" }
45
54
  - { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" }
46
55
  steps:
47
56
  - name: Check out the repo
48
57
  uses: actions/checkout@v4
58
+ with:
59
+ fetch-depth: 0 # preserve git history, so we can determine the build number
49
60
 
50
61
  - name: Set up QEMU
51
62
  uses: docker/setup-qemu-action@v2
@@ -60,6 +71,34 @@ jobs:
60
71
  username: ${{ github.repository_owner }}
61
72
  password: ${{ secrets.GITHUB_TOKEN }}
62
73
 
74
+ - name: Determine tag name
75
+ id: tag
76
+ shell: bash
77
+ run: |
78
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
79
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
80
+ REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
81
+ REPO_NAME="${{ github.event.repository.name }}"
82
+
83
+ # determine tag name postfix (build number, commit hash)
84
+ if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
85
+ TAG_POSTFIX="b${BUILD_NUMBER}"
86
+ else
87
+ SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
88
+ TAG_POSTFIX="${SAFE_NAME}-${SHORT_HASH}"
89
+ fi
90
+
91
+ # list all tags possible
92
+ TAGS=""
93
+ TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }},"
94
+ TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }}-${TAG_POSTFIX}"
95
+
96
+ echo "output_tags=$TAGS" >> $GITHUB_OUTPUT
97
+ echo "output_tags=$TAGS" # print out for debugging
98
+ env:
99
+ GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
100
+ GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
101
+
63
102
  # https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
64
103
  - name: Free Disk Space (Ubuntu)
65
104
  uses: jlumbroso/free-disk-space@main
@@ -77,40 +116,13 @@ jobs:
77
116
  docker-images: true
78
117
  swap-storage: true
79
118
 
80
- - name: Determine tag name
81
- id: tag
82
- shell: bash
83
- run: |
84
- BUILD_NUMBER="$(git rev-list --count HEAD)"
85
- SHORT_HASH="$(git rev-parse --short=7 HEAD)"
86
- if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
87
- echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
88
- else
89
- SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
90
- echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
91
- fi
92
-
93
- - name: Downcase github.repository_owner
94
- run: |
95
- echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV
96
- env:
97
- GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
98
-
99
- - name: Build and push Docker image (versioned)
119
+ - name: Build and push Docker image (tagged + versioned)
100
120
  if: github.event_name == 'push'
101
- uses: docker/build-push-action@v4
121
+ uses: docker/build-push-action@v6
102
122
  with:
103
123
  context: .
104
124
  push: true
105
125
  platforms: ${{ matrix.config.platforms }}
106
- tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
107
- file: ${{ matrix.config.dockerfile }}
108
-
109
- - name: Build and push Docker image (tagged)
110
- uses: docker/build-push-action@v4
111
- with:
112
- context: .
113
- push: ${{ github.event_name == 'push' }}
114
- platforms: ${{ matrix.config.platforms }}
115
- tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
126
+ # tag list is generated from step above
127
+ tags: ${{ steps.tag.outputs.output_tags }}
116
128
  file: ${{ matrix.config.dockerfile }}
@@ -21,6 +21,13 @@ concurrency:
21
21
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
22
22
  cancel-in-progress: true
23
23
 
24
+ # Fine-grant permission
25
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
26
+ permissions:
27
+ # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
28
+ id-token: write
29
+ contents: read
30
+
24
31
  jobs:
25
32
  nix-build-aarch64:
26
33
  runs-on: ubuntu-latest
@@ -12,6 +12,13 @@ concurrency:
12
12
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
13
13
  cancel-in-progress: true
14
14
 
15
+ # Fine-grant permission
16
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
17
+ permissions:
18
+ # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
19
+ id-token: write
20
+ contents: read
21
+
15
22
  jobs:
16
23
  nix-eval:
17
24
  strategy:
@@ -6,15 +6,13 @@ on:
6
6
  - '.github/workflows/python-check-requirements.yml'
7
7
  - 'scripts/check-requirements.sh'
8
8
  - 'convert*.py'
9
- - 'requirements.txt'
10
- - 'requirements/*.txt'
9
+ - '**/requirements*.txt'
11
10
  pull_request:
12
11
  paths:
13
12
  - '.github/workflows/python-check-requirements.yml'
14
13
  - 'scripts/check-requirements.sh'
15
14
  - 'convert*.py'
16
- - 'requirements.txt'
17
- - 'requirements/*.txt'
15
+ - '**/requirements*.txt'
18
16
 
19
17
  concurrency:
20
18
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
@@ -4,11 +4,13 @@ on:
4
4
  push:
5
5
  paths:
6
6
  - '.github/workflows/python-type-check.yml'
7
+ - 'pyrightconfig.json'
7
8
  - '**.py'
8
9
  - '**/requirements*.txt'
9
10
  pull_request:
10
11
  paths:
11
12
  - '.github/workflows/python-type-check.yml'
13
+ - 'pyrightconfig.json'
12
14
  - '**.py'
13
15
  - '**/requirements*.txt'
14
16
 
@@ -33,6 +35,6 @@ jobs:
33
35
  - name: Type-check with Pyright
34
36
  uses: jakebailey/pyright-action@v2
35
37
  with:
36
- version: 1.1.370
38
+ version: 1.1.382
37
39
  level: warning
38
40
  warnings: true
@@ -20,6 +20,12 @@ on:
20
20
  types: [opened, synchronize, reopened]
21
21
  paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
22
22
 
23
+ env:
24
+ LLAMA_LOG_COLORS: 1
25
+ LLAMA_LOG_PREFIX: 1
26
+ LLAMA_LOG_TIMESTAMPS: 1
27
+ LLAMA_LOG_VERBOSITY: 10
28
+
23
29
  concurrency:
24
30
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
25
31
  cancel-in-progress: true
@@ -173,6 +179,7 @@ jobs:
173
179
  if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
174
180
  run: |
175
181
  cd examples/server/tests
182
+ $env:PYTHONIOENCODING = ":replace"
176
183
  behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
177
184
 
178
185
  - name: Slow tests
@@ -62,6 +62,9 @@ option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF)
62
62
  option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
63
63
  option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
64
64
 
65
+ # utils
66
+ option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE})
67
+
65
68
  # extra artifacts
66
69
  option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
67
70
  option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
@@ -82,11 +85,15 @@ set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
82
85
 
83
86
  # change the default for these ggml options
84
87
  if (NOT DEFINED GGML_LLAMAFILE)
85
- set(GGML_LLAMAFILE ON)
88
+ set(GGML_LLAMAFILE_DEFAULT ON)
89
+ endif()
90
+
91
+ if (NOT DEFINED GGML_AMX)
92
+ set(GGML_AMX ON)
86
93
  endif()
87
94
 
88
- if (NOT DEFINED GGML_CUDA_USE_GRAPHS)
89
- set(GGML_CUDA_USE_GRAPHS ON)
95
+ if (NOT DEFINED GGML_CUDA_GRAPHS)
96
+ set(GGML_CUDA_GRAPHS_DEFAULT ON)
90
97
  endif()
91
98
 
92
99
  # transition helpers
@@ -133,15 +140,21 @@ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location o
133
140
  set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
134
141
  set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
135
142
 
136
-
137
143
  # At the moment some compile definitions are placed within the ggml/src
138
144
  # directory but not exported on the `ggml` target. This could be improved by
139
145
  # determining _precisely_ which defines are necessary for the llama-config
140
146
  # package.
141
147
  #
142
- get_directory_property(GGML_DIR_DEFINES DIRECTORY ggml/src COMPILE_DEFINITIONS)
148
+ set(GGML_TRANSIENT_DEFINES)
149
+ get_target_property(GGML_DIRECTORY ggml SOURCE_DIR)
150
+ get_directory_property(GGML_DIR_DEFINES DIRECTORY ${GGML_DIRECTORY} COMPILE_DEFINITIONS)
151
+ if (GGML_DIR_DEFINES)
152
+ list(APPEND GGML_TRANSIENT_DEFINES ${GGML_DIR_DEFINES})
153
+ endif()
143
154
  get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS)
144
- set(GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES} ${GGML_DIR_DEFINES})
155
+ if (GGML_TARGET_DEFINES)
156
+ list(APPEND GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES})
157
+ endif()
145
158
  get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)
146
159
 
147
160
  set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h)
@@ -184,17 +197,19 @@ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
184
197
  DESTINATION lib/pkgconfig)
185
198
 
186
199
  #
187
- # programs, examples and tests
200
+ # utils, programs, examples and tests
188
201
  #
189
202
 
190
- add_subdirectory(common)
203
+ if (LLAMA_BUILD_COMMON)
204
+ add_subdirectory(common)
205
+ endif()
191
206
 
192
- if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
207
+ if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
193
208
  include(CTest)
194
209
  add_subdirectory(tests)
195
- endif ()
210
+ endif()
196
211
 
197
- if (LLAMA_BUILD_EXAMPLES)
212
+ if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
198
213
  add_subdirectory(examples)
199
214
  add_subdirectory(pocs)
200
215
  endif()
@@ -0,0 +1,16 @@
1
+ set( CMAKE_SYSTEM_NAME Darwin )
2
+ set( CMAKE_SYSTEM_PROCESSOR arm64 )
3
+
4
+ set( target arm64-apple-darwin-macho )
5
+
6
+ set( CMAKE_C_COMPILER clang )
7
+ set( CMAKE_CXX_COMPILER clang++ )
8
+
9
+ set( CMAKE_C_COMPILER_TARGET ${target} )
10
+ set( CMAKE_CXX_COMPILER_TARGET ${target} )
11
+
12
+ set( arch_c_flags "-march=armv8.4-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
13
+ set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function" )
14
+
15
+ set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
16
+ set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )