@fugood/llama.node 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286)
  1. package/CMakeLists.txt +7 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +18 -1
  17. package/package.json +1 -1
  18. package/src/DetokenizeWorker.cpp +1 -1
  19. package/src/EmbeddingWorker.cpp +17 -7
  20. package/src/EmbeddingWorker.h +2 -1
  21. package/src/LlamaCompletionWorker.cpp +8 -8
  22. package/src/LlamaCompletionWorker.h +2 -2
  23. package/src/LlamaContext.cpp +89 -27
  24. package/src/LlamaContext.h +2 -0
  25. package/src/TokenizeWorker.cpp +1 -1
  26. package/src/common.hpp +4 -4
  27. package/src/llama.cpp/.github/workflows/build.yml +240 -168
  28. package/src/llama.cpp/.github/workflows/docker.yml +8 -8
  29. package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
  30. package/src/llama.cpp/.github/workflows/server.yml +21 -14
  31. package/src/llama.cpp/CMakeLists.txt +14 -6
  32. package/src/llama.cpp/Sources/llama/llama.h +4 -0
  33. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  34. package/src/llama.cpp/cmake/common.cmake +33 -0
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
  36. package/src/llama.cpp/common/CMakeLists.txt +6 -4
  37. package/src/llama.cpp/common/arg.cpp +986 -770
  38. package/src/llama.cpp/common/arg.h +22 -22
  39. package/src/llama.cpp/common/common.cpp +212 -351
  40. package/src/llama.cpp/common/common.h +204 -117
  41. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  42. package/src/llama.cpp/common/log.cpp +50 -50
  43. package/src/llama.cpp/common/log.h +18 -18
  44. package/src/llama.cpp/common/ngram-cache.cpp +36 -36
  45. package/src/llama.cpp/common/ngram-cache.h +19 -19
  46. package/src/llama.cpp/common/sampling.cpp +163 -121
  47. package/src/llama.cpp/common/sampling.h +41 -20
  48. package/src/llama.cpp/common/speculative.cpp +274 -0
  49. package/src/llama.cpp/common/speculative.h +28 -0
  50. package/src/llama.cpp/docs/build.md +134 -161
  51. package/src/llama.cpp/examples/CMakeLists.txt +33 -14
  52. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/batched/batched.cpp +19 -18
  54. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  55. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
  56. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  57. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
  58. package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
  60. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
  61. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
  63. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
  64. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
  65. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  66. package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
  67. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
  68. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
  69. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
  70. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
  71. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  72. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
  73. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
  75. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  76. package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
  77. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  78. package/src/llama.cpp/examples/imatrix/imatrix.cpp +31 -13
  79. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  80. package/src/llama.cpp/examples/infill/infill.cpp +41 -87
  81. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
  82. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +439 -459
  83. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +2 -0
  84. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
  85. package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
  86. package/src/llama.cpp/examples/llava/clip.cpp +263 -66
  87. package/src/llama.cpp/examples/llava/clip.h +8 -2
  88. package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
  89. package/src/llama.cpp/examples/llava/llava.cpp +83 -22
  90. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
  91. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
  92. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
  94. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  95. package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
  96. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  97. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +16 -15
  98. package/src/llama.cpp/examples/lookup/lookup.cpp +30 -30
  99. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  100. package/src/llama.cpp/examples/main/main.cpp +73 -114
  101. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
  102. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  103. package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
  104. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  105. package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
  106. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  107. package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
  108. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  109. package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
  110. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  111. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
  112. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  113. package/src/llama.cpp/examples/retrieval/retrieval.cpp +16 -16
  114. package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
  115. package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
  116. package/src/llama.cpp/examples/run/run.cpp +911 -0
  117. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  118. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +38 -21
  119. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -16
  120. package/src/llama.cpp/examples/server/server.cpp +2073 -1339
  121. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  122. package/src/llama.cpp/examples/server/utils.hpp +354 -277
  123. package/src/llama.cpp/examples/simple/CMakeLists.txt +2 -2
  124. package/src/llama.cpp/examples/simple/simple.cpp +130 -94
  125. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  126. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +200 -0
  127. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  128. package/src/llama.cpp/examples/speculative/speculative.cpp +68 -64
  129. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
  130. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
  131. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  132. package/src/llama.cpp/examples/tokenize/tokenize.cpp +3 -3
  133. package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
  134. package/src/llama.cpp/examples/tts/tts.cpp +932 -0
  135. package/src/llama.cpp/ggml/CMakeLists.txt +54 -36
  136. package/src/llama.cpp/ggml/include/ggml-backend.h +63 -34
  137. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  138. package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
  139. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  140. package/src/llama.cpp/ggml/include/ggml-cpu.h +135 -0
  141. package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
  142. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  143. package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
  144. package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  145. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  146. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  147. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  148. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  149. package/src/llama.cpp/ggml/include/ggml.h +159 -417
  150. package/src/llama.cpp/ggml/src/CMakeLists.txt +121 -1155
  151. package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -28
  152. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +57 -36
  153. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +552 -0
  154. package/src/llama.cpp/ggml/src/ggml-backend.cpp +306 -867
  155. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  156. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +216 -65
  157. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +76 -0
  158. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
  159. package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
  160. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +343 -177
  161. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
  162. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
  163. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
  164. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
  165. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
  169. package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
  170. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +336 -0
  171. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  172. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  173. package/src/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  174. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  175. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  176. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  177. package/src/llama.cpp/ggml/src/{ggml-aarch64.c → ggml-cpu/ggml-cpu-aarch64.cpp} +1299 -246
  178. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  179. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  180. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  181. package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +14 -242
  182. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  183. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  184. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  185. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  186. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  187. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +628 -0
  188. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +666 -0
  189. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +152 -0
  190. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
  191. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +104 -0
  192. package/src/llama.cpp/ggml/src/ggml-impl.h +393 -22
  193. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  194. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +360 -127
  195. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +105 -0
  196. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  197. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
  198. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
  199. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
  200. package/src/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
  201. package/src/llama.cpp/ggml/src/ggml-quants.c +188 -10702
  202. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
  203. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  204. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +478 -300
  205. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +84 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +36 -5
  208. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +259 -0
  209. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
  210. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  211. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
  212. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +34 -35
  213. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  214. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  215. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
  216. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3638 -4151
  217. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
  218. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
  219. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -87
  220. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +7 -6
  221. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  222. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  223. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
  224. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
  225. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
  226. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
  227. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  228. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  229. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  230. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  231. package/src/llama.cpp/ggml/src/ggml-threading.h +14 -0
  232. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +92 -0
  233. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2138 -887
  234. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +3 -1
  235. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  236. package/src/llama.cpp/ggml/src/ggml.c +4427 -20125
  237. package/src/llama.cpp/include/llama-cpp.h +25 -0
  238. package/src/llama.cpp/include/llama.h +93 -52
  239. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  240. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  241. package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
  242. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  243. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  244. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  245. package/src/llama.cpp/src/CMakeLists.txt +4 -8
  246. package/src/llama.cpp/src/llama-grammar.cpp +15 -15
  247. package/src/llama.cpp/src/llama-grammar.h +2 -5
  248. package/src/llama.cpp/src/llama-sampling.cpp +779 -194
  249. package/src/llama.cpp/src/llama-sampling.h +21 -2
  250. package/src/llama.cpp/src/llama-vocab.cpp +55 -10
  251. package/src/llama.cpp/src/llama-vocab.h +35 -11
  252. package/src/llama.cpp/src/llama.cpp +4317 -2979
  253. package/src/llama.cpp/src/unicode-data.cpp +2 -2
  254. package/src/llama.cpp/src/unicode.cpp +62 -51
  255. package/src/llama.cpp/src/unicode.h +9 -10
  256. package/src/llama.cpp/tests/CMakeLists.txt +48 -38
  257. package/src/llama.cpp/tests/test-arg-parser.cpp +15 -15
  258. package/src/llama.cpp/tests/test-backend-ops.cpp +324 -80
  259. package/src/llama.cpp/tests/test-barrier.cpp +1 -0
  260. package/src/llama.cpp/tests/test-chat-template.cpp +59 -9
  261. package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
  262. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
  263. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
  264. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
  265. package/src/llama.cpp/tests/test-log.cpp +2 -2
  266. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  267. package/src/llama.cpp/tests/test-quantize-fns.cpp +24 -21
  268. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  269. package/src/llama.cpp/tests/test-rope.cpp +62 -20
  270. package/src/llama.cpp/tests/test-sampling.cpp +163 -138
  271. package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
  272. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  273. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  274. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
  275. package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
  276. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
  277. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
  278. package/src/llama.cpp/common/train.cpp +0 -1515
  279. package/src/llama.cpp/common/train.h +0 -233
  280. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
  281. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
  282. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -39
  283. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +0 -600
  284. package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
  285. /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  286. /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
@@ -55,7 +55,13 @@ jobs:
55
55
  sysctl -a
56
56
  mkdir build
57
57
  cd build
58
- cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF ..
58
+ cmake .. \
59
+ -DLLAMA_FATAL_WARNINGS=ON \
60
+ -DLLAMA_CURL=ON \
61
+ -DGGML_METAL_USE_BF16=ON \
62
+ -DGGML_METAL_EMBED_LIBRARY=ON \
63
+ -DGGML_RPC=ON \
64
+ -DBUILD_SHARED_LIBS=OFF
59
65
  cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
60
66
 
61
67
  - name: Test
@@ -92,7 +98,7 @@ jobs:
92
98
  name: llama-bin-macos-arm64.zip
93
99
 
94
100
  macOS-latest-cmake-x64:
95
- runs-on: macos-12
101
+ runs-on: macos-13
96
102
 
97
103
  steps:
98
104
  - name: Clone
@@ -113,7 +119,12 @@ jobs:
113
119
  sysctl -a
114
120
  # Metal is disabled due to intermittent failures with Github runners not having a GPU:
115
121
  # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
116
- cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
122
+ cmake -B build \
123
+ -DLLAMA_FATAL_WARNINGS=ON \
124
+ -DLLAMA_CURL=ON \
125
+ -DGGML_METAL=OFF \
126
+ -DGGML_RPC=ON \
127
+ -DBUILD_SHARED_LIBS=OFF
117
128
  cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
118
129
 
119
130
  - name: Test
@@ -149,66 +160,6 @@ jobs:
149
160
  path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
150
161
  name: llama-bin-macos-x64.zip
151
162
 
152
- ubuntu-focal-make:
153
- runs-on: ubuntu-20.04
154
- env:
155
- LLAMA_NODE_AVAILABLE: true
156
- LLAMA_PYTHON_AVAILABLE: true
157
-
158
- steps:
159
- - name: Clone
160
- id: checkout
161
- uses: actions/checkout@v4
162
-
163
- - name: Dependencies
164
- id: depends
165
- run: |
166
- sudo apt-get update
167
- sudo apt-get install build-essential gcc-8
168
-
169
- - uses: actions/setup-node@v4
170
- with:
171
- node-version: "20"
172
-
173
- - uses: actions/setup-python@v5
174
- with:
175
- python-version: "3.11"
176
-
177
- - name: Build
178
- id: make_build
179
- env:
180
- LLAMA_FATAL_WARNINGS: 1
181
- run: |
182
- CC=gcc-8 make -j $(nproc)
183
-
184
- - name: Test
185
- id: make_test
186
- run: |
187
- CC=gcc-8 make tests -j $(nproc)
188
- make test -j $(nproc)
189
-
190
- ubuntu-focal-make-curl:
191
- runs-on: ubuntu-20.04
192
-
193
- steps:
194
- - name: Clone
195
- id: checkout
196
- uses: actions/checkout@v4
197
-
198
- - name: Dependencies
199
- id: depends
200
- run: |
201
- sudo apt-get update
202
- sudo apt-get install build-essential gcc-8 libcurl4-openssl-dev
203
-
204
- - name: Build
205
- id: make_build
206
- env:
207
- LLAMA_FATAL_WARNINGS: 1
208
- LLAMA_CURL: 1
209
- run: |
210
- CC=gcc-8 make -j $(nproc)
211
-
212
163
  ubuntu-latest-cmake:
213
164
  runs-on: ubuntu-latest
214
165
 
@@ -366,7 +317,7 @@ jobs:
366
317
  wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
367
318
  sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
368
319
  sudo apt-get update -y
369
- sudo apt-get install -y build-essential vulkan-sdk
320
+ sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk
370
321
 
371
322
  - name: Build
372
323
  id: cmake_build
@@ -376,6 +327,12 @@ jobs:
376
327
  cmake -DGGML_VULKAN=ON ..
377
328
  cmake --build . --config Release -j $(nproc)
378
329
 
330
+ - name: Test
331
+ id: cmake_test
332
+ run: |
333
+ cd build
334
+ ctest -L main --verbose --timeout 900
335
+
379
336
  ubuntu-22-cmake-hip:
380
337
  runs-on: ubuntu-22.04
381
338
  container: rocm/dev-ubuntu-22.04:6.0.2
@@ -394,15 +351,36 @@ jobs:
394
351
  - name: Build with native CMake HIP support
395
352
  id: cmake_build
396
353
  run: |
397
- cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIPBLAS=ON
354
+ cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIP=ON
398
355
  cmake --build build --config Release -j $(nproc)
399
356
 
400
357
  - name: Build with legacy HIP support
401
358
  id: cmake_build_legacy_hip
402
359
  run: |
403
- cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIPBLAS=ON
360
+ cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIP=ON
404
361
  cmake --build build2 --config Release -j $(nproc)
405
362
 
363
+ ubuntu-22-cmake-musa:
364
+ runs-on: ubuntu-22.04
365
+ container: mthreads/musa:rc3.1.0-devel-ubuntu22.04
366
+
367
+ steps:
368
+ - name: Clone
369
+ id: checkout
370
+ uses: actions/checkout@v4
371
+
372
+ - name: Dependencies
373
+ id: depends
374
+ run: |
375
+ apt-get update
376
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev
377
+
378
+ - name: Build with native CMake MUSA support
379
+ id: cmake_build
380
+ run: |
381
+ cmake -B build -S . -DGGML_MUSA=ON
382
+ cmake --build build --config Release -j $(nproc)
383
+
406
384
  ubuntu-22-cmake-sycl:
407
385
  runs-on: ubuntu-22.04
408
386
 
@@ -485,36 +463,6 @@ jobs:
485
463
  cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON ..
486
464
  cmake --build . --config Release -j $(nproc)
487
465
 
488
- # TODO: build with GGML_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
489
- # how to debug it.
490
- # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124
491
- macOS-latest-make:
492
- runs-on: macos-latest
493
-
494
- steps:
495
- - name: Clone
496
- id: checkout
497
- uses: actions/checkout@v4
498
-
499
- - name: Dependencies
500
- id: depends
501
- continue-on-error: true
502
- run: |
503
- brew update
504
-
505
- - name: Build
506
- id: make_build
507
- env:
508
- LLAMA_FATAL_WARNINGS: 1
509
- run: |
510
- GGML_NO_METAL=1 make -j $(sysctl -n hw.logicalcpu)
511
-
512
- - name: Test
513
- id: make_test
514
- run: |
515
- GGML_NO_METAL=1 make tests -j $(sysctl -n hw.logicalcpu)
516
- GGML_NO_METAL=1 make test -j $(sysctl -n hw.logicalcpu)
517
-
518
466
  # TODO: build with GGML_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know
519
467
  # how to debug it.
520
468
  # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
@@ -569,6 +517,7 @@ jobs:
569
517
  mkdir build
570
518
  cd build
571
519
  cmake -G Xcode .. \
520
+ -DGGML_METAL_USE_BF16=ON \
572
521
  -DGGML_METAL_EMBED_LIBRARY=ON \
573
522
  -DLLAMA_BUILD_EXAMPLES=OFF \
574
523
  -DLLAMA_BUILD_TESTS=OFF \
@@ -599,6 +548,7 @@ jobs:
599
548
  mkdir build
600
549
  cd build
601
550
  cmake -G Xcode .. \
551
+ -DGGML_METAL_USE_BF16=ON \
602
552
  -DGGML_METAL_EMBED_LIBRARY=ON \
603
553
  -DLLAMA_BUILD_EXAMPLES=OFF \
604
554
  -DLLAMA_BUILD_TESTS=OFF \
@@ -626,15 +576,26 @@ jobs:
626
576
  run: |
627
577
  brew update
628
578
 
629
- - name: xcodebuild for swift package
630
- id: xcodebuild
579
+ - name: Build llama.cpp with CMake
580
+ id: cmake_build
631
581
  run: |
632
- xcodebuild -scheme llama -destination "${{ matrix.destination }}"
582
+ sysctl -a
583
+ mkdir build
584
+ cd build
585
+ cmake -G Xcode .. \
586
+ -DGGML_METAL_USE_BF16=ON \
587
+ -DGGML_METAL_EMBED_LIBRARY=ON \
588
+ -DLLAMA_BUILD_EXAMPLES=OFF \
589
+ -DLLAMA_BUILD_TESTS=OFF \
590
+ -DLLAMA_BUILD_SERVER=OFF \
591
+ -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
592
+ cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
593
+ sudo cmake --install . --config Release
633
594
 
634
- - name: Build Swift Example
635
- id: make_build_swift_example
595
+ - name: xcodebuild for swift package
596
+ id: xcodebuild
636
597
  run: |
637
- make swift
598
+ xcodebuild -scheme llama-Package -destination "${{ matrix.destination }}"
638
599
 
639
600
  windows-msys2:
640
601
  runs-on: windows-latest
@@ -661,21 +622,6 @@ jobs:
661
622
  mingw-w64-${{matrix.env}}-cmake
662
623
  mingw-w64-${{matrix.env}}-openblas
663
624
 
664
- - name: Build using make
665
- shell: msys2 {0}
666
- run: |
667
- make -j $(nproc)
668
-
669
- - name: Clean after building using make
670
- shell: msys2 {0}
671
- run: |
672
- make clean
673
-
674
- - name: Build using make w/ OpenBLAS
675
- shell: msys2 {0}
676
- run: |
677
- make GGML_OPENBLAS=1 -j $(nproc)
678
-
679
625
  - name: Build using CMake
680
626
  shell: msys2 {0}
681
627
  run: |
@@ -694,7 +640,7 @@ jobs:
694
640
  cmake --build build --config ${{ matrix.build }} -j $(nproc)
695
641
 
696
642
  windows-latest-cmake:
697
- runs-on: windows-2019
643
+ runs-on: windows-latest
698
644
 
699
645
  env:
700
646
  OPENBLAS_VERSION: 0.3.23
@@ -722,6 +668,8 @@ jobs:
722
668
  defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
723
669
  - build: 'msvc-arm64'
724
670
  defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
671
+ - build: 'llvm-arm64-opencl-adreno'
672
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
725
673
 
726
674
  steps:
727
675
  - name: Clone
@@ -734,7 +682,7 @@ jobs:
734
682
  id: clone_kompute
735
683
  if: ${{ matrix.build == 'kompute-x64' }}
736
684
  run: |
737
- git submodule update --init ggml/src/kompute
685
+ git submodule update --init ggml/src/ggml-kompute/kompute
738
686
 
739
687
  - name: Download OpenBLAS
740
688
  id: get_openblas
@@ -763,6 +711,28 @@ jobs:
763
711
  run: |
764
712
  choco install ninja
765
713
 
714
+ - name: Install OpenCL Headers and Libs
715
+ id: install_opencl
716
+ if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }}
717
+ run: |
718
+ git clone https://github.com/KhronosGroup/OpenCL-Headers
719
+ cd OpenCL-Headers
720
+ mkdir build && cd build
721
+ cmake .. `
722
+ -DBUILD_TESTING=OFF `
723
+ -DOPENCL_HEADERS_BUILD_TESTING=OFF `
724
+ -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
725
+ -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
726
+ cmake --build . --target install
727
+ git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
728
+ cd OpenCL-ICD-Loader
729
+ mkdir build-arm64-release && cd build-arm64-release
730
+ cmake .. `
731
+ -A arm64 `
732
+ -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" `
733
+ -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
734
+ cmake --build . --target install --config release
735
+
766
736
  - name: Build
767
737
  id: cmake_build
768
738
  run: |
@@ -792,7 +762,7 @@ jobs:
792
762
  - name: Test
793
763
  id: cmake_test
794
764
  # not all machines have native AVX-512
795
- if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
765
+ if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
796
766
  run: |
797
767
  cd build
798
768
  ctest -L main -C Release --verbose --timeout 900
@@ -837,12 +807,33 @@ jobs:
837
807
  path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
838
808
  name: llama-bin-win-${{ matrix.build }}.zip
839
809
 
840
- windows-latest-cmake-cuda:
810
+ ubuntu-latest-cmake-cuda:
811
+ runs-on: ubuntu-latest
812
+ container: nvidia/cuda:12.6.2-devel-ubuntu24.04
813
+
814
+ steps:
815
+ - name: Clone
816
+ id: checkout
817
+ uses: actions/checkout@v4
818
+
819
+ - name: Install dependencies
820
+ env:
821
+ DEBIAN_FRONTEND: noninteractive
822
+ run: |
823
+ apt update
824
+ apt install -y cmake build-essential ninja-build libgomp1 git
825
+
826
+ - name: Build with CMake
827
+ run: |
828
+ cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=89-real -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined -DLLAMA_FATAL_WARNINGS=ON
829
+ cmake --build build
830
+
831
+ windows-2019-cmake-cuda:
841
832
  runs-on: windows-2019
842
833
 
843
834
  strategy:
844
835
  matrix:
845
- cuda: ['12.2.0', '11.7.1']
836
+ cuda: ['12.4', '11.7']
846
837
  build: ['cuda']
847
838
 
848
839
  steps:
@@ -850,24 +841,83 @@ jobs:
850
841
  id: checkout
851
842
  uses: actions/checkout@v4
852
843
  with:
853
- fetch-depth: 0
854
-
855
- - name: Install CUDA toolkit
856
- id: cuda-toolkit
857
- uses: Jimver/cuda-toolkit@v0.2.15
844
+ fetch-depth: 0
845
+
846
+ - name: Install Cuda Toolkit 11.7
847
+ if: ${{ matrix.cuda == '11.7' }}
848
+ run: |
849
+ mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
850
+ choco install unzip -y
851
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
852
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
853
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
854
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
855
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
856
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
857
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
858
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
859
+ unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
860
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
861
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
862
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
863
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
864
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
865
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
866
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
867
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
868
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
869
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
870
+ echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
871
+ echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
872
+
873
+ - name: Install Cuda Toolkit 12.4
874
+ if: ${{ matrix.cuda == '12.4' }}
875
+ run: |
876
+ mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
877
+ choco install unzip -y
878
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
879
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
880
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
881
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
882
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
883
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
884
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
885
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
886
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
887
+ unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
888
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
889
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
890
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
891
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
892
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
893
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
894
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
895
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
896
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
897
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
898
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
899
+ echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
900
+ echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
901
+
902
+ - name: Install ccache
903
+ uses: hendrikmuhs/ccache-action@v1.2
858
904
  with:
859
- cuda: ${{ matrix.cuda }}
860
- method: 'network'
861
- sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
905
+ key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
906
+
907
+ - name: Install Ninja
908
+ id: install_ninja
909
+ run: |
910
+ choco install ninja
862
911
 
863
912
  - name: Build
864
913
  id: cmake_build
914
+ shell: cmd
865
915
  run: |
866
- mkdir build
867
- cd build
868
- cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON
869
- cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1)) -t ggml
870
- cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
916
+ call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
917
+ cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON
918
+ set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
919
+ cmake --build build --config Release -j %NINJA_JOBS% -t ggml
920
+ cmake --build build --config Release
871
921
 
872
922
  - name: Determine tag name
873
923
  id: tag
@@ -896,10 +946,12 @@ jobs:
896
946
  name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
897
947
 
898
948
  - name: Copy and pack Cuda runtime
949
+ if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
899
950
  run: |
900
- echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
951
+ echo "Cuda install location: ${{ env.CUDA_PATH }}"
901
952
  $dst='.\build\bin\cudart\'
902
- robocopy "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
953
+ robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
954
+ robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
903
955
  7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*
904
956
 
905
957
  - name: Upload Cuda runtime
@@ -917,8 +969,8 @@ jobs:
917
969
  shell: bash
918
970
 
919
971
  env:
920
- WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7dff44ba-e3af-4448-841c-0d616c8da6e7/w_BaseKit_p_2024.1.0.595_offline.exe
921
- WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel
972
+ WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
973
+ WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
922
974
  ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
923
975
  steps:
924
976
  - name: Clone
@@ -928,7 +980,8 @@ jobs:
928
980
  fetch-depth: 0
929
981
 
930
982
  - name: Install
931
- run: scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
983
+ run: |
984
+ scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
932
985
 
933
986
  - name: Build
934
987
  id: cmake_build
@@ -947,25 +1000,33 @@ jobs:
947
1000
  echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
948
1001
  fi
949
1002
 
950
- - name: Pack artifacts
1003
+ - name: Build the release package
951
1004
  id: pack_artifacts
952
1005
  if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
953
1006
  run: |
954
1007
  echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"
955
- cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.4.dll" ./build/bin
1008
+
1009
+ cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
956
1010
  cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
957
1011
  cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
958
1012
 
959
- cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/pi_win_proxy_loader.dll" ./build/bin
960
- cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/pi_level_zero.dll" ./build/bin
961
- cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl7.dll" ./build/bin
1013
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
1014
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
1015
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
1016
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
1017
+
1018
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
962
1019
  cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
963
1020
  cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
964
1021
  cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
1022
+
1023
+ cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
1024
+ cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin
1025
+
965
1026
  echo "cp oneAPI running time dll files to ./build/bin done"
966
1027
  7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
967
1028
 
968
- - name: Upload artifacts
1029
+ - name: Upload the release package
969
1030
  if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
970
1031
  uses: actions/upload-artifact@v4
971
1032
  with:
@@ -996,12 +1057,17 @@ jobs:
996
1057
  run: |
997
1058
  & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
998
1059
 
1060
+ - name: Install ccache
1061
+ uses: hendrikmuhs/ccache-action@v1.2
1062
+ with:
1063
+ key: ${{ github.job }}
1064
+
999
1065
  - name: Build
1000
1066
  id: cmake_build
1001
1067
  run: |
1002
1068
  $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
1003
1069
  $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
1004
- cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
1070
+ cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
1005
1071
  cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
1006
1072
 
1007
1073
  windows-latest-cmake-hip-release:
@@ -1016,6 +1082,8 @@ jobs:
1016
1082
  - name: Clone
1017
1083
  id: checkout
1018
1084
  uses: actions/checkout@v4
1085
+ with:
1086
+ fetch-depth: 0
1019
1087
 
1020
1088
  - name: Install
1021
1089
  id: depends
@@ -1037,7 +1105,7 @@ jobs:
1037
1105
  run: |
1038
1106
  $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
1039
1107
  $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
1040
- cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
1108
+ cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
1041
1109
  cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
1042
1110
  md "build\bin\rocblas\library\"
1043
1111
  cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
@@ -1075,6 +1143,29 @@ jobs:
1075
1143
  - name: Checkout code
1076
1144
  uses: actions/checkout@v4
1077
1145
 
1146
+ - name: Build
1147
+ id: cmake_build
1148
+ run: |
1149
+ sysctl -a
1150
+ mkdir build
1151
+ cd build
1152
+ cmake -G Xcode .. \
1153
+ -DGGML_METAL_USE_BF16=ON \
1154
+ -DGGML_METAL_EMBED_LIBRARY=ON \
1155
+ -DLLAMA_BUILD_EXAMPLES=OFF \
1156
+ -DLLAMA_BUILD_TESTS=OFF \
1157
+ -DLLAMA_BUILD_SERVER=OFF \
1158
+ -DCMAKE_SYSTEM_NAME=iOS \
1159
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
1160
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
1161
+ cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
1162
+ sudo cmake --install . --config Release
1163
+
1164
+ - name: xcodebuild for swift package
1165
+ id: xcodebuild
1166
+ run: |
1167
+ xcodebuild -scheme llama-Package -destination 'generic/platform=iOS'
1168
+
1078
1169
  - name: Build Xcode project
1079
1170
  run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
1080
1171
 
@@ -1102,35 +1193,16 @@ jobs:
1102
1193
 
1103
1194
  ./gradlew build --no-daemon
1104
1195
 
1105
- # freeBSD-latest:
1106
- # runs-on: macos-12
1107
- # steps:
1108
- # - name: Clone
1109
- # uses: actions/checkout@v4
1110
- #
1111
- # - name: Build
1112
- # uses: cross-platform-actions/action@v0.19.0
1113
- # with:
1114
- # operating_system: freebsd
1115
- # version: '13.2'
1116
- # hypervisor: 'qemu'
1117
- # run: |
1118
- # sudo pkg update
1119
- # sudo pkg install -y gmake automake autoconf pkgconf llvm15 openblas
1120
- # gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`
1121
-
1122
1196
  release:
1123
1197
  if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1124
1198
 
1125
1199
  runs-on: ubuntu-latest
1126
1200
 
1127
1201
  needs:
1128
- - ubuntu-focal-make
1129
1202
  - ubuntu-latest-cmake
1130
- - macOS-latest-make
1131
1203
  - macOS-latest-cmake
1132
1204
  - windows-latest-cmake
1133
- - windows-latest-cmake-cuda
1205
+ - windows-2019-cmake-cuda
1134
1206
  - windows-latest-cmake-hip-release
1135
1207
  - macOS-latest-cmake-arm64
1136
1208
  - macOS-latest-cmake-x64