@fugood/llama.node 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225)
  1. package/CMakeLists.txt +5 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +18 -1
  17. package/package.json +1 -1
  18. package/src/EmbeddingWorker.cpp +15 -5
  19. package/src/EmbeddingWorker.h +2 -1
  20. package/src/LlamaCompletionWorker.cpp +1 -1
  21. package/src/LlamaContext.cpp +81 -18
  22. package/src/LlamaContext.h +2 -0
  23. package/src/llama.cpp/.github/workflows/build.yml +197 -159
  24. package/src/llama.cpp/.github/workflows/docker.yml +5 -8
  25. package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
  26. package/src/llama.cpp/.github/workflows/server.yml +21 -14
  27. package/src/llama.cpp/CMakeLists.txt +11 -6
  28. package/src/llama.cpp/Sources/llama/llama.h +4 -0
  29. package/src/llama.cpp/cmake/common.cmake +33 -0
  30. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
  31. package/src/llama.cpp/common/CMakeLists.txt +6 -2
  32. package/src/llama.cpp/common/arg.cpp +426 -245
  33. package/src/llama.cpp/common/common.cpp +143 -80
  34. package/src/llama.cpp/common/common.h +81 -24
  35. package/src/llama.cpp/common/sampling.cpp +53 -19
  36. package/src/llama.cpp/common/sampling.h +22 -1
  37. package/src/llama.cpp/common/speculative.cpp +274 -0
  38. package/src/llama.cpp/common/speculative.h +28 -0
  39. package/src/llama.cpp/docs/build.md +101 -148
  40. package/src/llama.cpp/examples/CMakeLists.txt +32 -13
  41. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  42. package/src/llama.cpp/examples/batched/batched.cpp +5 -4
  43. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  44. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  45. package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
  46. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
  47. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  48. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
  49. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  50. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
  51. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
  52. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  54. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
  55. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  56. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
  57. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  58. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  59. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  60. package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
  61. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/infill/infill.cpp +1 -1
  63. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
  64. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
  65. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  66. package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
  67. package/src/llama.cpp/examples/llava/clip.cpp +262 -66
  68. package/src/llama.cpp/examples/llava/clip.h +8 -2
  69. package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
  70. package/src/llama.cpp/examples/llava/llava.cpp +46 -19
  71. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
  72. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
  73. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
  75. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  76. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
  77. package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
  78. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/main/main.cpp +9 -5
  80. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
  81. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  82. package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
  83. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  84. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  85. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
  87. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  88. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  89. package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
  90. package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
  91. package/src/llama.cpp/examples/run/run.cpp +911 -0
  92. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
  94. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
  95. package/src/llama.cpp/examples/server/server.cpp +1758 -886
  96. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  97. package/src/llama.cpp/examples/server/utils.hpp +94 -304
  98. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  99. package/src/llama.cpp/examples/simple/simple.cpp +4 -0
  100. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
  101. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
  102. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  103. package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
  104. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
  105. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
  106. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  107. package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
  108. package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
  109. package/src/llama.cpp/examples/tts/tts.cpp +932 -0
  110. package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
  111. package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
  112. package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
  113. package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  114. package/src/llama.cpp/ggml/include/ggml.h +106 -24
  115. package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
  123. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
  124. package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
  125. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
  126. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
  127. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
  128. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
  129. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
  130. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
  131. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
  132. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
  133. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
  134. package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
  135. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
  136. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  137. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  138. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
  139. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
  140. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
  141. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  142. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
  143. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
  144. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  145. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  146. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
  147. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
  148. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  149. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  150. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
  151. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
  152. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
  153. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
  154. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
  155. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
  156. package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
  157. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
  158. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
  159. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
  160. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
  161. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
  162. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
  163. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
  164. package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
  165. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
  166. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
  167. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
  168. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
  169. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
  170. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
  171. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
  172. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  173. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
  174. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
  175. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
  176. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
  177. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
  178. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
  182. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
  183. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
  184. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
  185. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
  187. package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
  188. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
  189. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
  190. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
  191. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
  192. package/src/llama.cpp/ggml/src/ggml.c +367 -207
  193. package/src/llama.cpp/include/llama-cpp.h +25 -0
  194. package/src/llama.cpp/include/llama.h +26 -19
  195. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  196. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  197. package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
  198. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  199. package/src/llama.cpp/src/CMakeLists.txt +2 -7
  200. package/src/llama.cpp/src/llama-grammar.cpp +15 -15
  201. package/src/llama.cpp/src/llama-grammar.h +2 -5
  202. package/src/llama.cpp/src/llama-sampling.cpp +35 -90
  203. package/src/llama.cpp/src/llama-vocab.cpp +6 -1
  204. package/src/llama.cpp/src/llama.cpp +1748 -640
  205. package/src/llama.cpp/src/unicode.cpp +62 -51
  206. package/src/llama.cpp/src/unicode.h +9 -10
  207. package/src/llama.cpp/tests/CMakeLists.txt +48 -37
  208. package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
  209. package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
  210. package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
  211. package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
  212. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
  213. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
  214. package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
  215. package/src/llama.cpp/tests/test-rope.cpp +61 -20
  216. package/src/llama.cpp/tests/test-sampling.cpp +2 -2
  217. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
  218. package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
  219. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
  220. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
  221. package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
  222. package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
  223. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
  224. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
  225. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
package/src/llama.cpp/.github/workflows/build.yml

@@ -160,66 +160,6 @@ jobs:
           path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
           name: llama-bin-macos-x64.zip
 
-  ubuntu-focal-make:
-    runs-on: ubuntu-20.04
-    env:
-      LLAMA_NODE_AVAILABLE: true
-      LLAMA_PYTHON_AVAILABLE: true
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Dependencies
-        id: depends
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential gcc-8
-
-      - uses: actions/setup-node@v4
-        with:
-          node-version: "20"
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Build
-        id: make_build
-        env:
-          LLAMA_FATAL_WARNINGS: 1
-        run: |
-          CC=gcc-8 make -j $(nproc)
-
-      - name: Test
-        id: make_test
-        run: |
-          CC=gcc-8 make tests -j $(nproc)
-          make test -j $(nproc)
-
-  ubuntu-focal-make-curl:
-    runs-on: ubuntu-20.04
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Dependencies
-        id: depends
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential gcc-8 libcurl4-openssl-dev
-
-      - name: Build
-        id: make_build
-        env:
-          LLAMA_FATAL_WARNINGS: 1
-          LLAMA_CURL: 1
-        run: |
-          CC=gcc-8 make -j $(nproc)
-
   ubuntu-latest-cmake:
     runs-on: ubuntu-latest
 
@@ -377,7 +317,7 @@ jobs:
           wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
           sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
           sudo apt-get update -y
-          sudo apt-get install -y build-essential vulkan-sdk
+          sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk
 
       - name: Build
         id: cmake_build
@@ -387,6 +327,12 @@ jobs:
           cmake -DGGML_VULKAN=ON ..
           cmake --build . --config Release -j $(nproc)
 
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          ctest -L main --verbose --timeout 900
+
   ubuntu-22-cmake-hip:
     runs-on: ubuntu-22.04
     container: rocm/dev-ubuntu-22.04:6.0.2
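With mesa-vulkan-drivers now installed, the Vulkan job gains a Test step instead of stopping after the build, presumably because Mesa gives the GPU-less runner a usable Vulkan device. A minimal local reproduction of the new build-and-test sequence (a sketch, assuming the LunarG SDK and Mesa drivers from the steps above are installed):

    # Configure and build the Vulkan backend, then run the same labeled tests as CI.
    cmake -B build -DGGML_VULKAN=ON
    cmake --build build --config Release -j $(nproc)
    (cd build && ctest -L main --verbose --timeout 900)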
@@ -517,36 +463,6 @@ jobs:
           cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON ..
           cmake --build . --config Release -j $(nproc)
 
-  # TODO: build with GGML_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
-  # how to debug it.
-  # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124
-  macOS-latest-make:
-    runs-on: macos-latest
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Dependencies
-        id: depends
-        continue-on-error: true
-        run: |
-          brew update
-
-      - name: Build
-        id: make_build
-        env:
-          LLAMA_FATAL_WARNINGS: 1
-        run: |
-          GGML_NO_METAL=1 make -j $(sysctl -n hw.logicalcpu)
-
-      - name: Test
-        id: make_test
-        run: |
-          GGML_NO_METAL=1 make tests -j $(sysctl -n hw.logicalcpu)
-          GGML_NO_METAL=1 make test -j $(sysctl -n hw.logicalcpu)
-
   # TODO: build with GGML_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know
   # how to debug it.
   # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
@@ -660,15 +576,26 @@ jobs:
         run: |
           brew update
 
-      - name: xcodebuild for swift package
-        id: xcodebuild
+      - name: Build llama.cpp with CMake
+        id: cmake_build
         run: |
-          xcodebuild -scheme llama -destination "${{ matrix.destination }}"
+          sysctl -a
+          mkdir build
+          cd build
+          cmake -G Xcode .. \
+            -DGGML_METAL_USE_BF16=ON \
+            -DGGML_METAL_EMBED_LIBRARY=ON \
+            -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TESTS=OFF \
+            -DLLAMA_BUILD_SERVER=OFF \
+            -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
+          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
+          sudo cmake --install . --config Release
 
-      - name: Build Swift Example
-        id: make_build_swift_example
+      - name: xcodebuild for swift package
+        id: xcodebuild
         run: |
-          make swift
+          xcodebuild -scheme llama-Package -destination "${{ matrix.destination }}"
 
   windows-msys2:
     runs-on: windows-latest
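The macOS Swift job replaces the removed make swift target with a CMake build-and-install followed by xcodebuild against the llama-Package scheme. Boiled down, the new flow is (a sketch; CI supplies the real -destination via matrix.destination, and "generic/platform=macOS" below is only an illustrative value):

    # Build and install the library, then build the Swift package against it.
    cmake -B build -G Xcode -DGGML_METAL_USE_BF16=ON -DGGML_METAL_EMBED_LIBRARY=ON \
        -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_SERVER=OFF
    cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
    sudo cmake --install build --config Release
    xcodebuild -scheme llama-Package -destination "generic/platform=macOS"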
@@ -695,21 +622,6 @@ jobs:
             mingw-w64-${{matrix.env}}-cmake
             mingw-w64-${{matrix.env}}-openblas
 
-      - name: Build using make
-        shell: msys2 {0}
-        run: |
-          make -j $(nproc)
-
-      - name: Clean after building using make
-        shell: msys2 {0}
-        run: |
-          make clean
-
-      - name: Build using make w/ OpenBLAS
-        shell: msys2 {0}
-        run: |
-          make GGML_OPENBLAS=1 -j $(nproc)
-
       - name: Build using CMake
         shell: msys2 {0}
         run: |
@@ -728,7 +640,7 @@ jobs:
           cmake --build build --config ${{ matrix.build }} -j $(nproc)
 
   windows-latest-cmake:
-    runs-on: windows-2019
+    runs-on: windows-latest
 
     env:
       OPENBLAS_VERSION: 0.3.23
@@ -756,6 +668,8 @@ jobs:
             defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'msvc-arm64'
             defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+          - build: 'llvm-arm64-opencl-adreno'
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
 
     steps:
       - name: Clone
@@ -797,6 +711,28 @@ jobs:
         run: |
           choco install ninja
 
+      - name: Install OpenCL Headers and Libs
+        id: install_opencl
+        if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }}
+        run: |
+          git clone https://github.com/KhronosGroup/OpenCL-Headers
+          cd OpenCL-Headers
+          mkdir build && cd build
+          cmake .. `
+            -DBUILD_TESTING=OFF `
+            -DOPENCL_HEADERS_BUILD_TESTING=OFF `
+            -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
+            -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
+          cmake --build . --target install
+          git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
+          cd OpenCL-ICD-Loader
+          mkdir build-arm64-release && cd build-arm64-release
+          cmake .. `
+            -A arm64 `
+            -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" `
+            -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
+          cmake --build . --target install --config release
+
       - name: Build
         id: cmake_build
         run: |
@@ -826,7 +762,7 @@ jobs:
       - name: Test
         id: cmake_test
         # not all machines have native AVX-512
-        if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
+        if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
         run: |
           cd build
           ctest -L main -C Release --verbose --timeout 900
@@ -871,12 +807,33 @@ jobs:
           path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
           name: llama-bin-win-${{ matrix.build }}.zip
 
-  windows-latest-cmake-cuda:
+  ubuntu-latest-cmake-cuda:
+    runs-on: ubuntu-latest
+    container: nvidia/cuda:12.6.2-devel-ubuntu24.04
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        env:
+          DEBIAN_FRONTEND: noninteractive
+        run: |
+          apt update
+          apt install -y cmake build-essential ninja-build libgomp1 git
+
+      - name: Build with CMake
+        run: |
+          cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=89-real -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined -DLLAMA_FATAL_WARNINGS=ON
+          cmake --build build
+
+  windows-2019-cmake-cuda:
     runs-on: windows-2019
 
     strategy:
       matrix:
-        cuda: ['12.2.0', '11.7.1']
+        cuda: ['12.4', '11.7']
         build: ['cuda']
 
     steps:
@@ -884,24 +841,83 @@ jobs:
         id: checkout
         uses: actions/checkout@v4
         with:
-          fetch-depth: 0
-
-      - name: Install CUDA toolkit
-        id: cuda-toolkit
-        uses: Jimver/cuda-toolkit@v0.2.15
+          fetch-depth: 0
+
+      - name: Install Cuda Toolkit 11.7
+        if: ${{ matrix.cuda == '11.7' }}
+        run: |
+          mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
+          choco install unzip -y
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
+          unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+          echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+
+      - name: Install Cuda Toolkit 12.4
+        if: ${{ matrix.cuda == '12.4' }}
+        run: |
+          mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
+          choco install unzip -y
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
+          unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+          echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+
+      - name: Install ccache
+        uses: hendrikmuhs/ccache-action@v1.2
         with:
-          cuda: ${{ matrix.cuda }}
-          method: 'network'
-          sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
+          key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
+
+      - name: Install Ninja
+        id: install_ninja
+        run: |
+          choco install ninja
 
       - name: Build
         id: cmake_build
+        shell: cmd
         run: |
-          mkdir build
-          cd build
-          cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON
-          cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1)) -t ggml
-          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
+          call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
+          cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON
+          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
+          cmake --build build --config Release -j %NINJA_JOBS% -t ggml
+          cmake --build build --config Release
 
       - name: Determine tag name
         id: tag
@@ -930,10 +946,12 @@ jobs:
           name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
 
       - name: Copy and pack Cuda runtime
+        if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
         run: |
-          echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
+          echo "Cuda install location: ${{ env.CUDA_PATH }}"
           $dst='.\build\bin\cudart\'
-          robocopy "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
+          robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
+          robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
           7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*
 
       - name: Upload Cuda runtime
@@ -952,7 +970,7 @@ jobs:
 
     env:
       WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
-      WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel
+      WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
       ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
     steps:
       - name: Clone
@@ -962,7 +980,8 @@ jobs:
           fetch-depth: 0
 
       - name: Install
-        run: scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
+        run: |
+          scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
 
       - name: Build
         id: cmake_build
@@ -981,25 +1000,33 @@ jobs:
             echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
           fi
 
-      - name: Pack artifacts
+      - name: Build the release package
         id: pack_artifacts
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
         run: |
           echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"
-          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.4.dll" ./build/bin
+
+          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
           cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
           cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
 
-          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/pi_win_proxy_loader.dll" ./build/bin
-          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/pi_level_zero.dll" ./build/bin
-          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl7.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
+
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
           cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
           cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
           cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
+
+          cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin
+
           echo "cp oneAPI running time dll files to ./build/bin done"
           7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
 
-      - name: Upload artifacts
+      - name: Upload the release package
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
         uses: actions/upload-artifact@v4
         with:
@@ -1030,6 +1057,11 @@ jobs:
         run: |
           & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
 
+      - name: Install ccache
+        uses: hendrikmuhs/ccache-action@v1.2
+        with:
+          key: ${{ github.job }}
+
       - name: Build
         id: cmake_build
         run: |
@@ -1050,6 +1082,8 @@ jobs:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
 
       - name: Install
         id: depends
@@ -1109,6 +1143,29 @@ jobs:
       - name: Checkout code
        uses: actions/checkout@v4
 
+      - name: Build
+        id: cmake_build
+        run: |
+          sysctl -a
+          mkdir build
+          cd build
+          cmake -G Xcode .. \
+            -DGGML_METAL_USE_BF16=ON \
+            -DGGML_METAL_EMBED_LIBRARY=ON \
+            -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TESTS=OFF \
+            -DLLAMA_BUILD_SERVER=OFF \
+            -DCMAKE_SYSTEM_NAME=iOS \
+            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
+            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
+          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
+          sudo cmake --install . --config Release
+
+      - name: xcodebuild for swift package
+        id: xcodebuild
+        run: |
+          xcodebuild -scheme llama-Package -destination 'generic/platform=iOS'
+
       - name: Build Xcode project
         run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
 
@@ -1136,35 +1193,16 @@ jobs:
 
           ./gradlew build --no-daemon
 
-  # freeBSD-latest:
-  #   runs-on: macos-12
-  #   steps:
-  #   - name: Clone
-  #     uses: actions/checkout@v4
-  #
-  #   - name: Build
-  #     uses: cross-platform-actions/action@v0.19.0
-  #     with:
-  #       operating_system: freebsd
-  #       version: '13.2'
-  #       hypervisor: 'qemu'
-  #       run: |
-  #         sudo pkg update
-  #         sudo pkg install -y gmake automake autoconf pkgconf llvm15 openblas
-  #         gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`
-
   release:
     if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
 
     runs-on: ubuntu-latest
 
     needs:
-      - ubuntu-focal-make
       - ubuntu-latest-cmake
-      - macOS-latest-make
       - macOS-latest-cmake
       - windows-latest-cmake
-      - windows-latest-cmake-cuda
+      - windows-2019-cmake-cuda
       - windows-latest-cmake-hip-release
       - macOS-latest-cmake-arm64
       - macOS-latest-cmake-x64
package/src/llama.cpp/.github/workflows/docker.yml

@@ -10,12 +10,10 @@
 name: Publish Docker image
 
 on:
-  #pull_request:
-  push:
-    branches:
-      - master
-    paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
-  workflow_dispatch: # allows manual triggering, useful for debugging
+  workflow_dispatch: # allows manual triggering
+  schedule:
+    # Rebuild daily rather than on every push because it is expensive
+    - cron: '12 4 * * *'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
@@ -29,7 +27,6 @@ permissions:
 jobs:
   push_to_registry:
     name: Push Docker image to Docker Hub
-    #if: github.event.pull_request.draft == false
 
     runs-on: ubuntu-latest
     env:
@@ -117,7 +114,7 @@ jobs:
           swap-storage: true
 
       - name: Build and push Docker image (tagged + versioned)
-        if: github.event_name == 'push'
+        if: ${{ github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
         uses: docker/build-push-action@v6
         with:
           context: .
package/src/llama.cpp/.github/workflows/python-lint.yml

@@ -1,6 +1,13 @@
 name: flake8 Lint
 
-on: [push, pull_request]
+on:
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/python-lint.yml', '**/*.py']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['.github/workflows/python-lint.yml', '**/*.py']
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
package/src/llama.cpp/.github/workflows/server.yml

@@ -76,20 +76,26 @@ jobs:
         run: |
           pip install -r examples/server/tests/requirements.txt
 
-      - name: Verify server deps
-        id: verify_server_deps
+      # Setup nodejs (to be used for verifying bundled index.html)
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22.11.0'
+
+      - name: Verify bundled index.html
+        id: verify_server_index_html
         run: |
           git config --global --add safe.directory $(realpath .)
-          cd examples/server
-          git ls-files --others --modified
+          cd examples/server/webui
           git status
-          ./deps.sh
+          npm ci
+          npm run build
           git status
-          not_ignored_files="$(git ls-files --others --modified)"
-          echo "Modified files: ${not_ignored_files}"
-          if [ -n "${not_ignored_files}" ]; then
-            echo "Repository is dirty or server deps are not built as expected"
-            echo "${not_ignored_files}"
+          modified_files="$(git status -s)"
+          echo "Modified files: ${modified_files}"
+          if [ -n "${modified_files}" ]; then
+            echo "Repository is dirty or server/webui is not built as expected"
+            echo "Hint: You may need to follow Web UI build guide in server/README.md"
+            echo "${modified_files}"
             exit 1
           fi
 
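The webui check now rebuilds the committed bundle with npm and fails if the working tree changes. The same verification can be run locally (a sketch, assuming a Node.js version matching the one pinned above):

    # Rebuild the web UI and check whether the checked-in bundle is stale.
    cd examples/server/webui
    npm ci
    npm run build
    git status -s   # any output here would make the CI step fail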
@@ -122,14 +128,14 @@
         id: server_integration_tests
         run: |
           cd examples/server/tests
-          PORT=8888 ./tests.sh
+          ./tests.sh
 
       - name: Slow tests
         id: server_integration_tests_slow
         if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
         run: |
           cd examples/server/tests
-          PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
+          SLOW_TESTS=1 ./tests.sh
 
 
   server-windows:
@@ -180,11 +186,12 @@
         run: |
           cd examples/server/tests
           $env:PYTHONIOENCODING = ":replace"
-          behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
+          pytest -v -x
 
       - name: Slow tests
         id: server_integration_tests_slow
         if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
         run: |
           cd examples/server/tests
-          behave.exe --stop --no-skipped --no-capture --tags slow
+          $env:SLOW_TESTS = "1"
+          pytest -v -x
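The behave-based scenarios are gone on both platforms; tests.sh now wraps a pytest suite, and slow tests are selected with an environment variable instead of tags. On Linux the two steps reduce to the following (a sketch, mirroring the commands above):

    cd examples/server/tests
    ./tests.sh                # fast suite; the fixed PORT=8888 is no longer needed
    SLOW_TESTS=1 ./tests.sh   # opt in to the slow tests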