@fugood/llama.node 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
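A comparison like the one below can be reproduced locally; a minimal sketch, assuming npm and a POSIX shell are available (the registry page does not specify its own tooling, and `diff -ruN` is just one reasonable choice — binary `.node` artifacts will only report as "Binary files differ"):

  # download both published tarballs from the npm registry
  npm pack @fugood/llama.node@0.3.3 @fugood/llama.node@0.3.4
  mkdir v0.3.3 v0.3.4
  # scoped packages unpack from "fugood-llama.node-<version>.tgz" into a package/ directory
  tar -xzf fugood-llama.node-0.3.3.tgz -C v0.3.3
  tar -xzf fugood-llama.node-0.3.4.tgz -C v0.3.4
  diff -ruN v0.3.3/package v0.3.4/package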
- package/CMakeLists.txt +5 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +18 -1
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +15 -5
- package/src/EmbeddingWorker.h +2 -1
- package/src/LlamaCompletionWorker.cpp +1 -1
- package/src/LlamaContext.cpp +81 -18
- package/src/LlamaContext.h +2 -0
- package/src/llama.cpp/.github/workflows/build.yml +197 -159
- package/src/llama.cpp/.github/workflows/docker.yml +5 -8
- package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
- package/src/llama.cpp/.github/workflows/server.yml +21 -14
- package/src/llama.cpp/CMakeLists.txt +11 -6
- package/src/llama.cpp/Sources/llama/llama.h +4 -0
- package/src/llama.cpp/cmake/common.cmake +33 -0
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -2
- package/src/llama.cpp/common/arg.cpp +426 -245
- package/src/llama.cpp/common/common.cpp +143 -80
- package/src/llama.cpp/common/common.h +81 -24
- package/src/llama.cpp/common/sampling.cpp +53 -19
- package/src/llama.cpp/common/sampling.h +22 -1
- package/src/llama.cpp/common/speculative.cpp +274 -0
- package/src/llama.cpp/common/speculative.h +28 -0
- package/src/llama.cpp/docs/build.md +101 -148
- package/src/llama.cpp/examples/CMakeLists.txt +32 -13
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +5 -4
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +1 -1
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
- package/src/llama.cpp/examples/llava/clip.cpp +262 -66
- package/src/llama.cpp/examples/llava/clip.h +8 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/llava.cpp +46 -19
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
- package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +9 -5
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
- package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/run/run.cpp +911 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
- package/src/llama.cpp/examples/server/server.cpp +1758 -886
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +94 -304
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +4 -0
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
- package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tts/tts.cpp +932 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
- package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
- package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
- package/src/llama.cpp/ggml/include/ggml.h +106 -24
- package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
- package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
- package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
- package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
- package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
- package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
- package/src/llama.cpp/ggml/src/ggml.c +367 -207
- package/src/llama.cpp/include/llama-cpp.h +25 -0
- package/src/llama.cpp/include/llama.h +26 -19
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/src/CMakeLists.txt +2 -7
- package/src/llama.cpp/src/llama-grammar.cpp +15 -15
- package/src/llama.cpp/src/llama-grammar.h +2 -5
- package/src/llama.cpp/src/llama-sampling.cpp +35 -90
- package/src/llama.cpp/src/llama-vocab.cpp +6 -1
- package/src/llama.cpp/src/llama.cpp +1748 -640
- package/src/llama.cpp/src/unicode.cpp +62 -51
- package/src/llama.cpp/src/unicode.h +9 -10
- package/src/llama.cpp/tests/CMakeLists.txt +48 -37
- package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
- package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
- package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
- package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
- package/src/llama.cpp/tests/test-rope.cpp +61 -20
- package/src/llama.cpp/tests/test-sampling.cpp +2 -2
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
- package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
- package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
package/src/llama.cpp/.github/workflows/build.yml

@@ -160,66 +160,6 @@ jobs:
           path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
           name: llama-bin-macos-x64.zip
 
-  ubuntu-focal-make:
-    runs-on: ubuntu-20.04
-    env:
-      LLAMA_NODE_AVAILABLE: true
-      LLAMA_PYTHON_AVAILABLE: true
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Dependencies
-        id: depends
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential gcc-8
-
-      - uses: actions/setup-node@v4
-        with:
-          node-version: "20"
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Build
-        id: make_build
-        env:
-          LLAMA_FATAL_WARNINGS: 1
-        run: |
-          CC=gcc-8 make -j $(nproc)
-
-      - name: Test
-        id: make_test
-        run: |
-          CC=gcc-8 make tests -j $(nproc)
-          make test -j $(nproc)
-
-  ubuntu-focal-make-curl:
-    runs-on: ubuntu-20.04
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Dependencies
-        id: depends
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential gcc-8 libcurl4-openssl-dev
-
-      - name: Build
-        id: make_build
-        env:
-          LLAMA_FATAL_WARNINGS: 1
-          LLAMA_CURL: 1
-        run: |
-          CC=gcc-8 make -j $(nproc)
-
   ubuntu-latest-cmake:
     runs-on: ubuntu-latest
 

@@ -377,7 +317,7 @@ jobs:
           wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
           sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
           sudo apt-get update -y
-          sudo apt-get install -y build-essential vulkan-sdk
+          sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk
 
       - name: Build
         id: cmake_build

@@ -387,6 +327,12 @@ jobs:
           cmake -DGGML_VULKAN=ON ..
           cmake --build . --config Release -j $(nproc)
 
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          ctest -L main --verbose --timeout 900
+
   ubuntu-22-cmake-hip:
     runs-on: ubuntu-22.04
     container: rocm/dev-ubuntu-22.04:6.0.2

@@ -517,36 +463,6 @@ jobs:
           cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON ..
           cmake --build . --config Release -j $(nproc)
 
-  # TODO: build with GGML_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
-  # how to debug it.
-  # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124
-  macOS-latest-make:
-    runs-on: macos-latest
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Dependencies
-        id: depends
-        continue-on-error: true
-        run: |
-          brew update
-
-      - name: Build
-        id: make_build
-        env:
-          LLAMA_FATAL_WARNINGS: 1
-        run: |
-          GGML_NO_METAL=1 make -j $(sysctl -n hw.logicalcpu)
-
-      - name: Test
-        id: make_test
-        run: |
-          GGML_NO_METAL=1 make tests -j $(sysctl -n hw.logicalcpu)
-          GGML_NO_METAL=1 make test -j $(sysctl -n hw.logicalcpu)
-
   # TODO: build with GGML_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know
   # how to debug it.
   # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584

@@ -660,15 +576,26 @@ jobs:
         run: |
           brew update
 
-      - name:
-        id:
+      - name: Build llama.cpp with CMake
+        id: cmake_build
         run: |
-
+          sysctl -a
+          mkdir build
+          cd build
+          cmake -G Xcode .. \
+            -DGGML_METAL_USE_BF16=ON \
+            -DGGML_METAL_EMBED_LIBRARY=ON \
+            -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TESTS=OFF \
+            -DLLAMA_BUILD_SERVER=OFF \
+            -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
+          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
+          sudo cmake --install . --config Release
 
-      - name:
-        id:
+      - name: xcodebuild for swift package
+        id: xcodebuild
         run: |
-
+          xcodebuild -scheme llama-Package -destination "${{ matrix.destination }}"
 
   windows-msys2:
     runs-on: windows-latest

@@ -695,21 +622,6 @@ jobs:
             mingw-w64-${{matrix.env}}-cmake
             mingw-w64-${{matrix.env}}-openblas
 
-      - name: Build using make
-        shell: msys2 {0}
-        run: |
-          make -j $(nproc)
-
-      - name: Clean after building using make
-        shell: msys2 {0}
-        run: |
-          make clean
-
-      - name: Build using make w/ OpenBLAS
-        shell: msys2 {0}
-        run: |
-          make GGML_OPENBLAS=1 -j $(nproc)
-
       - name: Build using CMake
         shell: msys2 {0}
         run: |

@@ -728,7 +640,7 @@ jobs:
           cmake --build build --config ${{ matrix.build }} -j $(nproc)
 
   windows-latest-cmake:
-    runs-on: windows-
+    runs-on: windows-latest
 
     env:
       OPENBLAS_VERSION: 0.3.23

@@ -756,6 +668,8 @@ jobs:
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
          - build: 'msvc-arm64'
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+         - build: 'llvm-arm64-opencl-adreno'
+           defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
 
     steps:
       - name: Clone

@@ -797,6 +711,28 @@ jobs:
         run: |
           choco install ninja
 
+      - name: Install OpenCL Headers and Libs
+        id: install_opencl
+        if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }}
+        run: |
+          git clone https://github.com/KhronosGroup/OpenCL-Headers
+          cd OpenCL-Headers
+          mkdir build && cd build
+          cmake .. `
+            -DBUILD_TESTING=OFF `
+            -DOPENCL_HEADERS_BUILD_TESTING=OFF `
+            -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
+            -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
+          cmake --build . --target install
+          git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
+          cd OpenCL-ICD-Loader
+          mkdir build-arm64-release && cd build-arm64-release
+          cmake .. `
+            -A arm64 `
+            -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" `
+            -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
+          cmake --build . --target install --config release
+
       - name: Build
         id: cmake_build
         run: |

@@ -826,7 +762,7 @@ jobs:
       - name: Test
         id: cmake_test
         # not all machines have native AVX-512
-        if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
+        if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
         run: |
           cd build
           ctest -L main -C Release --verbose --timeout 900

@@ -871,12 +807,33 @@ jobs:
           path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
           name: llama-bin-win-${{ matrix.build }}.zip
 
-
+  ubuntu-latest-cmake-cuda:
+    runs-on: ubuntu-latest
+    container: nvidia/cuda:12.6.2-devel-ubuntu24.04
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        env:
+          DEBIAN_FRONTEND: noninteractive
+        run: |
+          apt update
+          apt install -y cmake build-essential ninja-build libgomp1 git
+
+      - name: Build with CMake
+        run: |
+          cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=89-real -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined -DLLAMA_FATAL_WARNINGS=ON
+          cmake --build build
+
+  windows-2019-cmake-cuda:
     runs-on: windows-2019
 
     strategy:
       matrix:
-        cuda: ['12.
+        cuda: ['12.4', '11.7']
         build: ['cuda']
 
     steps:

@@ -884,24 +841,83 @@ jobs:
         id: checkout
         uses: actions/checkout@v4
         with:
-
-
-      - name: Install
-
-
+          fetch-depth: 0
+
+      - name: Install Cuda Toolkit 11.7
+        if: ${{ matrix.cuda == '11.7' }}
+        run: |
+          mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
+          choco install unzip -y
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
+          unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+          echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+
+      - name: Install Cuda Toolkit 12.4
+        if: ${{ matrix.cuda == '12.4' }}
+        run: |
+          mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
+          choco install unzip -y
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
+          unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+          echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+
+      - name: Install ccache
+        uses: hendrikmuhs/ccache-action@v1.2
         with:
-
-
-
+          key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
+
+      - name: Install Ninja
+        id: install_ninja
+        run: |
+          choco install ninja
 
       - name: Build
         id: cmake_build
+        shell: cmd
         run: |
-
-
-
-          cmake --build
-          cmake --build
+          call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
+          cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON
+          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
+          cmake --build build --config Release -j %NINJA_JOBS% -t ggml
+          cmake --build build --config Release
 
       - name: Determine tag name
         id: tag

@@ -930,10 +946,12 @@ jobs:
           name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
 
       - name: Copy and pack Cuda runtime
+        if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
        run: |
-          echo "Cuda install location: ${{
+          echo "Cuda install location: ${{ env.CUDA_PATH }}"
           $dst='.\build\bin\cudart\'
-          robocopy "${{
+          robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
+          robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
           7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*
 
       - name: Upload Cuda runtime

@@ -952,7 +970,7 @@ jobs:
 
     env:
       WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
-      WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel
+      WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
       ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
     steps:
       - name: Clone

@@ -962,7 +980,8 @@ jobs:
           fetch-depth: 0
 
       - name: Install
-        run:
+        run: |
+          scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
 
       - name: Build
         id: cmake_build

@@ -981,25 +1000,33 @@ jobs:
             echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
           fi
 
-      - name:
+      - name: Build the release package
         id: pack_artifacts
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
         run: |
           echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"
-
+
+          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
           cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
           cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
 
-          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/
-          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/
-          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
+
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
           cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
           cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
           cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
+
+          cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin
+
           echo "cp oneAPI running time dll files to ./build/bin done"
           7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
 
-      - name: Upload
+      - name: Upload the release package
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
         uses: actions/upload-artifact@v4
         with:

@@ -1030,6 +1057,11 @@ jobs:
         run: |
           & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
 
+      - name: Install ccache
+        uses: hendrikmuhs/ccache-action@v1.2
+        with:
+          key: ${{ github.job }}
+
       - name: Build
         id: cmake_build
         run: |

@@ -1050,6 +1082,8 @@ jobs:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
 
       - name: Install
         id: depends

@@ -1109,6 +1143,29 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@v4
 
+      - name: Build
+        id: cmake_build
+        run: |
+          sysctl -a
+          mkdir build
+          cd build
+          cmake -G Xcode .. \
+            -DGGML_METAL_USE_BF16=ON \
+            -DGGML_METAL_EMBED_LIBRARY=ON \
+            -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TESTS=OFF \
+            -DLLAMA_BUILD_SERVER=OFF \
+            -DCMAKE_SYSTEM_NAME=iOS \
+            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
+            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
+          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
+          sudo cmake --install . --config Release
+
+      - name: xcodebuild for swift package
+        id: xcodebuild
+        run: |
+          xcodebuild -scheme llama-Package -destination 'generic/platform=iOS'
+
       - name: Build Xcode project
         run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build

@@ -1136,35 +1193,16 @@ jobs:
 
           ./gradlew build --no-daemon
 
-#  freeBSD-latest:
-#    runs-on: macos-12
-#    steps:
-#    - name: Clone
-#      uses: actions/checkout@v4
-#
-#    - name: Build
-#      uses: cross-platform-actions/action@v0.19.0
-#      with:
-#        operating_system: freebsd
-#        version: '13.2'
-#        hypervisor: 'qemu'
-#        run: |
-#          sudo pkg update
-#          sudo pkg install -y gmake automake autoconf pkgconf llvm15 openblas
-#          gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`
-
   release:
     if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
 
     runs-on: ubuntu-latest
 
     needs:
-      - ubuntu-focal-make
       - ubuntu-latest-cmake
-      - macOS-latest-make
       - macOS-latest-cmake
       - windows-latest-cmake
-      - windows-
+      - windows-2019-cmake-cuda
      - windows-latest-cmake-hip-release
      - macOS-latest-cmake-arm64
      - macOS-latest-cmake-x64
package/src/llama.cpp/.github/workflows/docker.yml

@@ -10,12 +10,10 @@
 name: Publish Docker image
 
 on:
-  #
-
-
-
-    paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
-  workflow_dispatch: # allows manual triggering, useful for debugging
+  workflow_dispatch: # allows manual triggering
+  schedule:
+    # Rebuild daily rather than on every push because it is expensive
+    - cron: '12 4 * * *'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}

@@ -29,7 +27,6 @@ permissions:
 jobs:
   push_to_registry:
     name: Push Docker image to Docker Hub
-    #if: github.event.pull_request.draft == false
 
     runs-on: ubuntu-latest
     env:

@@ -117,7 +114,7 @@ jobs:
           swap-storage: true
 
       - name: Build and push Docker image (tagged + versioned)
-        if: github.event_name == 'push'
+        if: ${{ github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
        uses: docker/build-push-action@v6
        with:
          context: .
package/src/llama.cpp/.github/workflows/python-lint.yml

@@ -1,6 +1,13 @@
 name: flake8 Lint
 
-on:
+on:
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/python-lint.yml', '**/*.py']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['.github/workflows/python-lint.yml', '**/*.py']
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
package/src/llama.cpp/.github/workflows/server.yml

@@ -76,20 +76,26 @@ jobs:
         run: |
           pip install -r examples/server/tests/requirements.txt
 
-
-
+      # Setup nodejs (to be used for verifying bundled index.html)
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22.11.0'
+
+      - name: Verify bundled index.html
+        id: verify_server_index_html
         run: |
           git config --global --add safe.directory $(realpath .)
-          cd examples/server
-          git ls-files --others --modified
+          cd examples/server/webui
           git status
-
+          npm ci
+          npm run build
           git status
-
-          echo "Modified files: ${
-          if [ -n "${
-            echo "Repository is dirty or server
-            echo "
+          modified_files="$(git status -s)"
+          echo "Modified files: ${modified_files}"
+          if [ -n "${modified_files}" ]; then
+            echo "Repository is dirty or server/webui is not built as expected"
+            echo "Hint: You may need to follow Web UI build guide in server/README.md"
+            echo "${modified_files}"
             exit 1
           fi
 

@@ -122,14 +128,14 @@ jobs:
         id: server_integration_tests
         run: |
           cd examples/server/tests
-
+          ./tests.sh
 
       - name: Slow tests
         id: server_integration_tests_slow
         if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
         run: |
           cd examples/server/tests
-
+          SLOW_TESTS=1 ./tests.sh
 
 
   server-windows:

@@ -180,11 +186,12 @@ jobs:
         run: |
           cd examples/server/tests
           $env:PYTHONIOENCODING = ":replace"
-
+          pytest -v -x
 
       - name: Slow tests
         id: server_integration_tests_slow
         if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
         run: |
           cd examples/server/tests
-
+          $env:SLOW_TESTS = "1"
+          pytest -v -x