@fugood/llama.node 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -8
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +4 -2
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +2 -2
- package/src/LlamaCompletionWorker.cpp +10 -10
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +14 -17
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +5 -4
- package/src/llama.cpp/.github/workflows/build.yml +137 -29
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +46 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +26 -11
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/common/CMakeLists.txt +10 -10
- package/src/llama.cpp/common/arg.cpp +2041 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +523 -1861
- package/src/llama.cpp/common/common.h +234 -106
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +39 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +356 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/docs/build.md +72 -17
- package/src/llama.cpp/examples/CMakeLists.txt +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +49 -65
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
- package/src/llama.cpp/examples/infill/infill.cpp +131 -192
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +686 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
- package/src/llama.cpp/examples/llava/llava.cpp +146 -26
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
- package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
- package/src/llama.cpp/examples/main/main.cpp +216 -313
- package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
- package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
- package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
- package/src/llama.cpp/examples/server/server.cpp +1347 -1531
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +396 -107
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +132 -106
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
- package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +272 -505
- package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
- package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
- package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
- package/src/llama.cpp/include/llama.h +296 -285
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +2 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
- package/src/llama.cpp/src/llama-sampling.h +39 -47
- package/src/llama.cpp/src/llama-vocab.cpp +390 -127
- package/src/llama.cpp/src/llama-vocab.h +60 -20
- package/src/llama.cpp/src/llama.cpp +6215 -3263
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +4 -2
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
- package/src/llama.cpp/tests/test-barrier.cpp +94 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +2 -1
- package/src/llama.cpp/tests/test-sampling.cpp +226 -142
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/common/train.cpp +0 -1513
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
|
@@ -19,10 +19,18 @@ concurrency:
|
|
|
19
19
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
|
20
20
|
cancel-in-progress: true
|
|
21
21
|
|
|
22
|
+
# Fine-grained permissions
|
|
23
|
+
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
|
24
|
+
permissions:
|
|
25
|
+
contents: write # for creating release
|
|
26
|
+
|
|
22
27
|
env:
|
|
23
28
|
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
|
24
29
|
GGML_NLOOP: 3
|
|
25
30
|
GGML_N_THREADS: 1
|
|
31
|
+
LLAMA_LOG_COLORS: 1
|
|
32
|
+
LLAMA_LOG_PREFIX: 1
|
|
33
|
+
LLAMA_LOG_TIMESTAMPS: 1
|
|
26
34
|
|
|
27
35
|
jobs:
|
|
28
36
|
macOS-latest-cmake-arm64:
|
|
@@ -47,7 +55,13 @@ jobs:
|
|
|
47
55
|
sysctl -a
|
|
48
56
|
mkdir build
|
|
49
57
|
cd build
|
|
50
|
-
cmake
|
|
58
|
+
cmake .. \
|
|
59
|
+
-DLLAMA_FATAL_WARNINGS=ON \
|
|
60
|
+
-DLLAMA_CURL=ON \
|
|
61
|
+
-DGGML_METAL_USE_BF16=ON \
|
|
62
|
+
-DGGML_METAL_EMBED_LIBRARY=ON \
|
|
63
|
+
-DGGML_RPC=ON \
|
|
64
|
+
-DBUILD_SHARED_LIBS=OFF
|
|
51
65
|
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
|
|
52
66
|
|
|
53
67
|
- name: Test
|
|
@@ -84,7 +98,7 @@ jobs:
|
|
|
84
98
|
name: llama-bin-macos-arm64.zip
|
|
85
99
|
|
|
86
100
|
macOS-latest-cmake-x64:
|
|
87
|
-
runs-on: macos-
|
|
101
|
+
runs-on: macos-13
|
|
88
102
|
|
|
89
103
|
steps:
|
|
90
104
|
- name: Clone
|
|
@@ -105,7 +119,12 @@ jobs:
|
|
|
105
119
|
sysctl -a
|
|
106
120
|
# Metal is disabled due to intermittent failures with Github runners not having a GPU:
|
|
107
121
|
# https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
|
|
108
|
-
cmake -B build
|
|
122
|
+
cmake -B build \
|
|
123
|
+
-DLLAMA_FATAL_WARNINGS=ON \
|
|
124
|
+
-DLLAMA_CURL=ON \
|
|
125
|
+
-DGGML_METAL=OFF \
|
|
126
|
+
-DGGML_RPC=ON \
|
|
127
|
+
-DBUILD_SHARED_LIBS=OFF
|
|
109
128
|
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
|
|
110
129
|
|
|
111
130
|
- name: Test
|
|
@@ -222,7 +241,7 @@ jobs:
|
|
|
222
241
|
run: |
|
|
223
242
|
mkdir build
|
|
224
243
|
cd build
|
|
225
|
-
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
|
|
244
|
+
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
|
|
226
245
|
cmake --build . --config Release -j $(nproc)
|
|
227
246
|
|
|
228
247
|
- name: Test
|
|
@@ -375,7 +394,7 @@ jobs:
|
|
|
375
394
|
steps:
|
|
376
395
|
- name: Clone
|
|
377
396
|
id: checkout
|
|
378
|
-
uses: actions/checkout@
|
|
397
|
+
uses: actions/checkout@v4
|
|
379
398
|
|
|
380
399
|
- name: Dependencies
|
|
381
400
|
id: depends
|
|
@@ -386,22 +405,43 @@ jobs:
|
|
|
386
405
|
- name: Build with native CMake HIP support
|
|
387
406
|
id: cmake_build
|
|
388
407
|
run: |
|
|
389
|
-
cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -
|
|
408
|
+
cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIP=ON
|
|
390
409
|
cmake --build build --config Release -j $(nproc)
|
|
391
410
|
|
|
392
411
|
- name: Build with legacy HIP support
|
|
393
412
|
id: cmake_build_legacy_hip
|
|
394
413
|
run: |
|
|
395
|
-
cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -
|
|
414
|
+
cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIP=ON
|
|
396
415
|
cmake --build build2 --config Release -j $(nproc)
|
|
397
416
|
|
|
417
|
+
ubuntu-22-cmake-musa:
|
|
418
|
+
runs-on: ubuntu-22.04
|
|
419
|
+
container: mthreads/musa:rc3.1.0-devel-ubuntu22.04
|
|
420
|
+
|
|
421
|
+
steps:
|
|
422
|
+
- name: Clone
|
|
423
|
+
id: checkout
|
|
424
|
+
uses: actions/checkout@v4
|
|
425
|
+
|
|
426
|
+
- name: Dependencies
|
|
427
|
+
id: depends
|
|
428
|
+
run: |
|
|
429
|
+
apt-get update
|
|
430
|
+
apt-get install -y build-essential git cmake libcurl4-openssl-dev
|
|
431
|
+
|
|
432
|
+
- name: Build with native CMake MUSA support
|
|
433
|
+
id: cmake_build
|
|
434
|
+
run: |
|
|
435
|
+
cmake -B build -S . -DGGML_MUSA=ON
|
|
436
|
+
cmake --build build --config Release -j $(nproc)
|
|
437
|
+
|
|
398
438
|
ubuntu-22-cmake-sycl:
|
|
399
439
|
runs-on: ubuntu-22.04
|
|
400
440
|
|
|
401
441
|
continue-on-error: true
|
|
402
442
|
|
|
403
443
|
steps:
|
|
404
|
-
- uses: actions/checkout@
|
|
444
|
+
- uses: actions/checkout@v4
|
|
405
445
|
|
|
406
446
|
- name: add oneAPI to apt
|
|
407
447
|
shell: bash
|
|
@@ -442,7 +482,7 @@ jobs:
|
|
|
442
482
|
continue-on-error: true
|
|
443
483
|
|
|
444
484
|
steps:
|
|
445
|
-
- uses: actions/checkout@
|
|
485
|
+
- uses: actions/checkout@v4
|
|
446
486
|
|
|
447
487
|
- name: add oneAPI to apt
|
|
448
488
|
shell: bash
|
|
@@ -546,7 +586,7 @@ jobs:
|
|
|
546
586
|
steps:
|
|
547
587
|
- name: Clone
|
|
548
588
|
id: checkout
|
|
549
|
-
uses: actions/checkout@
|
|
589
|
+
uses: actions/checkout@v4
|
|
550
590
|
|
|
551
591
|
- name: Dependencies
|
|
552
592
|
id: depends
|
|
@@ -561,6 +601,7 @@ jobs:
|
|
|
561
601
|
mkdir build
|
|
562
602
|
cd build
|
|
563
603
|
cmake -G Xcode .. \
|
|
604
|
+
-DGGML_METAL_USE_BF16=ON \
|
|
564
605
|
-DGGML_METAL_EMBED_LIBRARY=ON \
|
|
565
606
|
-DLLAMA_BUILD_EXAMPLES=OFF \
|
|
566
607
|
-DLLAMA_BUILD_TESTS=OFF \
|
|
@@ -576,7 +617,7 @@ jobs:
|
|
|
576
617
|
steps:
|
|
577
618
|
- name: Clone
|
|
578
619
|
id: checkout
|
|
579
|
-
uses: actions/checkout@
|
|
620
|
+
uses: actions/checkout@v4
|
|
580
621
|
|
|
581
622
|
- name: Dependencies
|
|
582
623
|
id: depends
|
|
@@ -591,6 +632,7 @@ jobs:
|
|
|
591
632
|
mkdir build
|
|
592
633
|
cd build
|
|
593
634
|
cmake -G Xcode .. \
|
|
635
|
+
-DGGML_METAL_USE_BF16=ON \
|
|
594
636
|
-DGGML_METAL_EMBED_LIBRARY=ON \
|
|
595
637
|
-DLLAMA_BUILD_EXAMPLES=OFF \
|
|
596
638
|
-DLLAMA_BUILD_TESTS=OFF \
|
|
@@ -610,7 +652,7 @@ jobs:
|
|
|
610
652
|
steps:
|
|
611
653
|
- name: Clone
|
|
612
654
|
id: checkout
|
|
613
|
-
uses: actions/checkout@
|
|
655
|
+
uses: actions/checkout@v4
|
|
614
656
|
|
|
615
657
|
- name: Dependencies
|
|
616
658
|
id: depends
|
|
@@ -696,22 +738,20 @@ jobs:
|
|
|
696
738
|
strategy:
|
|
697
739
|
matrix:
|
|
698
740
|
include:
|
|
699
|
-
- build: 'rpc-x64'
|
|
700
|
-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
|
|
701
741
|
- build: 'noavx-x64'
|
|
702
|
-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
|
|
742
|
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
|
|
703
743
|
- build: 'avx2-x64'
|
|
704
|
-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
|
|
744
|
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
|
|
705
745
|
- build: 'avx-x64'
|
|
706
|
-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
|
|
746
|
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
|
|
707
747
|
- build: 'avx512-x64'
|
|
708
|
-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
|
|
748
|
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
|
|
709
749
|
- build: 'openblas-x64'
|
|
710
|
-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
|
|
750
|
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
|
|
711
751
|
- build: 'kompute-x64'
|
|
712
|
-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
|
|
752
|
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
|
|
713
753
|
- build: 'vulkan-x64'
|
|
714
|
-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
|
|
754
|
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
|
|
715
755
|
- build: 'llvm-arm64'
|
|
716
756
|
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
|
|
717
757
|
- build: 'msvc-arm64'
|
|
@@ -728,7 +768,7 @@ jobs:
|
|
|
728
768
|
id: clone_kompute
|
|
729
769
|
if: ${{ matrix.build == 'kompute-x64' }}
|
|
730
770
|
run: |
|
|
731
|
-
git submodule update --init ggml/src/kompute
|
|
771
|
+
git submodule update --init ggml/src/ggml-kompute/kompute
|
|
732
772
|
|
|
733
773
|
- name: Download OpenBLAS
|
|
734
774
|
id: get_openblas
|
|
@@ -859,8 +899,9 @@ jobs:
|
|
|
859
899
|
run: |
|
|
860
900
|
mkdir build
|
|
861
901
|
cd build
|
|
862
|
-
cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON
|
|
863
|
-
cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
|
|
902
|
+
cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON
|
|
903
|
+
cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1)) -t ggml
|
|
904
|
+
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
|
|
864
905
|
|
|
865
906
|
- name: Determine tag name
|
|
866
907
|
id: tag
|
|
@@ -910,7 +951,7 @@ jobs:
|
|
|
910
951
|
shell: bash
|
|
911
952
|
|
|
912
953
|
env:
|
|
913
|
-
WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/
|
|
954
|
+
WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
|
|
914
955
|
WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel
|
|
915
956
|
ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
|
|
916
957
|
steps:
|
|
@@ -954,6 +995,7 @@ jobs:
|
|
|
954
995
|
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl7.dll" ./build/bin
|
|
955
996
|
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
|
|
956
997
|
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
|
|
998
|
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
|
|
957
999
|
echo "cp oneAPI running time dll files to ./build/bin done"
|
|
958
1000
|
7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
|
|
959
1001
|
|
|
@@ -965,19 +1007,56 @@ jobs:
|
|
|
965
1007
|
name: llama-bin-win-sycl-x64.zip
|
|
966
1008
|
|
|
967
1009
|
windows-latest-cmake-hip:
|
|
1010
|
+
if: ${{ github.event.inputs.create_release != 'true' }}
|
|
1011
|
+
runs-on: windows-latest
|
|
1012
|
+
|
|
1013
|
+
steps:
|
|
1014
|
+
- name: Clone
|
|
1015
|
+
id: checkout
|
|
1016
|
+
uses: actions/checkout@v4
|
|
1017
|
+
|
|
1018
|
+
- name: Install
|
|
1019
|
+
id: depends
|
|
1020
|
+
run: |
|
|
1021
|
+
$ErrorActionPreference = "Stop"
|
|
1022
|
+
write-host "Downloading AMD HIP SDK Installer"
|
|
1023
|
+
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
|
|
1024
|
+
write-host "Installing AMD HIP SDK"
|
|
1025
|
+
Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
|
|
1026
|
+
write-host "Completed AMD HIP SDK installation"
|
|
1027
|
+
|
|
1028
|
+
- name: Verify ROCm
|
|
1029
|
+
id: verify
|
|
1030
|
+
run: |
|
|
1031
|
+
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
|
|
1032
|
+
|
|
1033
|
+
- name: Build
|
|
1034
|
+
id: cmake_build
|
|
1035
|
+
run: |
|
|
1036
|
+
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
|
|
1037
|
+
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
|
|
1038
|
+
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
|
|
1039
|
+
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
|
|
1040
|
+
|
|
1041
|
+
windows-latest-cmake-hip-release:
|
|
1042
|
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
|
968
1043
|
runs-on: windows-latest
|
|
969
1044
|
|
|
1045
|
+
strategy:
|
|
1046
|
+
matrix:
|
|
1047
|
+
gpu_target: [gfx1100, gfx1101, gfx1030]
|
|
1048
|
+
|
|
970
1049
|
steps:
|
|
971
1050
|
- name: Clone
|
|
972
1051
|
id: checkout
|
|
973
|
-
uses: actions/checkout@
|
|
1052
|
+
uses: actions/checkout@v4
|
|
974
1053
|
|
|
975
1054
|
- name: Install
|
|
976
1055
|
id: depends
|
|
977
1056
|
run: |
|
|
978
1057
|
$ErrorActionPreference = "Stop"
|
|
979
1058
|
write-host "Downloading AMD HIP SDK Installer"
|
|
980
|
-
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-
|
|
1059
|
+
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
|
|
981
1060
|
write-host "Installing AMD HIP SDK"
|
|
982
1061
|
Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
|
|
983
1062
|
write-host "Completed AMD HIP SDK installation"
|
|
@@ -992,8 +1071,36 @@ jobs:
|
|
|
992
1071
|
run: |
|
|
993
1072
|
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
|
|
994
1073
|
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
|
|
995
|
-
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -
|
|
996
|
-
cmake --build build
|
|
1074
|
+
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
|
|
1075
|
+
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
|
|
1076
|
+
md "build\bin\rocblas\library\"
|
|
1077
|
+
cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
|
|
1078
|
+
cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
|
|
1079
|
+
cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
|
|
1080
|
+
|
|
1081
|
+
- name: Determine tag name
|
|
1082
|
+
id: tag
|
|
1083
|
+
shell: bash
|
|
1084
|
+
run: |
|
|
1085
|
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
|
1086
|
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
|
1087
|
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
|
1088
|
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
|
1089
|
+
else
|
|
1090
|
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
|
1091
|
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
|
1092
|
+
fi
|
|
1093
|
+
|
|
1094
|
+
- name: Pack artifacts
|
|
1095
|
+
id: pack_artifacts
|
|
1096
|
+
run: |
|
|
1097
|
+
7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*
|
|
1098
|
+
|
|
1099
|
+
- name: Upload artifacts
|
|
1100
|
+
uses: actions/upload-artifact@v4
|
|
1101
|
+
with:
|
|
1102
|
+
path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
|
|
1103
|
+
name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
|
|
997
1104
|
|
|
998
1105
|
ios-xcode-build:
|
|
999
1106
|
runs-on: macos-latest
|
|
@@ -1058,6 +1165,7 @@ jobs:
|
|
|
1058
1165
|
- macOS-latest-cmake
|
|
1059
1166
|
- windows-latest-cmake
|
|
1060
1167
|
- windows-latest-cmake-cuda
|
|
1168
|
+
- windows-latest-cmake-hip-release
|
|
1061
1169
|
- macOS-latest-cmake-arm64
|
|
1062
1170
|
- macOS-latest-cmake-x64
|
|
1063
1171
|
|
|
@@ -3,6 +3,11 @@ on:
|
|
|
3
3
|
schedule:
|
|
4
4
|
- cron: "42 0 * * *"
|
|
5
5
|
|
|
6
|
+
# Fine-grant permission
|
|
7
|
+
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
|
8
|
+
permissions:
|
|
9
|
+
issues: write
|
|
10
|
+
|
|
6
11
|
jobs:
|
|
7
12
|
close-issues:
|
|
8
13
|
runs-on: ubuntu-latest
|
|
@@ -15,11 +15,17 @@ on:
|
|
|
15
15
|
branches:
|
|
16
16
|
- master
|
|
17
17
|
paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
|
|
18
|
+
workflow_dispatch: # allows manual triggering, useful for debugging
|
|
18
19
|
|
|
19
20
|
concurrency:
|
|
20
21
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
|
21
22
|
cancel-in-progress: true
|
|
22
23
|
|
|
24
|
+
# Fine-grant permission
|
|
25
|
+
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
|
26
|
+
permissions:
|
|
27
|
+
packages: write
|
|
28
|
+
|
|
23
29
|
jobs:
|
|
24
30
|
push_to_registry:
|
|
25
31
|
name: Push Docker image to Docker Hub
|
|
@@ -37,15 +43,20 @@ jobs:
|
|
|
37
43
|
- { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
|
|
38
44
|
- { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
|
|
39
45
|
- { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
|
|
40
|
-
- { tag: "light-
|
|
41
|
-
- { tag: "server-
|
|
42
|
-
|
|
46
|
+
- { tag: "light-musa", dockerfile: ".devops/llama-cli-musa.Dockerfile", platforms: "linux/amd64" }
|
|
47
|
+
- { tag: "server-musa", dockerfile: ".devops/llama-server-musa.Dockerfile", platforms: "linux/amd64" }
|
|
48
|
+
- { tag: "full-musa", dockerfile: ".devops/full-musa.Dockerfile", platforms: "linux/amd64" }
|
|
49
|
+
# Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
|
|
50
|
+
#- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
|
51
|
+
#- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
|
43
52
|
#- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
|
44
53
|
- { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" }
|
|
45
54
|
- { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" }
|
|
46
55
|
steps:
|
|
47
56
|
- name: Check out the repo
|
|
48
57
|
uses: actions/checkout@v4
|
|
58
|
+
with:
|
|
59
|
+
fetch-depth: 0 # preserve git history, so we can determine the build number
|
|
49
60
|
|
|
50
61
|
- name: Set up QEMU
|
|
51
62
|
uses: docker/setup-qemu-action@v2
|
|
@@ -60,6 +71,34 @@ jobs:
|
|
|
60
71
|
username: ${{ github.repository_owner }}
|
|
61
72
|
password: ${{ secrets.GITHUB_TOKEN }}
|
|
62
73
|
|
|
74
|
+
- name: Determine tag name
|
|
75
|
+
id: tag
|
|
76
|
+
shell: bash
|
|
77
|
+
run: |
|
|
78
|
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
|
79
|
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
|
80
|
+
REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
|
|
81
|
+
REPO_NAME="${{ github.event.repository.name }}"
|
|
82
|
+
|
|
83
|
+
# determine tag name postfix (build number, commit hash)
|
|
84
|
+
if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
|
|
85
|
+
TAG_POSTFIX="b${BUILD_NUMBER}"
|
|
86
|
+
else
|
|
87
|
+
SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
|
|
88
|
+
TAG_POSTFIX="${SAFE_NAME}-${SHORT_HASH}"
|
|
89
|
+
fi
|
|
90
|
+
|
|
91
|
+
# list all tags possible
|
|
92
|
+
TAGS=""
|
|
93
|
+
TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }},"
|
|
94
|
+
TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }}-${TAG_POSTFIX}"
|
|
95
|
+
|
|
96
|
+
echo "output_tags=$TAGS" >> $GITHUB_OUTPUT
|
|
97
|
+
echo "output_tags=$TAGS" # print out for debugging
|
|
98
|
+
env:
|
|
99
|
+
GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
|
100
|
+
GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
|
|
101
|
+
|
|
63
102
|
# https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
|
|
64
103
|
- name: Free Disk Space (Ubuntu)
|
|
65
104
|
uses: jlumbroso/free-disk-space@main
|
|
@@ -77,40 +116,13 @@ jobs:
|
|
|
77
116
|
docker-images: true
|
|
78
117
|
swap-storage: true
|
|
79
118
|
|
|
80
|
-
- name:
|
|
81
|
-
id: tag
|
|
82
|
-
shell: bash
|
|
83
|
-
run: |
|
|
84
|
-
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
|
85
|
-
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
|
86
|
-
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
|
87
|
-
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
|
88
|
-
else
|
|
89
|
-
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
|
90
|
-
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
|
91
|
-
fi
|
|
92
|
-
|
|
93
|
-
- name: Downcase github.repository_owner
|
|
94
|
-
run: |
|
|
95
|
-
echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV
|
|
96
|
-
env:
|
|
97
|
-
GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
|
|
98
|
-
|
|
99
|
-
- name: Build and push Docker image (versioned)
|
|
119
|
+
- name: Build and push Docker image (tagged + versioned)
|
|
100
120
|
if: github.event_name == 'push'
|
|
101
|
-
uses: docker/build-push-action@
|
|
121
|
+
uses: docker/build-push-action@v6
|
|
102
122
|
with:
|
|
103
123
|
context: .
|
|
104
124
|
push: true
|
|
105
125
|
platforms: ${{ matrix.config.platforms }}
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
- name: Build and push Docker image (tagged)
|
|
110
|
-
uses: docker/build-push-action@v4
|
|
111
|
-
with:
|
|
112
|
-
context: .
|
|
113
|
-
push: ${{ github.event_name == 'push' }}
|
|
114
|
-
platforms: ${{ matrix.config.platforms }}
|
|
115
|
-
tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
|
|
126
|
+
# tag list is generated from step above
|
|
127
|
+
tags: ${{ steps.tag.outputs.output_tags }}
|
|
116
128
|
file: ${{ matrix.config.dockerfile }}
|
|
@@ -21,6 +21,13 @@ concurrency:
|
|
|
21
21
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
|
22
22
|
cancel-in-progress: true
|
|
23
23
|
|
|
24
|
+
# Fine-grant permission
|
|
25
|
+
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
|
26
|
+
permissions:
|
|
27
|
+
# https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
|
|
28
|
+
id-token: write
|
|
29
|
+
contents: read
|
|
30
|
+
|
|
24
31
|
jobs:
|
|
25
32
|
nix-build-aarch64:
|
|
26
33
|
runs-on: ubuntu-latest
|
|
@@ -12,6 +12,13 @@ concurrency:
|
|
|
12
12
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
|
13
13
|
cancel-in-progress: true
|
|
14
14
|
|
|
15
|
+
# Fine-grant permission
|
|
16
|
+
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
|
17
|
+
permissions:
|
|
18
|
+
# https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
|
|
19
|
+
id-token: write
|
|
20
|
+
contents: read
|
|
21
|
+
|
|
15
22
|
jobs:
|
|
16
23
|
nix-eval:
|
|
17
24
|
strategy:
|
|
@@ -6,15 +6,13 @@ on:
|
|
|
6
6
|
- '.github/workflows/python-check-requirements.yml'
|
|
7
7
|
- 'scripts/check-requirements.sh'
|
|
8
8
|
- 'convert*.py'
|
|
9
|
-
- 'requirements
|
|
10
|
-
- 'requirements/*.txt'
|
|
9
|
+
- '**/requirements*.txt'
|
|
11
10
|
pull_request:
|
|
12
11
|
paths:
|
|
13
12
|
- '.github/workflows/python-check-requirements.yml'
|
|
14
13
|
- 'scripts/check-requirements.sh'
|
|
15
14
|
- 'convert*.py'
|
|
16
|
-
- 'requirements
|
|
17
|
-
- 'requirements/*.txt'
|
|
15
|
+
- '**/requirements*.txt'
|
|
18
16
|
|
|
19
17
|
concurrency:
|
|
20
18
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
|
@@ -4,11 +4,13 @@ on:
|
|
|
4
4
|
push:
|
|
5
5
|
paths:
|
|
6
6
|
- '.github/workflows/python-type-check.yml'
|
|
7
|
+
- 'pyrightconfig.json'
|
|
7
8
|
- '**.py'
|
|
8
9
|
- '**/requirements*.txt'
|
|
9
10
|
pull_request:
|
|
10
11
|
paths:
|
|
11
12
|
- '.github/workflows/python-type-check.yml'
|
|
13
|
+
- 'pyrightconfig.json'
|
|
12
14
|
- '**.py'
|
|
13
15
|
- '**/requirements*.txt'
|
|
14
16
|
|
|
@@ -33,6 +35,6 @@ jobs:
|
|
|
33
35
|
- name: Type-check with Pyright
|
|
34
36
|
uses: jakebailey/pyright-action@v2
|
|
35
37
|
with:
|
|
36
|
-
version: 1.1.
|
|
38
|
+
version: 1.1.382
|
|
37
39
|
level: warning
|
|
38
40
|
warnings: true
|
|
@@ -20,6 +20,12 @@ on:
|
|
|
20
20
|
types: [opened, synchronize, reopened]
|
|
21
21
|
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
|
|
22
22
|
|
|
23
|
+
env:
|
|
24
|
+
LLAMA_LOG_COLORS: 1
|
|
25
|
+
LLAMA_LOG_PREFIX: 1
|
|
26
|
+
LLAMA_LOG_TIMESTAMPS: 1
|
|
27
|
+
LLAMA_LOG_VERBOSITY: 10
|
|
28
|
+
|
|
23
29
|
concurrency:
|
|
24
30
|
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
|
|
25
31
|
cancel-in-progress: true
|
|
@@ -173,6 +179,7 @@ jobs:
|
|
|
173
179
|
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
|
|
174
180
|
run: |
|
|
175
181
|
cd examples/server/tests
|
|
182
|
+
$env:PYTHONIOENCODING = ":replace"
|
|
176
183
|
behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
|
|
177
184
|
|
|
178
185
|
- name: Slow tests
|
|
@@ -62,6 +62,9 @@ option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF)
|
|
|
62
62
|
option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
|
|
63
63
|
option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
|
|
64
64
|
|
|
65
|
+
# utils
|
|
66
|
+
option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE})
|
|
67
|
+
|
|
65
68
|
# extra artifacts
|
|
66
69
|
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
|
|
67
70
|
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
|
|
@@ -82,11 +85,15 @@ set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
|
|
|
82
85
|
|
|
83
86
|
# change the default for these ggml options
|
|
84
87
|
if (NOT DEFINED GGML_LLAMAFILE)
|
|
85
|
-
set(
|
|
88
|
+
set(GGML_LLAMAFILE_DEFAULT ON)
|
|
89
|
+
endif()
|
|
90
|
+
|
|
91
|
+
if (NOT DEFINED GGML_AMX)
|
|
92
|
+
set(GGML_AMX ON)
|
|
86
93
|
endif()
|
|
87
94
|
|
|
88
|
-
if (NOT DEFINED
|
|
89
|
-
set(
|
|
95
|
+
if (NOT DEFINED GGML_CUDA_GRAPHS)
|
|
96
|
+
set(GGML_CUDA_GRAPHS_DEFAULT ON)
|
|
90
97
|
endif()
|
|
91
98
|
|
|
92
99
|
# transition helpers
|
|
@@ -133,15 +140,21 @@ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location o
|
|
|
133
140
|
set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
|
|
134
141
|
set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
|
|
135
142
|
|
|
136
|
-
|
|
137
143
|
# At the moment some compile definitions are placed within the ggml/src
|
|
138
144
|
# directory but not exported on the `ggml` target. This could be improved by
|
|
139
145
|
# determining _precisely_ which defines are necessary for the llama-config
|
|
140
146
|
# package.
|
|
141
147
|
#
|
|
142
|
-
|
|
148
|
+
set(GGML_TRANSIENT_DEFINES)
|
|
149
|
+
get_target_property(GGML_DIRECTORY ggml SOURCE_DIR)
|
|
150
|
+
get_directory_property(GGML_DIR_DEFINES DIRECTORY ${GGML_DIRECTORY} COMPILE_DEFINITIONS)
|
|
151
|
+
if (GGML_DIR_DEFINES)
|
|
152
|
+
list(APPEND GGML_TRANSIENT_DEFINES ${GGML_DIR_DEFINES})
|
|
153
|
+
endif()
|
|
143
154
|
get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS)
|
|
144
|
-
|
|
155
|
+
if (GGML_TARGET_DEFINES)
|
|
156
|
+
list(APPEND GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES})
|
|
157
|
+
endif()
|
|
145
158
|
get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)
|
|
146
159
|
|
|
147
160
|
set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h)
|
|
@@ -184,17 +197,19 @@ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
|
|
|
184
197
|
DESTINATION lib/pkgconfig)
|
|
185
198
|
|
|
186
199
|
#
|
|
187
|
-
# programs, examples and tests
|
|
200
|
+
# utils, programs, examples and tests
|
|
188
201
|
#
|
|
189
202
|
|
|
190
|
-
|
|
203
|
+
if (LLAMA_BUILD_COMMON)
|
|
204
|
+
add_subdirectory(common)
|
|
205
|
+
endif()
|
|
191
206
|
|
|
192
|
-
if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
|
207
|
+
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
|
193
208
|
include(CTest)
|
|
194
209
|
add_subdirectory(tests)
|
|
195
|
-
endif
|
|
210
|
+
endif()
|
|
196
211
|
|
|
197
|
-
if (LLAMA_BUILD_EXAMPLES)
|
|
212
|
+
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
|
|
198
213
|
add_subdirectory(examples)
|
|
199
214
|
add_subdirectory(pocs)
|
|
200
215
|
endif()
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
set( CMAKE_SYSTEM_NAME Darwin )
|
|
2
|
+
set( CMAKE_SYSTEM_PROCESSOR arm64 )
|
|
3
|
+
|
|
4
|
+
set( target arm64-apple-darwin-macho )
|
|
5
|
+
|
|
6
|
+
set( CMAKE_C_COMPILER clang )
|
|
7
|
+
set( CMAKE_CXX_COMPILER clang++ )
|
|
8
|
+
|
|
9
|
+
set( CMAKE_C_COMPILER_TARGET ${target} )
|
|
10
|
+
set( CMAKE_CXX_COMPILER_TARGET ${target} )
|
|
11
|
+
|
|
12
|
+
set( arch_c_flags "-march=armv8.4-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
|
|
13
|
+
set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function" )
|
|
14
|
+
|
|
15
|
+
set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
|
|
16
|
+
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
|