@fugood/llama.node 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +7 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +18 -1
- package/package.json +1 -1
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +17 -7
- package/src/EmbeddingWorker.h +2 -1
- package/src/LlamaCompletionWorker.cpp +8 -8
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +89 -27
- package/src/LlamaContext.h +2 -0
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +4 -4
- package/src/llama.cpp/.github/workflows/build.yml +240 -168
- package/src/llama.cpp/.github/workflows/docker.yml +8 -8
- package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
- package/src/llama.cpp/.github/workflows/server.yml +21 -14
- package/src/llama.cpp/CMakeLists.txt +14 -6
- package/src/llama.cpp/Sources/llama/llama.h +4 -0
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/cmake/common.cmake +33 -0
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -4
- package/src/llama.cpp/common/arg.cpp +986 -770
- package/src/llama.cpp/common/arg.h +22 -22
- package/src/llama.cpp/common/common.cpp +212 -351
- package/src/llama.cpp/common/common.h +204 -117
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +50 -50
- package/src/llama.cpp/common/log.h +18 -18
- package/src/llama.cpp/common/ngram-cache.cpp +36 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +163 -121
- package/src/llama.cpp/common/sampling.h +41 -20
- package/src/llama.cpp/common/speculative.cpp +274 -0
- package/src/llama.cpp/common/speculative.h +28 -0
- package/src/llama.cpp/docs/build.md +134 -161
- package/src/llama.cpp/examples/CMakeLists.txt +33 -14
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +19 -18
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +31 -13
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +41 -87
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +439 -459
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +2 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
- package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
- package/src/llama.cpp/examples/llava/clip.cpp +263 -66
- package/src/llama.cpp/examples/llava/clip.h +8 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
- package/src/llama.cpp/examples/llava/llava.cpp +83 -22
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +16 -15
- package/src/llama.cpp/examples/lookup/lookup.cpp +30 -30
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +73 -114
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +16 -16
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
- package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/run/run.cpp +911 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +38 -21
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -16
- package/src/llama.cpp/examples/server/server.cpp +2073 -1339
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +354 -277
- package/src/llama.cpp/examples/simple/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/simple/simple.cpp +130 -94
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +200 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +68 -64
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +3 -3
- package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tts/tts.cpp +932 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +54 -36
- package/src/llama.cpp/ggml/include/ggml-backend.h +63 -34
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +135 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
- package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +159 -417
- package/src/llama.cpp/ggml/src/CMakeLists.txt +121 -1155
- package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -28
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +57 -36
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +552 -0
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +306 -867
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +216 -65
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +76 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +343 -177
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +336 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- package/src/llama.cpp/ggml/src/{ggml-aarch64.c → ggml-cpu/ggml-cpu-aarch64.cpp} +1299 -246
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +14 -242
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +628 -0
- package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +666 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +152 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +104 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +393 -22
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +360 -127
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +105 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +188 -10702
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +478 -300
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +84 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +36 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +34 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3638 -4151
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -87
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +7 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +92 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2138 -887
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- package/src/llama.cpp/ggml/src/ggml.c +4427 -20125
- package/src/llama.cpp/include/llama-cpp.h +25 -0
- package/src/llama.cpp/include/llama.h +93 -52
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/src/CMakeLists.txt +4 -8
- package/src/llama.cpp/src/llama-grammar.cpp +15 -15
- package/src/llama.cpp/src/llama-grammar.h +2 -5
- package/src/llama.cpp/src/llama-sampling.cpp +779 -194
- package/src/llama.cpp/src/llama-sampling.h +21 -2
- package/src/llama.cpp/src/llama-vocab.cpp +55 -10
- package/src/llama.cpp/src/llama-vocab.h +35 -11
- package/src/llama.cpp/src/llama.cpp +4317 -2979
- package/src/llama.cpp/src/unicode-data.cpp +2 -2
- package/src/llama.cpp/src/unicode.cpp +62 -51
- package/src/llama.cpp/src/unicode.h +9 -10
- package/src/llama.cpp/tests/CMakeLists.txt +48 -38
- package/src/llama.cpp/tests/test-arg-parser.cpp +15 -15
- package/src/llama.cpp/tests/test-backend-ops.cpp +324 -80
- package/src/llama.cpp/tests/test-barrier.cpp +1 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +59 -9
- package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
- package/src/llama.cpp/tests/test-log.cpp +2 -2
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +24 -21
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +62 -20
- package/src/llama.cpp/tests/test-sampling.cpp +163 -138
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
- package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
- package/src/llama.cpp/common/train.cpp +0 -1515
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -39
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +0 -600
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
|
@@ -10,12 +10,10 @@
|
|
|
10
10
|
name: Publish Docker image
|
|
11
11
|
|
|
12
12
|
on:
|
|
13
|
-
#
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
|
|
18
|
-
workflow_dispatch: # allows manual triggering, useful for debugging
|
|
13
|
+
workflow_dispatch: # allows manual triggering
|
|
14
|
+
schedule:
|
|
15
|
+
# Rebuild daily rather than on every push because it is expensive
|
|
16
|
+
- cron: '12 4 * * *'
|
|
19
17
|
|
|
20
18
|
concurrency:
|
|
21
19
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
|
@@ -29,7 +27,6 @@ permissions:
|
|
|
29
27
|
jobs:
|
|
30
28
|
push_to_registry:
|
|
31
29
|
name: Push Docker image to Docker Hub
|
|
32
|
-
#if: github.event.pull_request.draft == false
|
|
33
30
|
|
|
34
31
|
runs-on: ubuntu-latest
|
|
35
32
|
env:
|
|
@@ -43,6 +40,9 @@ jobs:
|
|
|
43
40
|
- { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
|
|
44
41
|
- { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
|
|
45
42
|
- { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
|
|
43
|
+
- { tag: "light-musa", dockerfile: ".devops/llama-cli-musa.Dockerfile", platforms: "linux/amd64" }
|
|
44
|
+
- { tag: "server-musa", dockerfile: ".devops/llama-server-musa.Dockerfile", platforms: "linux/amd64" }
|
|
45
|
+
- { tag: "full-musa", dockerfile: ".devops/full-musa.Dockerfile", platforms: "linux/amd64" }
|
|
46
46
|
# Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
|
|
47
47
|
#- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
|
48
48
|
#- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
|
@@ -114,7 +114,7 @@ jobs:
|
|
|
114
114
|
swap-storage: true
|
|
115
115
|
|
|
116
116
|
- name: Build and push Docker image (tagged + versioned)
|
|
117
|
-
if: github.event_name == 'push'
|
|
117
|
+
if: ${{ github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
|
|
118
118
|
uses: docker/build-push-action@v6
|
|
119
119
|
with:
|
|
120
120
|
context: .
|
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
name: flake8 Lint
|
|
2
2
|
|
|
3
|
-
on:
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- master
|
|
7
|
+
paths: ['.github/workflows/python-lint.yml', '**/*.py']
|
|
8
|
+
pull_request:
|
|
9
|
+
types: [opened, synchronize, reopened]
|
|
10
|
+
paths: ['.github/workflows/python-lint.yml', '**/*.py']
|
|
4
11
|
|
|
5
12
|
concurrency:
|
|
6
13
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
|
@@ -76,20 +76,26 @@ jobs:
|
|
|
76
76
|
run: |
|
|
77
77
|
pip install -r examples/server/tests/requirements.txt
|
|
78
78
|
|
|
79
|
-
|
|
80
|
-
|
|
79
|
+
# Setup nodejs (to be used for verifying bundled index.html)
|
|
80
|
+
- uses: actions/setup-node@v4
|
|
81
|
+
with:
|
|
82
|
+
node-version: '22.11.0'
|
|
83
|
+
|
|
84
|
+
- name: Verify bundled index.html
|
|
85
|
+
id: verify_server_index_html
|
|
81
86
|
run: |
|
|
82
87
|
git config --global --add safe.directory $(realpath .)
|
|
83
|
-
cd examples/server
|
|
84
|
-
git ls-files --others --modified
|
|
88
|
+
cd examples/server/webui
|
|
85
89
|
git status
|
|
86
|
-
|
|
90
|
+
npm ci
|
|
91
|
+
npm run build
|
|
87
92
|
git status
|
|
88
|
-
|
|
89
|
-
echo "Modified files: ${
|
|
90
|
-
if [ -n "${
|
|
91
|
-
echo "Repository is dirty or server
|
|
92
|
-
echo "
|
|
93
|
+
modified_files="$(git status -s)"
|
|
94
|
+
echo "Modified files: ${modified_files}"
|
|
95
|
+
if [ -n "${modified_files}" ]; then
|
|
96
|
+
echo "Repository is dirty or server/webui is not built as expected"
|
|
97
|
+
echo "Hint: You may need to follow Web UI build guide in server/README.md"
|
|
98
|
+
echo "${modified_files}"
|
|
93
99
|
exit 1
|
|
94
100
|
fi
|
|
95
101
|
|
|
@@ -122,14 +128,14 @@ jobs:
|
|
|
122
128
|
id: server_integration_tests
|
|
123
129
|
run: |
|
|
124
130
|
cd examples/server/tests
|
|
125
|
-
|
|
131
|
+
./tests.sh
|
|
126
132
|
|
|
127
133
|
- name: Slow tests
|
|
128
134
|
id: server_integration_tests_slow
|
|
129
135
|
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
|
|
130
136
|
run: |
|
|
131
137
|
cd examples/server/tests
|
|
132
|
-
|
|
138
|
+
SLOW_TESTS=1 ./tests.sh
|
|
133
139
|
|
|
134
140
|
|
|
135
141
|
server-windows:
|
|
@@ -180,11 +186,12 @@ jobs:
|
|
|
180
186
|
run: |
|
|
181
187
|
cd examples/server/tests
|
|
182
188
|
$env:PYTHONIOENCODING = ":replace"
|
|
183
|
-
|
|
189
|
+
pytest -v -x
|
|
184
190
|
|
|
185
191
|
- name: Slow tests
|
|
186
192
|
id: server_integration_tests_slow
|
|
187
193
|
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
|
|
188
194
|
run: |
|
|
189
195
|
cd examples/server/tests
|
|
190
|
-
|
|
196
|
+
$env:SLOW_TESTS = "1"
|
|
197
|
+
pytest -v -x
|
|
@@ -46,6 +46,11 @@ if (WIN32)
|
|
|
46
46
|
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
|
|
47
47
|
endif()
|
|
48
48
|
|
|
49
|
+
if (MSVC)
|
|
50
|
+
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
|
|
51
|
+
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
|
|
52
|
+
endif()
|
|
53
|
+
|
|
49
54
|
#
|
|
50
55
|
# option list
|
|
51
56
|
#
|
|
@@ -63,7 +68,7 @@ option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
|
|
|
63
68
|
option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
|
|
64
69
|
|
|
65
70
|
# utils
|
|
66
|
-
option(LLAMA_BUILD_COMMON "llama: build common utils library"
|
|
71
|
+
option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE})
|
|
67
72
|
|
|
68
73
|
# extra artifacts
|
|
69
74
|
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
|
|
@@ -75,6 +80,7 @@ option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
|
|
|
75
80
|
|
|
76
81
|
# Required for relocatable CMake package
|
|
77
82
|
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
|
|
83
|
+
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
|
|
78
84
|
|
|
79
85
|
# override ggml options
|
|
80
86
|
set(GGML_SANITIZE_THREAD ${LLAMA_SANITIZE_THREAD})
|
|
@@ -136,7 +142,6 @@ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location o
|
|
|
136
142
|
set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
|
|
137
143
|
set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
|
|
138
144
|
|
|
139
|
-
|
|
140
145
|
# At the moment some compile definitions are placed within the ggml/src
|
|
141
146
|
# directory but not exported on the `ggml` target. This could be improved by
|
|
142
147
|
# determining _precisely_ which defines are necessary for the llama-config
|
|
@@ -153,8 +158,11 @@ if (GGML_TARGET_DEFINES)
|
|
|
153
158
|
list(APPEND GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES})
|
|
154
159
|
endif()
|
|
155
160
|
get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)
|
|
156
|
-
|
|
157
|
-
|
|
161
|
+
# all public headers
|
|
162
|
+
set(LLAMA_PUBLIC_HEADERS
|
|
163
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h
|
|
164
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/llama-cpp.h)
|
|
165
|
+
set_target_properties(llama PROPERTIES PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}")
|
|
158
166
|
install(TARGETS llama LIBRARY PUBLIC_HEADER)
|
|
159
167
|
|
|
160
168
|
configure_package_config_file(
|
|
@@ -201,12 +209,12 @@ if (LLAMA_BUILD_COMMON)
|
|
|
201
209
|
add_subdirectory(common)
|
|
202
210
|
endif()
|
|
203
211
|
|
|
204
|
-
if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
|
212
|
+
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
|
205
213
|
include(CTest)
|
|
206
214
|
add_subdirectory(tests)
|
|
207
215
|
endif()
|
|
208
216
|
|
|
209
|
-
if (LLAMA_BUILD_EXAMPLES)
|
|
217
|
+
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
|
|
210
218
|
add_subdirectory(examples)
|
|
211
219
|
add_subdirectory(pocs)
|
|
212
220
|
endif()
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
set( CMAKE_SYSTEM_NAME Darwin )
|
|
2
|
+
set( CMAKE_SYSTEM_PROCESSOR arm64 )
|
|
3
|
+
|
|
4
|
+
set( target arm64-apple-darwin-macho )
|
|
5
|
+
|
|
6
|
+
set( CMAKE_C_COMPILER clang )
|
|
7
|
+
set( CMAKE_CXX_COMPILER clang++ )
|
|
8
|
+
|
|
9
|
+
set( CMAKE_C_COMPILER_TARGET ${target} )
|
|
10
|
+
set( CMAKE_CXX_COMPILER_TARGET ${target} )
|
|
11
|
+
|
|
12
|
+
set( arch_c_flags "-march=armv8.4-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
|
|
13
|
+
set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function" )
|
|
14
|
+
|
|
15
|
+
set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
|
|
16
|
+
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
function(llama_add_compile_flags)
|
|
2
|
+
if (LLAMA_FATAL_WARNINGS)
|
|
3
|
+
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
|
4
|
+
list(APPEND C_FLAGS -Werror)
|
|
5
|
+
list(APPEND CXX_FLAGS -Werror)
|
|
6
|
+
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
|
7
|
+
add_compile_options(/WX)
|
|
8
|
+
endif()
|
|
9
|
+
endif()
|
|
10
|
+
|
|
11
|
+
if (LLAMA_ALL_WARNINGS)
|
|
12
|
+
if (NOT MSVC)
|
|
13
|
+
list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
|
|
14
|
+
-Werror=implicit-int -Werror=implicit-function-declaration)
|
|
15
|
+
|
|
16
|
+
list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
|
|
17
|
+
|
|
18
|
+
list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
|
|
19
|
+
|
|
20
|
+
list(APPEND C_FLAGS ${WARNING_FLAGS})
|
|
21
|
+
list(APPEND CXX_FLAGS ${WARNING_FLAGS})
|
|
22
|
+
|
|
23
|
+
ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
|
|
24
|
+
|
|
25
|
+
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
|
|
26
|
+
"$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
|
|
27
|
+
else()
|
|
28
|
+
# todo : msvc
|
|
29
|
+
set(C_FLAGS "" PARENT_SCOPE)
|
|
30
|
+
set(CXX_FLAGS "" PARENT_SCOPE)
|
|
31
|
+
endif()
|
|
32
|
+
endif()
|
|
33
|
+
endfunction()
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
set( CMAKE_SYSTEM_NAME Windows )
|
|
2
|
+
set( CMAKE_SYSTEM_PROCESSOR x86_64 )
|
|
3
|
+
|
|
4
|
+
set( CMAKE_C_COMPILER clang )
|
|
5
|
+
set( CMAKE_CXX_COMPILER clang++ )
|
|
6
|
+
|
|
7
|
+
set( arch_c_flags "-march=native" )
|
|
8
|
+
|
|
9
|
+
set( CMAKE_C_FLAGS_INIT "${arch_c_flags}" )
|
|
10
|
+
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags}" )
|
|
11
|
+
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
find_package(Threads REQUIRED)
|
|
4
4
|
|
|
5
|
+
llama_add_compile_flags()
|
|
6
|
+
|
|
5
7
|
# Build info header
|
|
6
8
|
#
|
|
7
9
|
|
|
@@ -66,8 +68,8 @@ add_library(${TARGET} STATIC
|
|
|
66
68
|
ngram-cache.h
|
|
67
69
|
sampling.cpp
|
|
68
70
|
sampling.h
|
|
69
|
-
|
|
70
|
-
|
|
71
|
+
speculative.cpp
|
|
72
|
+
speculative.h
|
|
71
73
|
)
|
|
72
74
|
|
|
73
75
|
if (BUILD_SHARED_LIBS)
|
|
@@ -79,12 +81,12 @@ set(LLAMA_COMMON_EXTRA_LIBS build_info)
|
|
|
79
81
|
# Use curl to download model url
|
|
80
82
|
if (LLAMA_CURL)
|
|
81
83
|
find_package(CURL REQUIRED)
|
|
82
|
-
|
|
84
|
+
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
|
|
83
85
|
include_directories(${CURL_INCLUDE_DIRS})
|
|
84
86
|
find_library(CURL_LIBRARY curl REQUIRED)
|
|
85
87
|
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY})
|
|
86
88
|
endif ()
|
|
87
89
|
|
|
88
90
|
target_include_directories(${TARGET} PUBLIC .)
|
|
89
|
-
target_compile_features (${TARGET} PUBLIC
|
|
91
|
+
target_compile_features (${TARGET} PUBLIC cxx_std_17)
|
|
90
92
|
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
|