npm - @fugood/llama.node - Versions diffs - 0.3.2 → 0.3.4 - Mend

@fugood/llama.node 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (286) hide show

package/CMakeLists.txt +7 -0
package/bin/darwin/arm64/llama-node.node +0 -0
package/bin/darwin/x64/llama-node.node +0 -0
package/bin/linux/arm64/llama-node.node +0 -0
package/bin/linux/x64/llama-node.node +0 -0
package/bin/linux-vulkan/arm64/llama-node.node +0 -0
package/bin/linux-vulkan/x64/llama-node.node +0 -0
package/bin/win32/arm64/llama-node.node +0 -0
package/bin/win32/arm64/node.lib +0 -0
package/bin/win32/x64/llama-node.node +0 -0
package/bin/win32/x64/node.lib +0 -0
package/bin/win32-vulkan/arm64/llama-node.node +0 -0
package/bin/win32-vulkan/arm64/node.lib +0 -0
package/bin/win32-vulkan/x64/llama-node.node +0 -0
package/bin/win32-vulkan/x64/node.lib +0 -0
package/lib/binding.ts +18 -1
package/package.json +1 -1
package/src/DetokenizeWorker.cpp +1 -1
package/src/EmbeddingWorker.cpp +17 -7
package/src/EmbeddingWorker.h +2 -1
package/src/LlamaCompletionWorker.cpp +8 -8
package/src/LlamaCompletionWorker.h +2 -2
package/src/LlamaContext.cpp +89 -27
package/src/LlamaContext.h +2 -0
package/src/TokenizeWorker.cpp +1 -1
package/src/common.hpp +4 -4
package/src/llama.cpp/.github/workflows/build.yml +240 -168
package/src/llama.cpp/.github/workflows/docker.yml +8 -8
package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
package/src/llama.cpp/.github/workflows/server.yml +21 -14
package/src/llama.cpp/CMakeLists.txt +14 -6
package/src/llama.cpp/Sources/llama/llama.h +4 -0
package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
package/src/llama.cpp/cmake/common.cmake +33 -0
package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
package/src/llama.cpp/common/CMakeLists.txt +6 -4
package/src/llama.cpp/common/arg.cpp +986 -770
package/src/llama.cpp/common/arg.h +22 -22
package/src/llama.cpp/common/common.cpp +212 -351
package/src/llama.cpp/common/common.h +204 -117
package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
package/src/llama.cpp/common/log.cpp +50 -50
package/src/llama.cpp/common/log.h +18 -18
package/src/llama.cpp/common/ngram-cache.cpp +36 -36
package/src/llama.cpp/common/ngram-cache.h +19 -19
package/src/llama.cpp/common/sampling.cpp +163 -121
package/src/llama.cpp/common/sampling.h +41 -20
package/src/llama.cpp/common/speculative.cpp +274 -0
package/src/llama.cpp/common/speculative.h +28 -0
package/src/llama.cpp/docs/build.md +134 -161
package/src/llama.cpp/examples/CMakeLists.txt +33 -14
package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/batched/batched.cpp +19 -18
package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/imatrix/imatrix.cpp +31 -13
package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/infill/infill.cpp +41 -87
package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +439 -459
package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +2 -0
package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
package/src/llama.cpp/examples/llava/clip.cpp +263 -66
package/src/llama.cpp/examples/llava/clip.h +8 -2
package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
package/src/llama.cpp/examples/llava/llava.cpp +83 -22
package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
package/src/llama.cpp/examples/lookup/lookup-stats.cpp +16 -15
package/src/llama.cpp/examples/lookup/lookup.cpp +30 -30
package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/main/main.cpp +73 -114
package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/retrieval/retrieval.cpp +16 -16
package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
package/src/llama.cpp/examples/run/run.cpp +911 -0
package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +38 -21
package/src/llama.cpp/examples/server/CMakeLists.txt +3 -16
package/src/llama.cpp/examples/server/server.cpp +2073 -1339
package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
package/src/llama.cpp/examples/server/utils.hpp +354 -277
package/src/llama.cpp/examples/simple/CMakeLists.txt +2 -2
package/src/llama.cpp/examples/simple/simple.cpp +130 -94
package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +200 -0
package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/speculative/speculative.cpp +68 -64
package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/tokenize/tokenize.cpp +3 -3
package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
package/src/llama.cpp/examples/tts/tts.cpp +932 -0
package/src/llama.cpp/ggml/CMakeLists.txt +54 -36
package/src/llama.cpp/ggml/include/ggml-backend.h +63 -34
package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
package/src/llama.cpp/ggml/include/ggml-cpu.h +135 -0
package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
package/src/llama.cpp/ggml/include/ggml.h +159 -417
package/src/llama.cpp/ggml/src/CMakeLists.txt +121 -1155
package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -28
package/src/llama.cpp/ggml/src/ggml-backend-impl.h +57 -36
package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +552 -0
package/src/llama.cpp/ggml/src/ggml-backend.cpp +306 -867
package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +216 -65
package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +76 -0
package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +343 -177
package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +336 -0
package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
package/src/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
package/src/llama.cpp/ggml/src/{ggml-aarch64.c → ggml-cpu/ggml-cpu-aarch64.cpp} +1299 -246
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +14 -242
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +628 -0
package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +666 -0
package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +152 -0
package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +104 -0
package/src/llama.cpp/ggml/src/ggml-impl.h +393 -22
package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +360 -127
package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +105 -0
package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
package/src/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
package/src/llama.cpp/ggml/src/ggml-quants.c +188 -10702
package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +478 -300
package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +84 -0
package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +36 -5
package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +259 -0
package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +34 -35
package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3638 -4151
package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -87
package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +7 -6
package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +141 -0
package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
package/src/llama.cpp/ggml/src/ggml-threading.h +14 -0
package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +92 -0
package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2138 -887
package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +3 -1
package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
package/src/llama.cpp/ggml/src/ggml.c +4427 -20125
package/src/llama.cpp/include/llama-cpp.h +25 -0
package/src/llama.cpp/include/llama.h +93 -52
package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
package/src/llama.cpp/src/CMakeLists.txt +4 -8
package/src/llama.cpp/src/llama-grammar.cpp +15 -15
package/src/llama.cpp/src/llama-grammar.h +2 -5
package/src/llama.cpp/src/llama-sampling.cpp +779 -194
package/src/llama.cpp/src/llama-sampling.h +21 -2
package/src/llama.cpp/src/llama-vocab.cpp +55 -10
package/src/llama.cpp/src/llama-vocab.h +35 -11
package/src/llama.cpp/src/llama.cpp +4317 -2979
package/src/llama.cpp/src/unicode-data.cpp +2 -2
package/src/llama.cpp/src/unicode.cpp +62 -51
package/src/llama.cpp/src/unicode.h +9 -10
package/src/llama.cpp/tests/CMakeLists.txt +48 -38
package/src/llama.cpp/tests/test-arg-parser.cpp +15 -15
package/src/llama.cpp/tests/test-backend-ops.cpp +324 -80
package/src/llama.cpp/tests/test-barrier.cpp +1 -0
package/src/llama.cpp/tests/test-chat-template.cpp +59 -9
package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
package/src/llama.cpp/tests/test-log.cpp +2 -2
package/src/llama.cpp/tests/test-opt.cpp +853 -142
package/src/llama.cpp/tests/test-quantize-fns.cpp +24 -21
package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
package/src/llama.cpp/tests/test-rope.cpp +62 -20
package/src/llama.cpp/tests/test-sampling.cpp +163 -138
package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
package/src/llama.cpp/common/train.cpp +0 -1515
package/src/llama.cpp/common/train.h +0 -233
package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -39
package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +0 -600
package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
/package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
/package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0

package/src/llama.cpp/.github/workflows/docker.yml CHANGED Viewed

@@ -10,12 +10,10 @@
 name: Publish Docker image
 on:
-  #pull_request:
-  push:
-    branches:
-      - master
-    paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
-  workflow_dispatch: # allows manual triggering, useful for debugging
+  workflow_dispatch: # allows manual triggering
+  schedule:
+    # Rebuild daily rather than on every push because it is expensive
+    - cron: '12 4 * * *'
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
@@ -29,7 +27,6 @@ permissions:
 jobs:
   push_to_registry:
     name: Push Docker image to Docker Hub
-    #if: github.event.pull_request.draft == false
     runs-on: ubuntu-latest
     env:
@@ -43,6 +40,9 @@ jobs:
           - { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
           - { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
           - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
+          - { tag: "light-musa", dockerfile: ".devops/llama-cli-musa.Dockerfile", platforms: "linux/amd64" }
+          - { tag: "server-musa", dockerfile: ".devops/llama-server-musa.Dockerfile", platforms: "linux/amd64" }
+          - { tag: "full-musa", dockerfile: ".devops/full-musa.Dockerfile", platforms: "linux/amd64" }
           # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
           #- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
           #- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
@@ -114,7 +114,7 @@ jobs:
           swap-storage: true
       - name: Build and push Docker image (tagged + versioned)
-        if: github.event_name == 'push'
+        if: ${{ github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
         uses: docker/build-push-action@v6
         with:
           context: .

package/src/llama.cpp/.github/workflows/python-lint.yml CHANGED Viewed

@@ -1,6 +1,13 @@
 name: flake8 Lint
-on: [push, pull_request]
+on:
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/python-lint.yml', '**/*.py']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['.github/workflows/python-lint.yml', '**/*.py']
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}

package/src/llama.cpp/.github/workflows/server.yml CHANGED Viewed

@@ -76,20 +76,26 @@ jobs:
         run: |
           pip install -r examples/server/tests/requirements.txt
-      - name: Verify server deps
-        id: verify_server_deps
+      # Setup nodejs (to be used for verifying bundled index.html)
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22.11.0'
+      - name: Verify bundled index.html
+        id: verify_server_index_html
         run: |
           git config --global --add safe.directory $(realpath .)
-          cd examples/server
-          git ls-files --others --modified
+          cd examples/server/webui
           git status
-          ./deps.sh
+          npm ci
+          npm run build
           git status
-          not_ignored_files="$(git ls-files --others --modified)"
-          echo "Modified files: ${not_ignored_files}"
-          if [ -n "${not_ignored_files}" ]; then
-            echo "Repository is dirty or server deps are not built as expected"
-            echo "${not_ignored_files}"
+          modified_files="$(git status -s)"
+          echo "Modified files: ${modified_files}"
+          if [ -n "${modified_files}" ]; then
+            echo "Repository is dirty or server/webui is not built as expected"
+            echo "Hint: You may need to follow Web UI build guide in server/README.md"
+            echo "${modified_files}"
             exit 1
           fi
@@ -122,14 +128,14 @@ jobs:
         id: server_integration_tests
         run: |
           cd examples/server/tests
-          PORT=8888 ./tests.sh
+          ./tests.sh
       - name: Slow tests
         id: server_integration_tests_slow
         if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
         run: |
           cd examples/server/tests
-          PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
+          SLOW_TESTS=1 ./tests.sh
   server-windows:
@@ -180,11 +186,12 @@ jobs:
         run: |
           cd examples/server/tests
           $env:PYTHONIOENCODING = ":replace"
-          behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
+          pytest -v -x
       - name: Slow tests
         id: server_integration_tests_slow
         if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
         run: |
           cd examples/server/tests
-          behave.exe --stop --no-skipped --no-capture --tags slow
+          $env:SLOW_TESTS = "1"
+          pytest -v -x

package/src/llama.cpp/CMakeLists.txt CHANGED Viewed

@@ -46,6 +46,11 @@ if (WIN32)
     add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
 endif()
+if (MSVC)
+    add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
+    add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
+endif()
 #
 # option list
 #
@@ -63,7 +68,7 @@ option(LLAMA_SANITIZE_ADDRESS   "llama: enable address sanitizer"   OFF)
 option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
 # utils
-option(LLAMA_BUILD_COMMON "llama: build common utils library" ON)
+option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE})
 # extra artifacts
 option(LLAMA_BUILD_TESTS    "llama: build tests"          ${LLAMA_STANDALONE})
@@ -75,6 +80,7 @@ option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
 # Required for relocatable CMake package
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
+include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
 # override ggml options
 set(GGML_SANITIZE_THREAD    ${LLAMA_SANITIZE_THREAD})
@@ -136,7 +142,6 @@ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location o
 set(LLAMA_LIB_INSTALL_DIR     ${CMAKE_INSTALL_LIBDIR}     CACHE PATH "Location of library files")
 set(LLAMA_BIN_INSTALL_DIR     ${CMAKE_INSTALL_BINDIR}     CACHE PATH "Location of binary  files")
 # At the moment some compile definitions are placed within the ggml/src
 # directory but not exported on the `ggml` target. This could be improved by
 # determining _precisely_ which defines are necessary for the llama-config
@@ -153,8 +158,11 @@ if (GGML_TARGET_DEFINES)
     list(APPEND GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES})
 endif()
 get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)
-set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h)
+# all public headers
+set(LLAMA_PUBLIC_HEADERS
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/llama-cpp.h)
+set_target_properties(llama PROPERTIES PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}")
 install(TARGETS llama LIBRARY PUBLIC_HEADER)
 configure_package_config_file(
@@ -201,12 +209,12 @@ if (LLAMA_BUILD_COMMON)
     add_subdirectory(common)
 endif()
-if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
+if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
     include(CTest)
     add_subdirectory(tests)
 endif()
-if (LLAMA_BUILD_EXAMPLES)
+if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
     add_subdirectory(examples)
     add_subdirectory(pocs)
 endif()

package/src/llama.cpp/Sources/llama/llama.h ADDED Viewed

@@ -0,0 +1,4 @@
+#pragma once
+#include <llama.h>

package/src/llama.cpp/cmake/arm64-apple-clang.cmake ADDED Viewed

@@ -0,0 +1,16 @@
+set( CMAKE_SYSTEM_NAME Darwin )
+set( CMAKE_SYSTEM_PROCESSOR arm64 )
+set( target arm64-apple-darwin-macho )
+set( CMAKE_C_COMPILER    clang )
+set( CMAKE_CXX_COMPILER  clang++ )
+set( CMAKE_C_COMPILER_TARGET   ${target} )
+set( CMAKE_CXX_COMPILER_TARGET ${target} )
+set( arch_c_flags "-march=armv8.4-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
+set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function" )
+set( CMAKE_C_FLAGS_INIT   "${arch_c_flags} ${warn_c_flags}" )
+set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )

package/src/llama.cpp/cmake/common.cmake ADDED Viewed

@@ -0,0 +1,33 @@
+function(llama_add_compile_flags)
+    if (LLAMA_FATAL_WARNINGS)
+        if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+            list(APPEND C_FLAGS   -Werror)
+            list(APPEND CXX_FLAGS -Werror)
+        elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+            add_compile_options(/WX)
+        endif()
+    endif()
+    if (LLAMA_ALL_WARNINGS)
+        if (NOT MSVC)
+            list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
+                                -Werror=implicit-int -Werror=implicit-function-declaration)
+            list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
+            list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
+            list(APPEND C_FLAGS   ${WARNING_FLAGS})
+            list(APPEND CXX_FLAGS ${WARNING_FLAGS})
+            ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
+            add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
+                                "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
+        else()
+            # todo : msvc
+            set(C_FLAGS   "" PARENT_SCOPE)
+            set(CXX_FLAGS "" PARENT_SCOPE)
+        endif()
+    endif()
+endfunction()

package/src/llama.cpp/cmake/x64-windows-llvm.cmake ADDED Viewed

@@ -0,0 +1,11 @@
+set( CMAKE_SYSTEM_NAME Windows )
+set( CMAKE_SYSTEM_PROCESSOR x86_64 )
+set( CMAKE_C_COMPILER    clang )
+set( CMAKE_CXX_COMPILER  clang++ )
+set( arch_c_flags "-march=native" )
+set( CMAKE_C_FLAGS_INIT   "${arch_c_flags}" )
+set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags}" )

package/src/llama.cpp/common/CMakeLists.txt CHANGED Viewed

@@ -2,6 +2,8 @@
 find_package(Threads REQUIRED)
+llama_add_compile_flags()
 # Build info header
 #
@@ -66,8 +68,8 @@ add_library(${TARGET} STATIC
     ngram-cache.h
     sampling.cpp
     sampling.h
-    train.cpp
-    train.h
+    speculative.cpp
+    speculative.h
     )
 if (BUILD_SHARED_LIBS)
@@ -79,12 +81,12 @@ set(LLAMA_COMMON_EXTRA_LIBS build_info)
 # Use curl to download model url
 if (LLAMA_CURL)
     find_package(CURL REQUIRED)
-    add_definitions(-DLLAMA_USE_CURL)
+    target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
     include_directories(${CURL_INCLUDE_DIRS})
     find_library(CURL_LIBRARY curl REQUIRED)
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY})
 endif ()
 target_include_directories(${TARGET} PUBLIC .)
-target_compile_features   (${TARGET} PUBLIC cxx_std_11)
+target_compile_features   (${TARGET} PUBLIC cxx_std_17)
 target_link_libraries     (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)