@fugood/llama.node 0.3.16 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +6 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +44 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +374 -19
- package/src/LlamaCompletionWorker.h +31 -10
- package/src/LlamaContext.cpp +216 -7
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
- package/src/llama.cpp/.github/workflows/build.yml +89 -767
- package/src/llama.cpp/.github/workflows/docker.yml +9 -6
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +19 -23
- package/src/llama.cpp/CMakeLists.txt +11 -1
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +35 -4
- package/src/llama.cpp/common/arg.cpp +844 -121
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +129 -107
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +64 -518
- package/src/llama.cpp/common/common.h +35 -45
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +31 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
- package/src/llama.cpp/common/minja/minja.hpp +186 -127
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +60 -50
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +2 -32
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +76 -106
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
- package/src/llama.cpp/ggml/src/ggml.c +170 -265
- package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
- package/src/llama.cpp/include/llama.h +82 -22
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +5 -3
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +4 -2
- package/src/llama.cpp/src/llama-adapter.cpp +43 -1
- package/src/llama.cpp/src/llama-arch.cpp +163 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +91 -16
- package/src/llama.cpp/src/llama-chat.h +7 -2
- package/src/llama.cpp/src/llama-context.cpp +479 -575
- package/src/llama.cpp/src/llama-context.h +44 -33
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +209 -157
- package/src/llama.cpp/src/llama-graph.h +38 -14
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
- package/src/llama.cpp/src/llama-kv-cache.h +283 -171
- package/src/llama.cpp/src/llama-memory.h +12 -2
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +1803 -330
- package/src/llama.cpp/src/llama-model.h +21 -2
- package/src/llama.cpp/src/llama-quant.cpp +33 -10
- package/src/llama.cpp/src/llama-sampling.cpp +25 -7
- package/src/llama.cpp/src/llama-vocab.cpp +86 -10
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +15 -1
- package/src/llama.cpp/tests/CMakeLists.txt +52 -31
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
- package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
- package/src/llama.cpp/tests/test-chat.cpp +15 -3
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
- package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
- package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
- package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
- package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
- package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
- package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
- package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
- package/src/llama.cpp/examples/llava/clip.h +0 -118
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/llava.cpp +0 -574
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
- package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
|
@@ -15,10 +15,10 @@ on:
|
|
|
15
15
|
push:
|
|
16
16
|
branches:
|
|
17
17
|
- master
|
|
18
|
-
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
|
|
18
|
+
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']
|
|
19
19
|
pull_request:
|
|
20
20
|
types: [opened, synchronize, reopened]
|
|
21
|
-
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
|
|
21
|
+
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']
|
|
22
22
|
|
|
23
23
|
env:
|
|
24
24
|
LLAMA_LOG_COLORS: 1
|
|
@@ -74,7 +74,7 @@ jobs:
|
|
|
74
74
|
- name: Tests dependencies
|
|
75
75
|
id: test_dependencies
|
|
76
76
|
run: |
|
|
77
|
-
pip install -r examples/server/tests/requirements.txt
|
|
77
|
+
pip install -r tools/server/tests/requirements.txt
|
|
78
78
|
|
|
79
79
|
# Setup nodejs (to be used for verifying bundled index.html)
|
|
80
80
|
- uses: actions/setup-node@v4
|
|
@@ -84,14 +84,14 @@ jobs:
|
|
|
84
84
|
- name: WebUI - Install dependencies
|
|
85
85
|
id: webui_lint
|
|
86
86
|
run: |
|
|
87
|
-
cd examples/server/webui
|
|
87
|
+
cd tools/server/webui
|
|
88
88
|
npm ci
|
|
89
89
|
|
|
90
90
|
- name: WebUI - Check code format
|
|
91
91
|
id: webui_format
|
|
92
92
|
run: |
|
|
93
93
|
git config --global --add safe.directory $(realpath .)
|
|
94
|
-
cd examples/server/webui
|
|
94
|
+
cd tools/server/webui
|
|
95
95
|
git status
|
|
96
96
|
|
|
97
97
|
npm run format
|
|
@@ -108,7 +108,7 @@ jobs:
|
|
|
108
108
|
id: verify_server_index_html
|
|
109
109
|
run: |
|
|
110
110
|
git config --global --add safe.directory $(realpath .)
|
|
111
|
-
cd examples/server/webui
|
|
111
|
+
cd tools/server/webui
|
|
112
112
|
git status
|
|
113
113
|
|
|
114
114
|
npm run build
|
|
@@ -129,7 +129,6 @@ jobs:
|
|
|
129
129
|
cmake -B build \
|
|
130
130
|
-DGGML_NATIVE=OFF \
|
|
131
131
|
-DLLAMA_BUILD_SERVER=ON \
|
|
132
|
-
-DLLAMA_CURL=ON \
|
|
133
132
|
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
|
|
134
133
|
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
|
|
135
134
|
-DGGML_OPENMP=OFF ;
|
|
@@ -142,7 +141,6 @@ jobs:
|
|
|
142
141
|
cmake -B build \
|
|
143
142
|
-DGGML_NATIVE=OFF \
|
|
144
143
|
-DLLAMA_BUILD_SERVER=ON \
|
|
145
|
-
-DLLAMA_CURL=ON \
|
|
146
144
|
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
|
|
147
145
|
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
|
|
148
146
|
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
|
|
@@ -154,7 +152,6 @@ jobs:
|
|
|
154
152
|
cmake -B build \
|
|
155
153
|
-DGGML_NATIVE=OFF \
|
|
156
154
|
-DLLAMA_BUILD_SERVER=ON \
|
|
157
|
-
-DLLAMA_CURL=ON \
|
|
158
155
|
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
|
|
159
156
|
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
|
|
160
157
|
|
|
@@ -164,21 +161,21 @@ jobs:
|
|
|
164
161
|
env:
|
|
165
162
|
GITHUB_ACTIONS: "true"
|
|
166
163
|
run: |
|
|
167
|
-
cd examples/server/tests
|
|
164
|
+
cd tools/server/tests
|
|
168
165
|
./tests.sh
|
|
169
166
|
|
|
170
167
|
- name: Tests (sanitizers)
|
|
171
168
|
id: server_integration_tests_sanitizers
|
|
172
169
|
if: ${{ matrix.sanitizer != '' }}
|
|
173
170
|
run: |
|
|
174
|
-
cd examples/server/tests
|
|
171
|
+
cd tools/server/tests
|
|
175
172
|
LLAMA_SANITIZE=1 ./tests.sh
|
|
176
173
|
|
|
177
174
|
- name: Slow tests
|
|
178
175
|
id: server_integration_tests_slow
|
|
179
176
|
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
|
|
180
177
|
run: |
|
|
181
|
-
cd examples/server/tests
|
|
178
|
+
cd tools/server/tests
|
|
182
179
|
SLOW_TESTS=1 ./tests.sh
|
|
183
180
|
|
|
184
181
|
|
|
@@ -195,17 +192,14 @@ jobs:
|
|
|
195
192
|
|
|
196
193
|
- name: libCURL
|
|
197
194
|
id: get_libcurl
|
|
198
|
-
|
|
199
|
-
CURL_VERSION: 8.6.0_6
|
|
200
|
-
run: |
|
|
201
|
-
curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
|
|
202
|
-
mkdir $env:RUNNER_TEMP/libcurl
|
|
203
|
-
tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
|
|
195
|
+
uses: ./.github/actions/windows-setup-curl
|
|
204
196
|
|
|
205
197
|
- name: Build
|
|
206
198
|
id: cmake_build
|
|
199
|
+
env:
|
|
200
|
+
CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
|
|
207
201
|
run: |
|
|
208
|
-
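cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"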
|
|
202
|
+
cmake -B build -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include"
|
|
209
203
|
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server
|
|
210
204
|
|
|
211
205
|
- name: Python setup
|
|
@@ -217,18 +211,20 @@ jobs:
|
|
|
217
211
|
- name: Tests dependencies
|
|
218
212
|
id: test_dependencies
|
|
219
213
|
run: |
|
|
220
|
-
pip install -r examples/server/tests/requirements.txt
|
|
214
|
+
pip install -r tools/server/tests/requirements.txt
|
|
221
215
|
|
|
222
216
|
- name: Copy Libcurl
|
|
223
217
|
id: prepare_libcurl
|
|
218
|
+
env:
|
|
219
|
+
CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
|
|
224
220
|
run: |
|
|
225
|
-
cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll
|
|
221
|
+
cp $env:CURL_PATH/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll
|
|
226
222
|
|
|
227
223
|
- name: Tests
|
|
228
224
|
id: server_integration_tests
|
|
229
225
|
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
|
|
230
226
|
run: |
|
|
231
|
-
cd examples/server/tests
|
|
227
|
+
cd tools/server/tests
|
|
232
228
|
$env:PYTHONIOENCODING = ":replace"
|
|
233
229
|
pytest -v -x -m "not slow"
|
|
234
230
|
|
|
@@ -236,6 +232,6 @@ jobs:
|
|
|
236
232
|
id: server_integration_tests_slow
|
|
237
233
|
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
|
|
238
234
|
run: |
|
|
239
|
-
cd examples/server/tests
|
|
235
|
+
cd tools/server/tests
|
|
240
236
|
$env:SLOW_TESTS = "1"
|
|
241
237
|
pytest -v -x
|
|
@@ -77,11 +77,12 @@ option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE
|
|
|
77
77
|
|
|
78
78
|
# extra artifacts
|
|
79
79
|
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
|
|
80
|
+
option(LLAMA_BUILD_TOOLS "llama: build tools" ${LLAMA_STANDALONE})
|
|
80
81
|
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
|
|
81
82
|
option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
|
|
82
83
|
|
|
83
84
|
# 3rd party libs
|
|
84
|
-
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
|
|
85
|
+
option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
|
|
85
86
|
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
|
|
86
87
|
|
|
87
88
|
# Required for relocatable CMake package
|
|
@@ -168,6 +169,11 @@ add_subdirectory(src)
|
|
|
168
169
|
# utils, programs, examples and tests
|
|
169
170
|
#
|
|
170
171
|
|
|
172
|
+
if (NOT LLAMA_BUILD_COMMON)
|
|
173
|
+
message(STATUS "LLAMA_BUILD_COMMON is OFF, disabling LLAMA_CURL")
|
|
174
|
+
set(LLAMA_CURL OFF)
|
|
175
|
+
endif()
|
|
176
|
+
|
|
171
177
|
if (LLAMA_BUILD_COMMON)
|
|
172
178
|
add_subdirectory(common)
|
|
173
179
|
endif()
|
|
@@ -182,6 +188,10 @@ if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
|
|
|
182
188
|
add_subdirectory(pocs)
|
|
183
189
|
endif()
|
|
184
190
|
|
|
191
|
+
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TOOLS)
|
|
192
|
+
add_subdirectory(tools)
|
|
193
|
+
endif()
|
|
194
|
+
|
|
185
195
|
#
|
|
186
196
|
# install
|
|
187
197
|
#
|
|
@@ -41,14 +41,20 @@ endif()
|
|
|
41
41
|
|
|
42
42
|
if(MSVC)
|
|
43
43
|
set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
|
|
44
|
-
|
|
44
|
+
if (CMAKE_VS_PLATFORM_NAME)
|
|
45
|
+
set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
|
|
46
|
+
else()
|
|
47
|
+
set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
|
|
48
|
+
endif()
|
|
45
49
|
else()
|
|
46
50
|
execute_process(
|
|
47
|
-
COMMAND
|
|
51
|
+
COMMAND ${CMAKE_C_COMPILER} --version
|
|
48
52
|
OUTPUT_VARIABLE OUT
|
|
49
53
|
OUTPUT_STRIP_TRAILING_WHITESPACE
|
|
50
54
|
)
|
|
55
|
+
string(REGEX REPLACE " *\n.*" "" OUT "${OUT}")
|
|
51
56
|
set(BUILD_COMPILER ${OUT})
|
|
57
|
+
|
|
52
58
|
execute_process(
|
|
53
59
|
COMMAND ${CMAKE_C_COMPILER} -dumpmachine
|
|
54
60
|
OUTPUT_VARIABLE OUT
|
|
@@ -39,7 +39,9 @@ add_custom_command(
|
|
|
39
39
|
COMMENT "Generating build details from Git"
|
|
40
40
|
COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}
|
|
41
41
|
-DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME}
|
|
42
|
-
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
|
|
42
|
+
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
|
|
43
|
+
-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME} -DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}
|
|
44
|
+
-P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake"
|
|
43
45
|
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.."
|
|
44
46
|
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
|
|
45
47
|
VERBATIM
|
|
@@ -71,6 +73,8 @@ add_library(${TARGET} STATIC
|
|
|
71
73
|
minja/minja.hpp
|
|
72
74
|
ngram-cache.cpp
|
|
73
75
|
ngram-cache.h
|
|
76
|
+
regex-partial.cpp
|
|
77
|
+
regex-partial.h
|
|
74
78
|
sampling.cpp
|
|
75
79
|
sampling.h
|
|
76
80
|
speculative.cpp
|
|
@@ -85,7 +89,10 @@ set(LLAMA_COMMON_EXTRA_LIBS build_info)
|
|
|
85
89
|
|
|
86
90
|
# Use curl to download model url
|
|
87
91
|
if (LLAMA_CURL)
|
|
88
|
-
find_package(CURL REQUIRED)
|
|
92
|
+
find_package(CURL)
|
|
93
|
+
if (NOT CURL_FOUND)
|
|
94
|
+
message(FATAL_ERROR "Could NOT find CURL. Hint: to disable this feature, set -DLLAMA_CURL=OFF")
|
|
95
|
+
endif()
|
|
89
96
|
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
|
|
90
97
|
include_directories(${CURL_INCLUDE_DIRS})
|
|
91
98
|
find_library(CURL_LIBRARY curl REQUIRED)
|
|
@@ -114,8 +121,8 @@ if (LLAMA_LLGUIDANCE)
|
|
|
114
121
|
|
|
115
122
|
ExternalProject_Add(llguidance_ext
|
|
116
123
|
GIT_REPOSITORY https://github.com/guidance-ai/llguidance
|
|
117
|
-
# v0.
|
|
118
|
-
GIT_TAG
|
|
124
|
+
# v0.7.20 (+ fix to build on GCC 15):
|
|
125
|
+
GIT_TAG b5b8b64dba11c4e4ee6b1d1450d3a3ae279891e8
|
|
119
126
|
PREFIX ${CMAKE_BINARY_DIR}/llguidance
|
|
120
127
|
SOURCE_DIR ${LLGUIDANCE_SRC}
|
|
121
128
|
BUILD_IN_SOURCE TRUE
|
|
@@ -139,3 +146,27 @@ endif ()
|
|
|
139
146
|
target_include_directories(${TARGET} PUBLIC .)
|
|
140
147
|
target_compile_features (${TARGET} PUBLIC cxx_std_17)
|
|
141
148
|
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
#
|
|
152
|
+
# copy the license files
|
|
153
|
+
#
|
|
154
|
+
|
|
155
|
+
# Check if running in GitHub Actions
|
|
156
|
+
if (DEFINED ENV{GITHUB_ACTIONS} AND "$ENV{GITHUB_ACTIONS}" STREQUAL "true")
|
|
157
|
+
message(STATUS "Running inside GitHub Actions - copying license files")
|
|
158
|
+
|
|
159
|
+
# Copy all files from licenses/ to build/bin/
|
|
160
|
+
file(GLOB LICENSE_FILES "${CMAKE_SOURCE_DIR}/licenses/*")
|
|
161
|
+
foreach(LICENSE_FILE ${LICENSE_FILES})
|
|
162
|
+
get_filename_component(FILENAME ${LICENSE_FILE} NAME)
|
|
163
|
+
add_custom_command(
|
|
164
|
+
POST_BUILD
|
|
165
|
+
TARGET ${TARGET}
|
|
166
|
+
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
|
167
|
+
"${LICENSE_FILE}"
|
|
168
|
+
"$<TARGET_FILE_DIR:llama>/${FILENAME}"
|
|
169
|
+
COMMENT "Copying ${FILENAME} to ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
|
|
170
|
+
message(STATUS "Copying ${LICENSE_FILE} to ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${FILENAME}")
|
|
171
|
+
endforeach()
|
|
172
|
+
endif()
|