@fugood/llama.node 0.3.16 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +6 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +44 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +374 -19
- package/src/LlamaCompletionWorker.h +31 -10
- package/src/LlamaContext.cpp +216 -7
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
- package/src/llama.cpp/.github/workflows/build.yml +89 -767
- package/src/llama.cpp/.github/workflows/docker.yml +9 -6
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +19 -23
- package/src/llama.cpp/CMakeLists.txt +11 -1
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +35 -4
- package/src/llama.cpp/common/arg.cpp +844 -121
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +129 -107
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +64 -518
- package/src/llama.cpp/common/common.h +35 -45
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +31 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
- package/src/llama.cpp/common/minja/minja.hpp +186 -127
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +60 -50
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +2 -32
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +76 -106
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
- package/src/llama.cpp/ggml/src/ggml.c +170 -265
- package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
- package/src/llama.cpp/include/llama.h +82 -22
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +5 -3
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +4 -2
- package/src/llama.cpp/src/llama-adapter.cpp +43 -1
- package/src/llama.cpp/src/llama-arch.cpp +163 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +91 -16
- package/src/llama.cpp/src/llama-chat.h +7 -2
- package/src/llama.cpp/src/llama-context.cpp +479 -575
- package/src/llama.cpp/src/llama-context.h +44 -33
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +209 -157
- package/src/llama.cpp/src/llama-graph.h +38 -14
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
- package/src/llama.cpp/src/llama-kv-cache.h +283 -171
- package/src/llama.cpp/src/llama-memory.h +12 -2
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +1803 -330
- package/src/llama.cpp/src/llama-model.h +21 -2
- package/src/llama.cpp/src/llama-quant.cpp +33 -10
- package/src/llama.cpp/src/llama-sampling.cpp +25 -7
- package/src/llama.cpp/src/llama-vocab.cpp +86 -10
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +15 -1
- package/src/llama.cpp/tests/CMakeLists.txt +52 -31
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
- package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
- package/src/llama.cpp/tests/test-chat.cpp +15 -3
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
- package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
- package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
- package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
- package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
- package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
- package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
- package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
- package/src/llama.cpp/examples/llava/clip.h +0 -118
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/llava.cpp +0 -574
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
- package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
|
@@ -78,3 +78,12 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
|
|
|
78
78
|
|
|
79
79
|
// function to be used by test-arg-parser
|
|
80
80
|
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
|
|
81
|
+
bool common_has_curl();
|
|
82
|
+
|
|
83
|
+
// Options passed to common_remote_get_content() when fetching a remote URL.
struct common_remote_params {
    // Header strings for the request (presumably sent verbatim as HTTP headers; confirm against the implementation).
    std::vector<std::string> headers;

    // CURLOPT_TIMEOUT, in seconds; 0 means no timeout.
    long timeout{0};

    // Maximum size of the response; 0 means unlimited; the maximum is 2GB.
    long max_size{0};
};
|
|
88
|
+
// get remote file content, returns <http_code, raw_response_body>
|
|
89
|
+
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
|
|
@@ -6,6 +6,15 @@
|
|
|
6
6
|
|
|
7
7
|
#include <optional>
|
|
8
8
|
|
|
9
|
+
// Renders `now` as text using a strftime-style `format` string (e.g. "%d %b %Y").
//
// The time point is broken down in the process's LOCAL time zone, not UTC, so callers
// that need a UTC/GMT timestamp must not rely on this helper to convert.
//
// Returns the formatted string, or an empty string when the time point cannot be
// converted to a local calendar time.
static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
    const std::time_t time = std::chrono::system_clock::to_time_t(now);

    // std::localtime() returns a pointer to shared static storage (a data race when several
    // threads format timestamps concurrently) and may return nullptr on failure. Use the
    // re-entrant variants, which fill a caller-owned buffer, and check their result.
    std::tm local_time{};
#if defined(_WIN32)
    if (localtime_s(&local_time, &time) != 0) {
        return "";
    }
#else
    if (localtime_r(&time, &local_time) == nullptr) {
        return "";
    }
#endif

    std::ostringstream ss;
    ss << std::put_time(&local_time, format.c_str());
    return ss.str();
}
|
|
17
|
+
|
|
9
18
|
typedef minja::chat_template common_chat_template;
|
|
10
19
|
|
|
11
20
|
struct common_chat_templates {
|
|
@@ -24,6 +33,7 @@ struct templates_params {
|
|
|
24
33
|
std::string grammar;
|
|
25
34
|
bool add_generation_prompt = true;
|
|
26
35
|
bool extract_reasoning = true;
|
|
36
|
+
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
|
|
27
37
|
};
|
|
28
38
|
|
|
29
39
|
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
|
|
@@ -125,7 +135,9 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
|
|
|
125
135
|
msgs.push_back(msg);
|
|
126
136
|
}
|
|
127
137
|
} catch (const std::exception & e) {
|
|
128
|
-
|
|
138
|
+
// @ngxson : disable otherwise it's bloating the API response
|
|
139
|
+
// printf("%s\n", std::string("; messages = ") + messages.dump(2));
|
|
140
|
+
throw std::runtime_error("Failed to parse messages: " + std::string(e.what()));
|
|
129
141
|
}
|
|
130
142
|
|
|
131
143
|
return msgs;
|
|
@@ -937,78 +949,83 @@ static void expect_tool_parameters(const std::string & name, const json & parame
|
|
|
937
949
|
}
|
|
938
950
|
}
|
|
939
951
|
|
|
940
|
-
static common_chat_params
|
|
952
|
+
static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
|
|
941
953
|
auto builtin_tools = json::array();
|
|
942
954
|
common_chat_params data;
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
955
|
+
if (!inputs.tools.is_null()) {
|
|
956
|
+
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
957
|
+
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
958
|
+
std::vector<std::string> tool_rules;
|
|
946
959
|
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
960
|
+
auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
|
|
961
|
+
if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
|
|
962
|
+
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
|
|
963
|
+
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
|
|
964
|
+
expect_tool_parameters(name, parameters, {"query"});
|
|
965
|
+
} else if (name == "python" || name == "code_interpreter") {
|
|
966
|
+
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
|
|
967
|
+
expect_tool_parameters(name, parameters, {"code"});
|
|
968
|
+
} else {
|
|
969
|
+
return false;
|
|
970
|
+
}
|
|
958
971
|
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
972
|
+
std::vector<std::string> kvs;
|
|
973
|
+
for (const auto & [key, value] : parameters.at("properties").items()) {
|
|
974
|
+
kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT
|
|
975
|
+
}
|
|
963
976
|
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
977
|
+
tool_rules.push_back(
|
|
978
|
+
builder.add_rule(
|
|
979
|
+
name + "-call",
|
|
980
|
+
"\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
|
|
981
|
+
builtin_tools.push_back(name);
|
|
969
982
|
|
|
970
|
-
|
|
971
|
-
|
|
983
|
+
return true;
|
|
984
|
+
};
|
|
972
985
|
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
986
|
+
foreach_function(inputs.tools, [&](const json & tool) {
|
|
987
|
+
const auto & function = tool.at("function");
|
|
988
|
+
std::string name = function.at("name");
|
|
989
|
+
auto parameters = function.at("parameters");
|
|
990
|
+
builder.resolve_refs(parameters);
|
|
978
991
|
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
992
|
+
// https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
|
|
993
|
+
if (allow_python_tag_builtin_tools) {
|
|
994
|
+
handle_builtin_tool(name, parameters);
|
|
995
|
+
}
|
|
996
|
+
tool_rules.push_back(
|
|
997
|
+
builder.add_rule(
|
|
998
|
+
name + "-call",
|
|
999
|
+
"\"{\" space "
|
|
1000
|
+
"( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? "
|
|
1001
|
+
" \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
|
|
1002
|
+
" \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
|
|
1003
|
+
"\"}\" space"));
|
|
1004
|
+
});
|
|
1005
|
+
// Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
|
|
1006
|
+
data.grammar_triggers.push_back({
|
|
1007
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
|
|
1008
|
+
"\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"", // + name + "\"[\\s\\S]*",
|
|
1009
|
+
});
|
|
1010
|
+
if (!builtin_tools.empty()) {
|
|
1011
|
+
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
|
|
1012
|
+
data.preserved_tokens.push_back("<|python_tag|>");
|
|
982
1013
|
}
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
"\"{\" space "
|
|
987
|
-
"( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? "
|
|
988
|
-
" \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
|
|
989
|
-
" \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
|
|
990
|
-
"\"}\" space"));
|
|
1014
|
+
// Allow a few empty lines on top of the usual constrained json schema space rule.
|
|
1015
|
+
builder.add_rule("root", string_join(tool_rules, " | "));
|
|
1016
|
+
data.additional_stops.push_back("<|eom_id|>");
|
|
991
1017
|
});
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
|
|
999
|
-
data.preserved_tokens.push_back("<|python_tag|>");
|
|
1000
|
-
}
|
|
1001
|
-
// Allow a few empty lines on top of the usual constrained json schema space rule.
|
|
1002
|
-
builder.add_rule("root", string_join(tool_rules, " | "));
|
|
1003
|
-
});
|
|
1004
|
-
data.additional_stops.push_back("<|eom_id|>");
|
|
1018
|
+
data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
|
|
1019
|
+
? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
|
|
1020
|
+
: COMMON_CHAT_FORMAT_LLAMA_3_X;
|
|
1021
|
+
} else {
|
|
1022
|
+
data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
|
1023
|
+
}
|
|
1005
1024
|
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {
|
|
1025
|
+
{"date_string", format_time(inputs.now, "%d %b %Y")},
|
|
1006
1026
|
{"tools_in_user_message", false},
|
|
1007
1027
|
{"builtin_tools", builtin_tools.empty() ? json() : builtin_tools},
|
|
1008
1028
|
});
|
|
1009
|
-
data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
|
|
1010
|
-
? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
|
|
1011
|
-
: COMMON_CHAT_FORMAT_LLAMA_3_X;
|
|
1012
1029
|
return data;
|
|
1013
1030
|
}
|
|
1014
1031
|
static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) {
|
|
@@ -1148,7 +1165,7 @@ static common_chat_params common_chat_params_init_firefunction_v2(const common_c
|
|
|
1148
1165
|
LOG_DBG("%s\n", __func__);
|
|
1149
1166
|
common_chat_params data;
|
|
1150
1167
|
data.prompt = apply(tmpl, inputs.messages, /* tools= */ nullptr, inputs.add_generation_prompt, {
|
|
1151
|
-
{"datetime", "
|
|
1168
|
+
{"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
|
|
1152
1169
|
{"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
|
|
1153
1170
|
});
|
|
1154
1171
|
if (inputs.tools.is_array() && !inputs.tools.empty()) {
|
|
@@ -1283,55 +1300,59 @@ static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & in
|
|
|
1283
1300
|
static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
1284
1301
|
// https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
|
|
1285
1302
|
common_chat_params data;
|
|
1286
|
-
json tools = inputs.tools.is_null() ? inputs.tools : json::array();
|
|
1287
|
-
std::string python_code_argument_name;
|
|
1288
|
-
auto has_raw_python = false;
|
|
1289
1303
|
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
std::string
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1304
|
+
if (!inputs.tools.is_null()) {
|
|
1305
|
+
std::string python_code_argument_name;
|
|
1306
|
+
auto has_raw_python = false;
|
|
1307
|
+
|
|
1308
|
+
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
1309
|
+
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
1310
|
+
std::vector<std::string> tool_rules;
|
|
1311
|
+
foreach_function(inputs.tools, [&](const json & tool) {
|
|
1312
|
+
const auto & function = tool.at("function");
|
|
1313
|
+
const auto & parameters = function.at("parameters");
|
|
1314
|
+
std::string name = function.at("name");
|
|
1315
|
+
if (name == "python" || name == "ipython") {
|
|
1316
|
+
if (!parameters.contains("type")) {
|
|
1317
|
+
throw std::runtime_error("Missing type in python tool");
|
|
1318
|
+
}
|
|
1319
|
+
has_raw_python = true;
|
|
1320
|
+
const auto & type = parameters.at("type");
|
|
1321
|
+
if (type == "object") {
|
|
1322
|
+
auto properties = parameters.at("properties");
|
|
1323
|
+
for (auto it = properties.begin(); it != properties.end(); ++it) {
|
|
1324
|
+
if (it.value().at("type") == "string") {
|
|
1325
|
+
if (!python_code_argument_name.empty()) {
|
|
1326
|
+
throw std::runtime_error("Multiple string arguments found in python tool");
|
|
1327
|
+
}
|
|
1328
|
+
python_code_argument_name = it.key();
|
|
1309
1329
|
}
|
|
1310
|
-
python_code_argument_name = it.key();
|
|
1311
1330
|
}
|
|
1331
|
+
if (python_code_argument_name.empty()) {
|
|
1332
|
+
throw std::runtime_error("No string argument found in python tool");
|
|
1333
|
+
}
|
|
1334
|
+
} else if (type != "string") {
|
|
1335
|
+
throw std::runtime_error("Invalid type in python tool: " + type.dump());
|
|
1312
1336
|
}
|
|
1313
|
-
if (python_code_argument_name.empty()) {
|
|
1314
|
-
throw std::runtime_error("No string argument found in python tool");
|
|
1315
|
-
}
|
|
1316
|
-
} else if (type != "string") {
|
|
1317
|
-
throw std::runtime_error("Invalid type in python tool: " + type.dump());
|
|
1318
1337
|
}
|
|
1338
|
+
tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
|
|
1339
|
+
});
|
|
1340
|
+
if (has_raw_python) {
|
|
1341
|
+
tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
|
|
1342
|
+
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
|
|
1343
|
+
data.preserved_tokens.push_back("<|python_tag|>");
|
|
1319
1344
|
}
|
|
1320
|
-
|
|
1345
|
+
auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
|
|
1346
|
+
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
|
|
1347
|
+
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
|
|
1321
1348
|
});
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
}
|
|
1327
|
-
auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
|
|
1328
|
-
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
|
|
1329
|
-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
|
|
1330
|
-
});
|
|
1349
|
+
data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
|
|
1350
|
+
} else {
|
|
1351
|
+
data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
|
1352
|
+
}
|
|
1331
1353
|
|
|
1332
1354
|
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
|
1333
1355
|
// TODO: if (has_raw_python)
|
|
1334
|
-
data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
|
|
1335
1356
|
return data;
|
|
1336
1357
|
}
|
|
1337
1358
|
static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::string & input) {
|
|
@@ -1591,6 +1612,7 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
1591
1612
|
params.extract_reasoning = inputs.extract_reasoning;
|
|
1592
1613
|
params.tool_choice = inputs.tool_choice;
|
|
1593
1614
|
params.grammar = inputs.grammar;
|
|
1615
|
+
params.now = inputs.now;
|
|
1594
1616
|
if (!inputs.json_schema.empty()) {
|
|
1595
1617
|
params.json_schema = json::parse(inputs.json_schema);
|
|
1596
1618
|
}
|
|
@@ -1622,7 +1644,7 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
1622
1644
|
}
|
|
1623
1645
|
|
|
1624
1646
|
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
|
|
1625
|
-
if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
|
|
1647
|
+
if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null() && params.tools.is_array() && params.json_schema.is_null()) {
|
|
1626
1648
|
return common_chat_params_init_hermes_2_pro(tmpl, params);
|
|
1627
1649
|
}
|
|
1628
1650
|
|
|
@@ -1642,21 +1664,21 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
1642
1664
|
return common_chat_params_init_firefunction_v2(tmpl, params);
|
|
1643
1665
|
}
|
|
1644
1666
|
|
|
1645
|
-
// Plain handler (no tools)
|
|
1646
|
-
if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
|
|
1647
|
-
return common_chat_params_init_without_tools(tmpl, params);
|
|
1648
|
-
}
|
|
1649
|
-
|
|
1650
1667
|
// Functionary v3.1 (w/ tools)
|
|
1651
1668
|
if (src.find("<|start_header_id|>") != std::string::npos
|
|
1652
1669
|
&& src.find("<function=") != std::string::npos) {
|
|
1653
1670
|
return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, params);
|
|
1654
1671
|
}
|
|
1655
1672
|
|
|
1656
|
-
// Llama 3.1, 3.2, 3.3 (w/ tools)
|
|
1673
|
+
// Llama 3.1, 3.2, 3.3 (also requires date_string so using it even w/o tools)
|
|
1657
1674
|
if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
|
|
1658
1675
|
auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
|
|
1659
|
-
return
|
|
1676
|
+
return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
|
|
1677
|
+
}
|
|
1678
|
+
|
|
1679
|
+
// Plain handler (no tools)
|
|
1680
|
+
if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
|
|
1681
|
+
return common_chat_params_init_without_tools(tmpl, params);
|
|
1660
1682
|
}
|
|
1661
1683
|
|
|
1662
1684
|
// Mistral Nemo (w/ tools)
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
#pragma once
|
|
4
4
|
|
|
5
5
|
#include "common.h"
|
|
6
|
+
#include <chrono>
|
|
6
7
|
#include <string>
|
|
7
8
|
#include <vector>
|
|
8
9
|
|
|
@@ -71,6 +72,7 @@ struct common_chat_templates_inputs {
|
|
|
71
72
|
common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
|
|
72
73
|
bool parallel_tool_calls = false;
|
|
73
74
|
bool extract_reasoning = true;
|
|
75
|
+
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
|
|
74
76
|
};
|
|
75
77
|
|
|
76
78
|
struct common_chat_params {
|