@fugood/llama.node 0.3.16 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +6 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +44 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +374 -19
- package/src/LlamaCompletionWorker.h +31 -10
- package/src/LlamaContext.cpp +216 -7
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
- package/src/llama.cpp/.github/workflows/build.yml +89 -767
- package/src/llama.cpp/.github/workflows/docker.yml +9 -6
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +19 -23
- package/src/llama.cpp/CMakeLists.txt +11 -1
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +35 -4
- package/src/llama.cpp/common/arg.cpp +844 -121
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +129 -107
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +64 -518
- package/src/llama.cpp/common/common.h +35 -45
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +31 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
- package/src/llama.cpp/common/minja/minja.hpp +186 -127
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +60 -50
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +2 -32
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +76 -106
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
- package/src/llama.cpp/ggml/src/ggml.c +170 -265
- package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
- package/src/llama.cpp/include/llama.h +82 -22
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +5 -3
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +4 -2
- package/src/llama.cpp/src/llama-adapter.cpp +43 -1
- package/src/llama.cpp/src/llama-arch.cpp +163 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +91 -16
- package/src/llama.cpp/src/llama-chat.h +7 -2
- package/src/llama.cpp/src/llama-context.cpp +479 -575
- package/src/llama.cpp/src/llama-context.h +44 -33
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +209 -157
- package/src/llama.cpp/src/llama-graph.h +38 -14
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
- package/src/llama.cpp/src/llama-kv-cache.h +283 -171
- package/src/llama.cpp/src/llama-memory.h +12 -2
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +1803 -330
- package/src/llama.cpp/src/llama-model.h +21 -2
- package/src/llama.cpp/src/llama-quant.cpp +33 -10
- package/src/llama.cpp/src/llama-sampling.cpp +25 -7
- package/src/llama.cpp/src/llama-vocab.cpp +86 -10
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +15 -1
- package/src/llama.cpp/tests/CMakeLists.txt +52 -31
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
- package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
- package/src/llama.cpp/tests/test-chat.cpp +15 -3
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
- package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
- package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
- package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
- package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
- package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
- package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
- package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
- package/src/llama.cpp/examples/llava/clip.h +0 -118
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/llava.cpp +0 -574
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
- package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
--- a/package/src/llama.cpp/common/common.h
+++ b/package/src/llama.cpp/common/common.h
@@ -6,6 +6,7 @@
 
 #include <set>
 #include <string>
+#include <string_view>
 #include <vector>
 #include <sstream>
 
@@ -66,7 +67,6 @@ enum llama_example {
     LLAMA_EXAMPLE_COMMON,
     LLAMA_EXAMPLE_SPECULATIVE,
     LLAMA_EXAMPLE_MAIN,
-    LLAMA_EXAMPLE_INFILL,
     LLAMA_EXAMPLE_EMBEDDING,
     LLAMA_EXAMPLE_PERPLEXITY,
     LLAMA_EXAMPLE_RETRIEVAL,
@@ -96,6 +96,7 @@ enum common_sampler_type {
    COMMON_SAMPLER_TYPE_XTC = 8,
    COMMON_SAMPLER_TYPE_INFILL = 9,
    COMMON_SAMPLER_TYPE_PENALTIES = 10,
+   COMMON_SAMPLER_TYPE_TOP_N_SIGMA = 11,
 };
 
 // dimensionality reduction methods, used by cvector-generator
@@ -121,10 +122,6 @@ struct common_grammar_trigger {
     common_grammar_trigger_type type;
     std::string value;
     llama_token token = LLAMA_TOKEN_NULL;
-
-    // T can only be nlohmann::ordered_json
-    template <class T> T to_json() const;
-    template <class T> static common_grammar_trigger from_json(const T & in);
 };
 
 // sampling parameters
@@ -165,6 +162,7 @@ struct common_params_sampling {
     std::vector<enum common_sampler_type> samplers = {
         COMMON_SAMPLER_TYPE_PENALTIES,
         COMMON_SAMPLER_TYPE_DRY,
+        COMMON_SAMPLER_TYPE_TOP_N_SIGMA,
         COMMON_SAMPLER_TYPE_TOP_K,
         COMMON_SAMPLER_TYPE_TYPICAL_P,
         COMMON_SAMPLER_TYPE_TOP_P,
@@ -184,6 +182,13 @@ struct common_params_sampling {
     std::string print() const;
 };
 
+struct common_params_model {
+    std::string path    = ""; // model local path // NOLINT
+    std::string url     = ""; // model url to download // NOLINT
+    std::string hf_repo = ""; // HF repo // NOLINT
+    std::string hf_file = ""; // HF file // NOLINT
+};
+
 struct common_params_speculative {
     std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
 
@@ -197,19 +202,11 @@ struct common_params_speculative {
     struct cpu_params cpuparams;
     struct cpu_params cpuparams_batch;
 
-    std::string hf_repo = ""; // HF repo // NOLINT
-    std::string hf_file = ""; // HF file // NOLINT
-
-    std::string model = ""; // draft model for speculative decoding // NOLINT
-    std::string model_url = ""; // model url to download // NOLINT
+    struct common_params_model model;
 };
 
 struct common_params_vocoder {
-    std::string hf_repo = ""; // HF repo // NOLINT
-    std::string hf_file = ""; // HF file // NOLINT
-
-    std::string model = ""; // model path // NOLINT
-    std::string model_url = ""; // model url to download // NOLINT
+    struct common_params_model model;
 
     std::string speaker_file = ""; // speaker file path // NOLINT
 
@@ -267,12 +264,10 @@ struct common_params {
     struct common_params_speculative speculative;
     struct common_params_vocoder vocoder;
 
-    std::string model = ""; // model path // NOLINT
+    struct common_params_model model;
+
     std::string model_alias = ""; // model alias // NOLINT
-    std::string model_url = ""; // model url to download // NOLINT
     std::string hf_token = ""; // HF token // NOLINT
-    std::string hf_repo = ""; // HF repo // NOLINT
-    std::string hf_file = ""; // HF file // NOLINT
     std::string prompt = ""; // NOLINT
     std::string system_prompt = ""; // NOLINT
     std::string prompt_file = ""; // store the external prompt file name // NOLINT
@@ -286,6 +281,7 @@ struct common_params {
     std::vector<std::string> in_files; // all input files
     std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
     std::vector<llama_model_kv_override> kv_overrides;
+    std::vector<llama_model_tensor_buft_override> tensor_buft_overrides;
 
     bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_adapter_lora_apply)
     std::vector<common_adapter_lora_info> lora_adapters; // lora adapter path with user defined scale
@@ -329,7 +325,6 @@ struct common_params {
     bool ctx_shift = true; // context shift on inifinite text generation
 
     bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
-    bool logits_all = false; // return logits for all tokens in the batch
     bool use_mmap = true; // use mmap for faster loads
     bool use_mlock = false; // use mlock to keep model in memory
     bool verbose_prompt = false; // print prompt tokens before generation
@@ -338,6 +333,7 @@ struct common_params {
     bool no_kv_offload = false; // disable KV offloading
     bool warmup = true; // warmup run
     bool check_tensors = false; // validate tensor data
+    bool no_op_offload = false; // globally disable offload host tensor operations to device
 
     bool single_turn = false; // single turn chat conversation
 
@@ -346,8 +342,10 @@ struct common_params {
 
     common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
 
-    // multimodal models (see examples/llava)
-    std::string mmproj = ""; // path to multimodal projector // NOLINT
+    // multimodal models (see tools/mtmd)
+    struct common_params_model mmproj;
+    bool mmproj_use_gpu = true; // use GPU for multimodal model
+    bool no_mmproj = false; // explicitly disable multimodal model
     std::vector<std::string> image; // path to image file(s)
 
     // embedding
@@ -370,6 +368,7 @@ struct common_params {
     bool use_jinja = false; // NOLINT
     bool enable_chat_template = true;
     common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+    bool prefill_assistant = true; // if true, any trailing assistant message will be prefilled into the response
 
     std::vector<std::string> api_keys;
 
@@ -413,13 +412,14 @@ struct common_params {
 
     bool process_output = false; // collect data for the output tensor
     bool compute_ppl = true; // whether to compute perplexity
+    bool parse_special = false; // whether to parse special tokens during imatrix tokenization
 
     // cvector-generator params
     int n_pca_batch = 100;
     int n_pca_iterations = 1000;
     dimre_method cvector_dimre_method = DIMRE_METHOD_PCA;
-    std::string cvector_positive_file = "examples/cvector-generator/positive.txt";
-    std::string cvector_negative_file = "examples/cvector-generator/negative.txt";
+    std::string cvector_positive_file = "tools/cvector-generator/positive.txt";
+    std::string cvector_negative_file = "tools/cvector-generator/negative.txt";
 
     bool spm_infill = false; // suffix/prefix/middle pattern for infill
 
@@ -505,10 +505,9 @@ static bool string_starts_with(const std::string & str,
     return str.rfind(prefix, 0) == 0;
 }
 
-static bool string_ends_with(const std::string & str,
-                             const std::string & suffix) {  // While we wait for C++20's std::string::ends_with...
-    return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0;
-}
+// While we wait for C++20's std::string::ends_with...
+bool string_ends_with(const std::string_view & str, const std::string_view & suffix);
+size_t string_find_partial_stop(const std::string_view & str, const std::string_view & stop);
 
 bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
 void string_process_escapes(std::string & input);
@@ -546,26 +545,11 @@ struct llama_model_params common_model_params_to_llama ( common_params
 struct llama_context_params common_context_params_to_llama(const common_params & params);
 struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params);
 
-struct llama_model * common_load_model_from_url(
-    const std::string & model_url,
-    const std::string & local_path,
-    const std::string & hf_token,
-    const struct llama_model_params & params);
-
-struct llama_model * common_load_model_from_hf(
-    const std::string & repo,
-    const std::string & remote_path,
-    const std::string & local_path,
-    const std::string & hf_token,
-    const struct llama_model_params & params);
-
-std::pair<std::string, std::string> common_get_hf_file(
-    const std::string & hf_repo_with_tag,
-    const std::string & hf_token);
-
 // clear LoRA adapters from context, then apply new list of adapters
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);
 
+std::string get_model_endpoint();
+
 //
 // Batch utils
 //
@@ -683,3 +667,9 @@ const char * const LLM_KV_SPLIT_COUNT = "split.count";
 const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
 
 }
+
+//
+// training utils
+//
+
+ggml_opt_dataset_t common_opt_dataset_init(struct llama_context * ctx, const std::vector<llama_token> & tokens, int64_t stride);
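
Note: the recurring change in this header is the consolidation of the model-source fields (local path, download URL, HF repo/file) into the new `common_params_model` struct, which is then reused by `common_params`, `common_params_speculative`, `common_params_vocoder`, and the new multimodal `mmproj` field. A minimal sketch of the idea (the struct mirrors the hunk above; `make_model_ref` is a hypothetical helper, not part of the package):

    #include <string>

    // Mirrors the struct added in common.h above.
    struct common_params_model {
        std::string path;    // model local path
        std::string url;     // model url to download
        std::string hf_repo; // HF repo
        std::string hf_file; // HF file
    };

    // Hypothetical helper: prefer a local file, otherwise fall back to a
    // Hugging Face repo/file pair. All four sources now travel in one
    // value instead of four parallel string members per model slot.
    static common_params_model make_model_ref(const std::string & local,
                                              const std::string & repo,
                                              const std::string & file) {
        common_params_model m;
        if (!local.empty()) {
            m.path = local;
        } else {
            m.hf_repo = repo;
            m.hf_file = file;
        }
        return m;
    }
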
--- a/package/src/llama.cpp/common/json-schema-to-grammar.cpp
+++ b/package/src/llama.cpp/common/json-schema-to-grammar.cpp
@@ -16,6 +16,9 @@ using json = nlohmann::ordered_json;
 static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "") {
     auto has_max = max_items != std::numeric_limits<int>::max();
 
+    if (max_items == 0) {
+        return "";
+    }
     if (min_items == 0 && max_items == 1) {
         return item_rule + "?";
     }
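
Note: the new `max_items == 0` guard covers schemas such as `{"maxItems": 0}`, where the repetition must match nothing; returning an empty rule early keeps the later cases (`?`, bounded repetition) from firing. A rough sketch of the surrounding logic under that assumption (simplified; the real function also threads a separator rule through):

    #include <limits>
    #include <string>

    // Simplified sketch of build_repetition with the new guard.
    static std::string build_repetition_sketch(const std::string & item,
                                               int min_items, int max_items) {
        if (max_items == 0) {
            return "";                  // nothing may appear at all
        }
        if (min_items == 0 && max_items == 1) {
            return item + "?";          // a single optional item
        }
        const bool has_max = max_items != std::numeric_limits<int>::max();
        // GBNF supports bounded repetition; an open upper bound is left empty.
        return item + "{" + std::to_string(min_items) + "," +
               (has_max ? std::to_string(max_items) : "") + "}";
    }
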
--- a/package/src/llama.cpp/common/llguidance.cpp
+++ b/package/src/llama.cpp/common/llguidance.cpp
@@ -11,25 +11,24 @@ struct llama_sampler_llg {
     std::string grammar_kind;
     std::string grammar_data;
     LlgTokenizer * tokenizer;
-    LlgConstraint * grammar;
-    LlgMaskResult llg_res;
-    bool has_llg_res;
+    LlgMatcher * grammar;
 };
 
-static LlgConstraint * llama_sampler_llg_new(LlgTokenizer * tokenizer, const char * grammar_kind,
-                                             const char * grammar_data) {
+static LlgMatcher * llama_sampler_llg_new(LlgTokenizer * tokenizer, const char * grammar_kind,
+                                          const char * grammar_data) {
     LlgConstraintInit cinit;
     llg_constraint_init_set_defaults(&cinit, tokenizer);
     const char * log_level = getenv("LLGUIDANCE_LOG_LEVEL");
     if (log_level && *log_level) {
         cinit.log_stderr_level = atoi(log_level);
     }
-    auto c = llg_new_constraint_any(&cinit, grammar_kind, grammar_data);
-    if (llg_get_error(c)) {
-        LOG_ERR("llg error: %s\n", llg_get_error(c));
-        llg_free_constraint(c);
+    auto c = llg_new_matcher(&cinit, grammar_kind, grammar_data);
+    if (llg_matcher_get_error(c)) {
+        LOG_ERR("llg error: %s\n", llg_matcher_get_error(c));
+        llg_free_matcher(c);
         return nullptr;
     }
+
     return c;
 }
 
@@ -40,39 +39,29 @@ static const char * llama_sampler_llg_name(const llama_sampler * /*smpl*/) {
 static void llama_sampler_llg_accept_impl(llama_sampler * smpl, llama_token token) {
     auto * ctx = (llama_sampler_llg *) smpl->ctx;
     if (ctx->grammar) {
-        LlgCommitResult res;
-        llg_commit_token(ctx->grammar, token, &res);
-        ctx->has_llg_res = false;
+        llg_matcher_consume_token(ctx->grammar, token);
     }
 }
 
 static void llama_sampler_llg_apply(llama_sampler * smpl, llama_token_data_array * cur_p) {
     auto * ctx = (llama_sampler_llg *) smpl->ctx;
     if (ctx->grammar) {
-        if (!ctx->has_llg_res) {
-            if (llg_compute_mask(ctx->grammar, &ctx->llg_res) == 0) {
-                ctx->has_llg_res = true;
+        const uint32_t * mask = llg_matcher_get_mask(ctx->grammar);
+        if (mask == nullptr) {
+            if (llg_matcher_compute_mask(ctx->grammar) == 0) {
+                mask = llg_matcher_get_mask(ctx->grammar);
             } else {
-                LOG_ERR("llg error: %s\n", llg_get_error(ctx->grammar));
-                llg_free_constraint(ctx->grammar);
+                LOG_ERR("llg error: %s\n", llg_matcher_get_error(ctx->grammar));
+                llg_free_matcher(ctx->grammar);
                 ctx->grammar = nullptr;
+                return;
             }
         }
-        if (ctx->has_llg_res) {
-            if (ctx->llg_res.is_stop) {
-                for (size_t i = 0; i < cur_p->size; ++i) {
-                    if (!llama_vocab_is_eog(ctx->vocab, cur_p->data[i].id)) {
-                        cur_p->data[i].logit = -INFINITY;
-                    }
-                }
-            } else {
-                const uint32_t * mask = ctx->llg_res.sample_mask;
-                for (size_t i = 0; i < cur_p->size; ++i) {
-                    auto token = cur_p->data[i].id;
-                    if ((mask[token / 32] & (1 << (token % 32))) == 0) {
-                        cur_p->data[i].logit = -INFINITY;
-                    }
-                }
+
+        for (size_t i = 0; i < cur_p->size; ++i) {
+            auto token = cur_p->data[i].id;
+            if ((mask[token / 32] & (1 << (token % 32))) == 0) {
+                cur_p->data[i].logit = -INFINITY;
             }
         }
     }
@@ -80,14 +69,9 @@ static void llama_sampler_llg_apply(llama_sampler * smpl, llama_token_data_array
 
 static void llama_sampler_llg_reset(llama_sampler * smpl) {
     auto * ctx = (llama_sampler_llg *) smpl->ctx;
-    if (!ctx->grammar) {
-        return;
+    if (ctx->grammar) {
+        llg_matcher_reset(ctx->grammar);
     }
-
-    auto * grammar_new = llama_sampler_llg_new(ctx->tokenizer, ctx->grammar_kind.c_str(), ctx->grammar_data.c_str());
-    llg_free_constraint(ctx->grammar);
-    ctx->grammar = grammar_new;
-    ctx->has_llg_res = false;
 }
 
 static llama_sampler * llama_sampler_llg_clone(const llama_sampler * smpl) {
@@ -102,7 +86,7 @@ static llama_sampler * llama_sampler_llg_clone(const llama_sampler * smpl) {
     if (ctx->grammar) {
         result_ctx->grammar_kind = ctx->grammar_kind;
         result_ctx->grammar_data = ctx->grammar_data;
-        result_ctx->grammar = llg_clone_constraint(ctx->grammar);
+        result_ctx->grammar = llg_clone_matcher(ctx->grammar);
         result_ctx->tokenizer = llg_clone_tokenizer(ctx->tokenizer);
     }
 }
@@ -114,7 +98,7 @@ static void llama_sampler_llg_free(llama_sampler * smpl) {
     const auto * ctx = (llama_sampler_llg *) smpl->ctx;
 
     if (ctx->grammar) {
-        llg_free_constraint(ctx->grammar);
+        llg_free_matcher(ctx->grammar);
         llg_free_tokenizer(ctx->tokenizer);
     }
 
@@ -205,6 +189,7 @@ static LlgTokenizer * llama_sampler_llg_new_tokenizer(const llama_vocab * vocab)
         /* .tokenize_fn = */ llama_sampler_llg_tokenize_fn,
         /* .use_approximate_greedy_tokenize_fn = */ false,
         /* .tokenize_user_data = */ vocab,
+        /* .slices = */ nullptr,
     };
 
     char error_buffer[1024];
@@ -239,9 +224,11 @@ llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab, const char * g
             /* .grammar_data = */ grammar_data,
             /* .tokenizer = */ tokenizer,
             /* .grammar = */ llama_sampler_llg_new(tokenizer, grammar_kind, grammar_data),
-            /* .llg_res = */ {},
-            /* .has_llg_res = */ false,
         };
+        if (ctx->grammar) {
+            GGML_ASSERT(((size_t) llama_vocab_n_tokens(vocab) + 31) / 32 * 4 ==
+                        llg_matcher_get_mask_byte_size(ctx->grammar));
+        }
     } else {
         *ctx = {
             /* .vocab = */ vocab,
@@ -249,15 +236,12 @@ llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab, const char * g
             /* .grammar_data = */ {},
             /* .tokenizer = */ nullptr,
             /* .grammar = */ nullptr,
-            /* .llg_res = */ {},
-            /* .has_llg_res = */ false,
         };
     }
 
     return llama_sampler_init(
         /* .iface = */ &llama_sampler_llg_i,
-        /* .ctx = */ ctx
-    );
+        /* .ctx = */ ctx);
 }
 
 #else
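
Note: this file tracks the llguidance API move from `LlgConstraint`/`LlgMaskResult` to `LlgMatcher`, whose token mask is a packed bitset with one bit per vocabulary token. A small sketch of the two invariants the new code relies on (derived from the hunks above; these helpers are illustrative, not an exported API):

    #include <cstddef>
    #include <cstdint>

    // Bit t of the mask is set iff token t is currently allowed by the
    // grammar; disallowed tokens get their logit forced to -INFINITY.
    static bool token_allowed(const uint32_t * mask, int32_t token) {
        return (mask[token / 32] & (1u << (token % 32))) != 0;
    }

    // Mask size in bytes: one uint32_t per 32 tokens, rounded up. This is
    // what the GGML_ASSERT in llama_sampler_init_llg checks against
    // llg_matcher_get_mask_byte_size().
    static size_t mask_byte_size(size_t n_vocab) {
        return (n_vocab + 31) / 32 * 4;
    }
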
--- a/package/src/llama.cpp/common/minja/chat-template.hpp
+++ b/package/src/llama.cpp/common/minja/chat-template.hpp
@@ -9,10 +9,21 @@
 #pragma once
 
 #include "minja.hpp"
-#include <json.hpp>
+
+#include <chrono>
+#include <cstddef>
+#include <cstdio>
+#include <ctime>
+#include <exception>
+#include <iomanip>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
 #include <string>
 #include <vector>
 
+#include <json.hpp>
+
 using json = nlohmann::ordered_json;
 
 namespace minja {
@@ -384,8 +395,8 @@ class chat_template {
 
         for (const auto & message_ : adjusted_messages) {
             auto message = message_;
-            if (!message.contains("role") || !message.contains("content")) {
-                throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump());
+            if (!message.contains("role") || (!message.contains("content") && !message.contains("tool_calls"))) {
+                throw std::runtime_error("message must have 'role' and one of 'content' or 'tool_calls' fields: " + message.dump());
             }
             std::string role = message.at("role");
 
@@ -406,7 +417,6 @@ class chat_template {
                     }
                 }
                 if (polyfill_tool_calls) {
-                    auto content = message.at("content");
                     auto tool_calls = json::array();
                     for (const auto & tool_call : message.at("tool_calls")) {
                         if (tool_call.at("type") != "function") {
@@ -425,8 +435,11 @@ class chat_template {
                     auto obj = json {
                         {"tool_calls", tool_calls},
                     };
-                    if (!content.is_null() && !content.empty()) {
-                        obj["content"] = content;
+                    if (message.contains("content")) {
+                        auto content = message.at("content");
+                        if (!content.is_null() && !content.empty()) {
+                            obj["content"] = content;
+                        }
                     }
                     message["content"] = obj.dump(2);
                     message.erase("tool_calls");
@@ -435,13 +448,12 @@ class chat_template {
             if (polyfill_tool_responses && role == "tool") {
                 message["role"] = "user";
                 auto obj = json {
-                    {"tool_response", {
-                        {"content", message.at("content")},
-                    }},
+                    {"tool_response", json::object()},
                 };
                 if (message.contains("name")) {
-                    obj["tool_response"]["name"] = message.at("name");
+                    obj["tool_response"]["tool"] = message.at("name");
                 }
+                obj["tool_response"]["content"] = message.at("content");
                 if (message.contains("tool_call_id")) {
                     obj["tool_response"]["tool_call_id"] = message.at("tool_call_id");
                 }
@@ -510,7 +522,7 @@ class chat_template {
     static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) {
         json messages_with_system = messages;
 
-        if (messages_with_system.size() > 0 && messages_with_system[0].at("role") == "system") {
+        if (!messages_with_system.empty() && messages_with_system[0].at("role") == "system") {
             std::string existing_system = messages_with_system.at(0).at("content");
             messages_with_system[0] = json {
                 {"role", "system"},