@novastera-oss/llamarn 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -14
- package/RNLlamaCpp.podspec +10 -3
- package/android/CMakeLists.txt +8 -0
- package/android/src/main/cpp/include/llama.h +62 -125
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/README.md +11 -3
- package/cpp/llama.cpp/build-xcframework.sh +1 -0
- package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/common/arg.cpp +153 -113
- package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
- package/cpp/llama.cpp/common/chat-parser.h +117 -0
- package/cpp/llama.cpp/common/chat.cpp +847 -699
- package/cpp/llama.cpp/common/chat.h +73 -6
- package/cpp/llama.cpp/common/common.cpp +50 -82
- package/cpp/llama.cpp/common/common.h +21 -17
- package/cpp/llama.cpp/common/json-partial.cpp +255 -0
- package/cpp/llama.cpp/common/json-partial.h +37 -0
- package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
- package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
- package/cpp/llama.cpp/common/regex-partial.h +56 -0
- package/cpp/llama.cpp/common/sampling.cpp +7 -8
- package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
- package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
- package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
- package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
- package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
- package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
- package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
- package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
- package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
- package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
- package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
- package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
- package/cpp/llama.cpp/include/llama.h +62 -125
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
- package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
- package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
- package/cpp/llama.cpp/models/templates/README.md +2 -0
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
- package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
- package/cpp/llama.cpp/src/llama-arch.h +2 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
- package/cpp/llama.cpp/src/llama-context.cpp +340 -123
- package/cpp/llama.cpp/src/llama-context.h +30 -0
- package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
- package/cpp/llama.cpp/src/llama-cparams.h +2 -0
- package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
- package/cpp/llama.cpp/src/llama-graph.h +52 -7
- package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
- package/cpp/llama.cpp/src/llama-hparams.h +37 -5
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
- package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
- package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
- package/cpp/llama.cpp/src/llama-memory.h +4 -3
- package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
- package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
- package/cpp/llama.cpp/src/llama-model.cpp +529 -172
- package/cpp/llama.cpp/src/llama-model.h +6 -1
- package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
- package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
- package/cpp/llama.cpp/src/llama-vocab.h +6 -0
- package/cpp/llama.cpp/src/llama.cpp +14 -0
- package/cpp/rn-completion.cpp +4 -2
- package/ios/include/chat.h +73 -6
- package/ios/include/common/minja/chat-template.hpp +9 -5
- package/ios/include/common/minja/minja.hpp +69 -36
- package/ios/include/common.h +21 -17
- package/ios/include/llama.h +62 -125
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/common/stb_image.h +0 -7988
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
|
@@ -1,10 +1,125 @@
|
|
|
1
1
|
#include "chat.h"
|
|
2
|
+
#include "chat-parser.h"
|
|
3
|
+
#include "common.h"
|
|
2
4
|
#include "json-schema-to-grammar.h"
|
|
3
5
|
#include "log.h"
|
|
6
|
+
#include "json-partial.h"
|
|
4
7
|
#include "minja/chat-template.hpp"
|
|
5
8
|
#include "minja/minja.hpp"
|
|
9
|
+
#include "regex-partial.h"
|
|
6
10
|
|
|
11
|
+
#include <cstdio>
|
|
12
|
+
#include <exception>
|
|
13
|
+
#include <iostream>
|
|
7
14
|
#include <optional>
|
|
15
|
+
#include <stdexcept>
|
|
16
|
+
#include <string>
|
|
17
|
+
#include <vector>
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
|
|
21
|
+
auto time = std::chrono::system_clock::to_time_t(now);
|
|
22
|
+
auto local_time = *std::localtime(&time);
|
|
23
|
+
std::ostringstream ss;
|
|
24
|
+
ss << std::put_time(&local_time, format.c_str());
|
|
25
|
+
auto res = ss.str();
|
|
26
|
+
return res;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
static std::string string_diff(const std::string & last, const std::string & current) {
|
|
30
|
+
if (last.empty()) {
|
|
31
|
+
return current;
|
|
32
|
+
}
|
|
33
|
+
if (!string_starts_with(current, last)) {
|
|
34
|
+
if (string_starts_with(last, current)) {
|
|
35
|
+
// This happens if the last generation ended on a partial stop word (not erased),
|
|
36
|
+
// and the current ended on a stop word (erased).
|
|
37
|
+
return "";
|
|
38
|
+
}
|
|
39
|
+
throw std::runtime_error("Invalid diff: '" + last + "' not found at start of '" + current + "'");
|
|
40
|
+
}
|
|
41
|
+
return current.substr(last.size());
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
static bool has_content_or_tool_calls(const common_chat_msg & msg) {
|
|
45
|
+
return !msg.content.empty() || !msg.tool_calls.empty();
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
template <>
|
|
49
|
+
json common_chat_msg::to_json_oaicompat() const
|
|
50
|
+
{
|
|
51
|
+
json message {
|
|
52
|
+
{"role", "assistant"},
|
|
53
|
+
};
|
|
54
|
+
if (!reasoning_content.empty()) {
|
|
55
|
+
message["reasoning_content"] = reasoning_content;
|
|
56
|
+
}
|
|
57
|
+
if (content.empty() && !tool_calls.empty()) {
|
|
58
|
+
message["content"] = json();
|
|
59
|
+
} else {
|
|
60
|
+
message["content"] = content;
|
|
61
|
+
}
|
|
62
|
+
if (!tool_calls.empty()) {
|
|
63
|
+
auto arr = json::array();
|
|
64
|
+
for (const auto & tc : tool_calls) {
|
|
65
|
+
arr.push_back({
|
|
66
|
+
{"type", "function"},
|
|
67
|
+
{"function", {
|
|
68
|
+
{"name", tc.name},
|
|
69
|
+
{"arguments", tc.arguments},
|
|
70
|
+
}},
|
|
71
|
+
{"id", tc.id},
|
|
72
|
+
// // Some templates generate and require an id (sometimes in a very specific format, e.g. Mistral Nemo).
|
|
73
|
+
// // We only generate a random id for the ones that don't generate one by themselves
|
|
74
|
+
// // (they also won't get to see it as their template likely doesn't use it, so it's all for the client)
|
|
75
|
+
// {"id", tc.id.empty() ? gen_tool_call_id() : tc.id},
|
|
76
|
+
});
|
|
77
|
+
}
|
|
78
|
+
message["tool_calls"] = arr;
|
|
79
|
+
}
|
|
80
|
+
return message;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg) {
|
|
84
|
+
std::vector<common_chat_msg_diff> diffs;
|
|
85
|
+
// if (previous_msg.reasoning_content != current.reasoning_content) {
|
|
86
|
+
// auto & diff = diffs.emplace_back();
|
|
87
|
+
// diff.reasoning_content_delta = string_diff(previous_msg.reasoning_content, current.reasoning_content);
|
|
88
|
+
// }
|
|
89
|
+
if (previous_msg.content != new_msg.content) {
|
|
90
|
+
auto & diff = diffs.emplace_back();
|
|
91
|
+
diff.content_delta = string_diff(previous_msg.content, new_msg.content);
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
if (new_msg.tool_calls.size() < previous_msg.tool_calls.size()) {
|
|
95
|
+
throw std::runtime_error("Invalid diff: now finding less tool calls!");
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
if (!previous_msg.tool_calls.empty()) {
|
|
99
|
+
auto idx = previous_msg.tool_calls.size() - 1;
|
|
100
|
+
const auto & pref = previous_msg.tool_calls[idx];
|
|
101
|
+
const auto & newf = new_msg.tool_calls[idx];
|
|
102
|
+
if (pref.name != newf.name) {
|
|
103
|
+
throw std::runtime_error("Invalid diff: tool call mismatch!");
|
|
104
|
+
}
|
|
105
|
+
auto args_diff = string_diff(pref.arguments, newf.arguments);
|
|
106
|
+
if (!args_diff.empty() || pref.id != newf.id) {
|
|
107
|
+
auto & diff = diffs.emplace_back();
|
|
108
|
+
diff.tool_call_index = idx;
|
|
109
|
+
if (pref.id != newf.id) {
|
|
110
|
+
diff.tool_call_delta.id = newf.id;
|
|
111
|
+
diff.tool_call_delta.name = newf.name;
|
|
112
|
+
}
|
|
113
|
+
diff.tool_call_delta.arguments = args_diff;
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
for (size_t idx = previous_msg.tool_calls.size(); idx < new_msg.tool_calls.size(); ++idx) {
|
|
117
|
+
auto & diff = diffs.emplace_back();
|
|
118
|
+
diff.tool_call_index = idx;
|
|
119
|
+
diff.tool_call_delta = new_msg.tool_calls[idx];
|
|
120
|
+
}
|
|
121
|
+
return diffs;
|
|
122
|
+
}
|
|
8
123
|
|
|
9
124
|
typedef minja::chat_template common_chat_template;
|
|
10
125
|
|
|
@@ -23,7 +138,8 @@ struct templates_params {
|
|
|
23
138
|
bool stream;
|
|
24
139
|
std::string grammar;
|
|
25
140
|
bool add_generation_prompt = true;
|
|
26
|
-
bool
|
|
141
|
+
bool enable_thinking = true;
|
|
142
|
+
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
|
|
27
143
|
};
|
|
28
144
|
|
|
29
145
|
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
|
|
@@ -267,6 +383,32 @@ json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & t
|
|
|
267
383
|
return result;
|
|
268
384
|
}
|
|
269
385
|
|
|
386
|
+
template <> json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
|
|
387
|
+
json delta = json::object();
|
|
388
|
+
// if (!diff.reasoning_content_delta.empty()) {
|
|
389
|
+
// delta["reasoning_content"] = msg.reasoning_content;
|
|
390
|
+
// }
|
|
391
|
+
if (!diff.content_delta.empty()) {
|
|
392
|
+
delta["content"] = diff.content_delta;
|
|
393
|
+
}
|
|
394
|
+
if (diff.tool_call_index != std::string::npos) {
|
|
395
|
+
json tool_call;
|
|
396
|
+
tool_call["index"] = diff.tool_call_index;
|
|
397
|
+
if (!diff.tool_call_delta.id.empty()) {
|
|
398
|
+
tool_call["id"] = diff.tool_call_delta.id;
|
|
399
|
+
tool_call["type"] = "function";
|
|
400
|
+
}
|
|
401
|
+
json function = json::object();
|
|
402
|
+
if (!diff.tool_call_delta.name.empty()) {
|
|
403
|
+
function["name"] = diff.tool_call_delta.name;
|
|
404
|
+
}
|
|
405
|
+
function["arguments"] = diff.tool_call_delta.arguments;
|
|
406
|
+
tool_call["function"] = function;
|
|
407
|
+
delta["tool_calls"] = json::array({tool_call});
|
|
408
|
+
}
|
|
409
|
+
return delta;
|
|
410
|
+
}
|
|
411
|
+
|
|
270
412
|
bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
|
|
271
413
|
if (use_jinja) {
|
|
272
414
|
try {
|
|
@@ -434,7 +576,7 @@ common_chat_templates_ptr common_chat_templates_init(
|
|
|
434
576
|
return tmpls;
|
|
435
577
|
}
|
|
436
578
|
|
|
437
|
-
|
|
579
|
+
const char * common_chat_format_name(common_chat_format format) {
|
|
438
580
|
switch (format) {
|
|
439
581
|
case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
|
|
440
582
|
case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
|
|
@@ -442,182 +584,127 @@ std::string common_chat_format_name(common_chat_format format) {
|
|
|
442
584
|
case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
|
|
443
585
|
case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
|
|
444
586
|
case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
|
|
445
|
-
case COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING: return "DeepSeek R1 (extract reasoning)";
|
|
446
587
|
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
|
|
447
588
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
|
|
448
589
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
|
|
449
590
|
case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
|
|
450
|
-
case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING: return "Hermes 2 Pro (extract reasoning)";
|
|
451
591
|
case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
|
|
452
|
-
case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: return "Command R7B (extract reasoning)";
|
|
453
592
|
default:
|
|
454
593
|
throw std::runtime_error("Unknown chat format");
|
|
455
594
|
}
|
|
456
595
|
}
|
|
457
596
|
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
json_error_locator() : position(0), found_error(false) {}
|
|
465
|
-
|
|
466
|
-
bool parse_error(std::size_t position, const std::string &, const json::exception &) override { // NOLINT
|
|
467
|
-
this->position = position - 1;
|
|
468
|
-
this->found_error = true;
|
|
469
|
-
return false;
|
|
470
|
-
}
|
|
471
|
-
bool null() override { return true; } // NOLINT
|
|
472
|
-
bool boolean(bool) override { return true; } // NOLINT
|
|
473
|
-
bool number_integer(number_integer_t) override { return true; } // NOLINT
|
|
474
|
-
bool number_unsigned(number_unsigned_t) override { return true; } // NOLINT
|
|
475
|
-
bool number_float(number_float_t, const string_t &) override { return true; } // NOLINT
|
|
476
|
-
bool string(string_t &) override { return true; } // NOLINT
|
|
477
|
-
bool binary(binary_t &) override { return true; } // NOLINT
|
|
478
|
-
bool start_object(std::size_t) override { return true; } // NOLINT
|
|
479
|
-
bool key(string_t &) override { return true; } // NOLINT
|
|
480
|
-
bool end_object() override { return true; }
|
|
481
|
-
bool start_array(std::size_t) override { return true; } // NOLINT
|
|
482
|
-
bool end_array() override { return true; }
|
|
483
|
-
};
|
|
484
|
-
json_error_locator err_loc;
|
|
485
|
-
json::sax_parse(it, end, &err_loc);
|
|
486
|
-
|
|
487
|
-
std::string::const_iterator temptative_end;
|
|
488
|
-
if (err_loc.found_error) {
|
|
489
|
-
temptative_end = it + err_loc.position;
|
|
490
|
-
} else {
|
|
491
|
-
temptative_end = end;
|
|
492
|
-
}
|
|
493
|
-
std::string json_sub {it, temptative_end};
|
|
494
|
-
try {
|
|
495
|
-
out = json::parse(json_sub);
|
|
496
|
-
it = temptative_end;
|
|
497
|
-
return true;
|
|
498
|
-
} catch (const std::exception &) {
|
|
499
|
-
return false;
|
|
500
|
-
}
|
|
501
|
-
}
|
|
502
|
-
|
|
503
|
-
static bool parse_literal(std::string::const_iterator & it, const std::string::const_iterator & end, const std::string & expected) {
|
|
504
|
-
auto expected_it = expected.begin();
|
|
505
|
-
auto tmp_it = it;
|
|
506
|
-
while (tmp_it != end && expected_it != expected.end() && *tmp_it == *expected_it) {
|
|
507
|
-
++tmp_it;
|
|
508
|
-
++expected_it;
|
|
509
|
-
}
|
|
510
|
-
if (expected_it == expected.end()) {
|
|
511
|
-
it = tmp_it;
|
|
512
|
-
return true;
|
|
513
|
-
}
|
|
514
|
-
return false;
|
|
515
|
-
}
|
|
516
|
-
|
|
517
|
-
static std::optional<std::smatch> parse_pattern(std::string::const_iterator & it, const std::string::const_iterator & end, const std::regex & expected) {
|
|
518
|
-
std::smatch match;
|
|
519
|
-
if (std::regex_match(it, end, match, expected)) {
|
|
520
|
-
it = match.suffix().first;
|
|
521
|
-
return match;
|
|
597
|
+
const char * common_reasoning_format_name(common_reasoning_format format) {
|
|
598
|
+
switch (format) {
|
|
599
|
+
case COMMON_REASONING_FORMAT_NONE: return "none";
|
|
600
|
+
case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek";
|
|
601
|
+
default:
|
|
602
|
+
throw std::runtime_error("Unknown reasoning format");
|
|
522
603
|
}
|
|
523
|
-
return std::nullopt;
|
|
524
604
|
}
|
|
525
605
|
|
|
526
|
-
static
|
|
527
|
-
|
|
528
|
-
|
|
606
|
+
static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
|
|
607
|
+
std::string arguments;
|
|
608
|
+
if (builder.is_partial()) {
|
|
609
|
+
arguments = (json {{"code", code + builder.healing_marker()}}).dump();
|
|
610
|
+
auto idx = arguments.find(builder.healing_marker());
|
|
611
|
+
if (idx != std::string::npos) {
|
|
612
|
+
arguments.resize(idx);
|
|
613
|
+
}
|
|
614
|
+
} else {
|
|
615
|
+
arguments = (json {{"code", code}}).dump();
|
|
529
616
|
}
|
|
617
|
+
return arguments;
|
|
530
618
|
}
|
|
531
619
|
|
|
532
620
|
/**
|
|
533
621
|
* Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
|
|
534
622
|
* Aggregates the prefix, suffix and in-between text into the content.
|
|
535
623
|
*/
|
|
536
|
-
static
|
|
537
|
-
|
|
538
|
-
const std::optional<
|
|
539
|
-
const std::
|
|
540
|
-
const std::
|
|
541
|
-
|
|
542
|
-
std::
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
624
|
+
static void parse_json_tool_calls(
|
|
625
|
+
common_chat_msg_parser & builder,
|
|
626
|
+
const std::optional<common_regex> & block_open,
|
|
627
|
+
const std::optional<common_regex> & function_regex_start_only,
|
|
628
|
+
const std::optional<common_regex> & function_regex,
|
|
629
|
+
const common_regex & close_regex,
|
|
630
|
+
const std::optional<common_regex> & block_close,
|
|
631
|
+
bool allow_raw_python = false,
|
|
632
|
+
const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name = nullptr) {
|
|
633
|
+
|
|
634
|
+
auto parse_tool_calls = [&]() {
|
|
635
|
+
size_t from = std::string::npos;
|
|
636
|
+
auto first = true;
|
|
637
|
+
while (true) {
|
|
638
|
+
auto res = function_regex_start_only && first
|
|
639
|
+
? builder.try_consume_regex(*function_regex_start_only)
|
|
640
|
+
: function_regex
|
|
641
|
+
? builder.try_find_regex(*function_regex, from)
|
|
642
|
+
: std::nullopt;
|
|
643
|
+
if (res) {
|
|
644
|
+
std::string name;
|
|
645
|
+
if (get_function_name) {
|
|
646
|
+
name = get_function_name(*res);
|
|
647
|
+
} else {
|
|
648
|
+
GGML_ASSERT(res->groups.size() == 2);
|
|
649
|
+
name = builder.str(res->groups[1]);
|
|
650
|
+
}
|
|
651
|
+
first = false;
|
|
652
|
+
if (name.empty()) {
|
|
653
|
+
// get_function_name signalled us that we should skip this match and treat it as content.
|
|
654
|
+
from = res->groups[0].begin + 1;
|
|
655
|
+
continue;
|
|
656
|
+
}
|
|
657
|
+
from = std::string::npos;
|
|
559
658
|
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
659
|
+
auto maybe_raw_python = name == "python" && allow_raw_python;
|
|
660
|
+
if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
|
|
661
|
+
if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
|
|
662
|
+
if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
|
|
663
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
|
664
|
+
}
|
|
665
|
+
builder.consume_regex(close_regex);
|
|
666
|
+
}
|
|
667
|
+
continue;
|
|
668
|
+
}
|
|
669
|
+
if (maybe_raw_python) {
|
|
670
|
+
auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
|
|
671
|
+
if (!builder.add_tool_call(name, "", arguments)) {
|
|
672
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
|
673
|
+
}
|
|
674
|
+
return;
|
|
675
|
+
}
|
|
676
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
|
677
|
+
}
|
|
565
678
|
break;
|
|
566
679
|
}
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
it = rit->suffix().first;
|
|
570
|
-
|
|
571
|
-
json arguments;
|
|
572
|
-
if (parse_json(it, end, arguments)) {
|
|
573
|
-
if (!std::regex_search(it, end, match, close_regex)) {
|
|
574
|
-
throw std::runtime_error("Malformed input, missing closing pattern: " + input);
|
|
575
|
-
}
|
|
576
|
-
it = match.suffix().first;
|
|
577
|
-
result.tool_calls.push_back({name, arguments.is_string() ? arguments.get<std::string>() : arguments.dump(), /* id= */ ""});
|
|
578
|
-
} else {
|
|
579
|
-
if (allow_raw_python && name == "python") {
|
|
580
|
-
result.tool_calls.push_back({name, json({{"code", std::string(it, end)}}).dump(), /* id= */ ""});
|
|
581
|
-
break;
|
|
582
|
-
}
|
|
583
|
-
throw std::runtime_error("Failed to parse json tool call arguments: " + input);
|
|
680
|
+
if (block_close) {
|
|
681
|
+
builder.consume_regex(*block_close);
|
|
584
682
|
}
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
683
|
+
builder.consume_spaces();
|
|
684
|
+
builder.add_content(builder.consume_rest());
|
|
685
|
+
};
|
|
686
|
+
if (block_open) {
|
|
687
|
+
if (auto res = builder.try_find_regex(*block_open)) {
|
|
688
|
+
parse_tool_calls();
|
|
689
|
+
} else {
|
|
690
|
+
builder.add_content(builder.consume_rest());
|
|
590
691
|
}
|
|
591
|
-
|
|
692
|
+
} else {
|
|
693
|
+
parse_tool_calls();
|
|
592
694
|
}
|
|
593
|
-
return result;
|
|
594
695
|
}
|
|
595
696
|
|
|
596
|
-
static
|
|
597
|
-
const
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
}
|
|
604
|
-
static common_chat_msg parse_prefixed_json_tool_call_array(const std::string& input, const std::string & prefix, size_t rstrip_prefix = 0) {
|
|
605
|
-
auto content_end = input.find(prefix);
|
|
606
|
-
size_t tc_start = std::string::npos;
|
|
607
|
-
|
|
608
|
-
common_chat_msg result;
|
|
609
|
-
result.role = "assistant";
|
|
610
|
-
if (content_end == std::string::npos) {
|
|
611
|
-
result.content = input;
|
|
612
|
-
} else {
|
|
613
|
-
tc_start = content_end + prefix.size() - rstrip_prefix;
|
|
614
|
-
result.content = input.substr(0, content_end);
|
|
615
|
-
auto tool_calls = json::parse(input.substr(tc_start));
|
|
616
|
-
for (const auto & tool_call : tool_calls) {
|
|
617
|
-
result.tool_calls.emplace_back(process_tool_call(tool_call));
|
|
697
|
+
static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder, const common_regex & prefix, size_t rstrip_prefix = 0) {
|
|
698
|
+
static const std::vector<std::vector<std::string>> args_paths = {{"arguments"}};
|
|
699
|
+
if (auto res = builder.try_find_regex(prefix)) {
|
|
700
|
+
builder.move_back(rstrip_prefix);
|
|
701
|
+
auto tool_calls = builder.consume_json_with_dumped_args(args_paths);
|
|
702
|
+
if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) {
|
|
703
|
+
throw common_chat_msg_partial_exception("incomplete tool call array");
|
|
618
704
|
}
|
|
705
|
+
} else {
|
|
706
|
+
builder.add_content(builder.consume_rest());
|
|
619
707
|
}
|
|
620
|
-
return result;
|
|
621
708
|
}
|
|
622
709
|
|
|
623
710
|
static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
|
|
@@ -744,29 +831,36 @@ static common_chat_params common_chat_params_init_generic(const common_chat_temp
|
|
|
744
831
|
data.format = COMMON_CHAT_FORMAT_GENERIC;
|
|
745
832
|
return data;
|
|
746
833
|
}
|
|
747
|
-
static
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
834
|
+
static void common_chat_parse_generic(common_chat_msg_parser & builder) {
|
|
835
|
+
if (!builder.syntax().parse_tool_calls) {
|
|
836
|
+
builder.add_content(builder.consume_rest());
|
|
837
|
+
return;
|
|
838
|
+
}
|
|
839
|
+
static const std::vector<std::vector<std::string>> content_paths = {
|
|
840
|
+
{"response"},
|
|
841
|
+
};
|
|
842
|
+
static const std::vector<std::vector<std::string>> args_paths = {
|
|
843
|
+
{"tool_call", "arguments"},
|
|
844
|
+
{"tool_calls", "arguments"},
|
|
845
|
+
};
|
|
846
|
+
auto data = builder.consume_json_with_dumped_args(args_paths, content_paths);
|
|
847
|
+
if (data.value.contains("tool_calls")) {
|
|
848
|
+
if (!builder.add_tool_calls(data.value.at("tool_calls")) || data.is_partial) {
|
|
849
|
+
throw common_chat_msg_partial_exception("incomplete tool calls");
|
|
758
850
|
}
|
|
759
|
-
} else if (data.contains("tool_call")) {
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
851
|
+
} else if (data.value.contains("tool_call")) {
|
|
852
|
+
if (!builder.add_tool_call(data.value.at("tool_call")) || data.is_partial) {
|
|
853
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
|
854
|
+
}
|
|
855
|
+
} else if (data.value.contains("response")) {
|
|
856
|
+
const auto & response = data.value.at("response");
|
|
857
|
+
builder.add_content(response.is_string() ? response.template get<std::string>() : response.dump(2));
|
|
858
|
+
if (data.is_partial) {
|
|
859
|
+
throw common_chat_msg_partial_exception("incomplete response");
|
|
860
|
+
}
|
|
861
|
+
} else {
|
|
862
|
+
throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON");
|
|
768
863
|
}
|
|
769
|
-
return result;
|
|
770
864
|
}
|
|
771
865
|
|
|
772
866
|
static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
@@ -813,12 +907,44 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
|
|
|
813
907
|
data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
|
|
814
908
|
return data;
|
|
815
909
|
}
|
|
816
|
-
static
|
|
817
|
-
|
|
910
|
+
static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
|
|
911
|
+
if (!builder.syntax().parse_tool_calls) {
|
|
912
|
+
builder.add_content(builder.consume_rest());
|
|
913
|
+
return;
|
|
914
|
+
}
|
|
915
|
+
|
|
916
|
+
static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
|
|
917
|
+
parse_prefixed_json_tool_call_array(builder, prefix);
|
|
818
918
|
}
|
|
819
919
|
|
|
820
920
|
static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
821
921
|
common_chat_params data;
|
|
922
|
+
|
|
923
|
+
auto adjusted_messages = json::array();
|
|
924
|
+
for (const auto & msg : inputs.messages) {
|
|
925
|
+
auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
|
|
926
|
+
auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
|
|
927
|
+
if (has_reasoning_content && has_tool_calls) {
|
|
928
|
+
auto adjusted_message = msg;
|
|
929
|
+
adjusted_message["tool_plan"] = msg.at("reasoning_content");
|
|
930
|
+
adjusted_message.erase("reasoning_content");
|
|
931
|
+
adjusted_messages.push_back(adjusted_message);
|
|
932
|
+
} else {
|
|
933
|
+
adjusted_messages.push_back(msg);
|
|
934
|
+
}
|
|
935
|
+
}
|
|
936
|
+
data.prompt = apply(tmpl, adjusted_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {});
|
|
937
|
+
data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
|
|
938
|
+
if (string_ends_with(data.prompt, "<|START_THINKING|>")) {
|
|
939
|
+
if (!inputs.enable_thinking) {
|
|
940
|
+
data.prompt += "<|END_THINKING|>";
|
|
941
|
+
} else {
|
|
942
|
+
data.thinking_forced_open = true;
|
|
943
|
+
}
|
|
944
|
+
} else if (!inputs.enable_thinking && string_ends_with(data.prompt, "<|CHATBOT_TOKEN|>")) {
|
|
945
|
+
data.prompt += "<|START_THINKING|><|END_THINKING|>";
|
|
946
|
+
}
|
|
947
|
+
|
|
822
948
|
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
823
949
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
824
950
|
auto schemas = json::array();
|
|
@@ -849,11 +975,16 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
|
|
|
849
975
|
if (!inputs.parallel_tool_calls) {
|
|
850
976
|
schema["maxItems"] = 1;
|
|
851
977
|
}
|
|
852
|
-
builder.add_rule("root",
|
|
978
|
+
builder.add_rule("root",
|
|
979
|
+
std::string(data.thinking_forced_open ? "( \"<|END_THINKING|>\" space )? " : "") +
|
|
980
|
+
"\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
|
|
853
981
|
});
|
|
854
982
|
data.grammar_triggers.push_back({
|
|
855
|
-
|
|
856
|
-
|
|
983
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
|
|
984
|
+
// If thinking_forced_open, then we capture the </think> tag in the grammar,
|
|
985
|
+
// (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
|
|
986
|
+
std::string(data.thinking_forced_open ? "[\\s\\S]*?(<\\|END_THINKING\\|>\\s*)" : "(?:<\\|START_THINKING\\|>[\\s\\S]*?<\\|END_THINKING\\|>\\s*)?") +
|
|
987
|
+
"(<\\|START_ACTION\\|>)[\\s\\S]*"
|
|
857
988
|
});
|
|
858
989
|
data.preserved_tokens = {
|
|
859
990
|
"<|START_ACTION|>",
|
|
@@ -863,61 +994,40 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
|
|
|
863
994
|
"<|START_THINKING|>",
|
|
864
995
|
"<|END_THINKING|>",
|
|
865
996
|
};
|
|
866
|
-
auto adjusted_messages = json::array();
|
|
867
|
-
for (const auto & msg : inputs.messages) {
|
|
868
|
-
auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
|
|
869
|
-
auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
|
|
870
|
-
if (has_reasoning_content && has_tool_calls) {
|
|
871
|
-
auto adjusted_message = msg;
|
|
872
|
-
adjusted_message["tool_plan"] = msg.at("reasoning_content");
|
|
873
|
-
adjusted_message.erase("reasoning_content");
|
|
874
|
-
adjusted_messages.push_back(adjusted_message);
|
|
875
|
-
} else {
|
|
876
|
-
adjusted_messages.push_back(msg);
|
|
877
|
-
}
|
|
878
|
-
}
|
|
879
|
-
data.prompt = apply(tmpl, adjusted_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {});
|
|
880
|
-
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING : COMMON_CHAT_FORMAT_COMMAND_R7B;
|
|
881
997
|
return data;
|
|
882
998
|
}
|
|
883
|
-
static common_chat_msg common_chat_parse_command_r7b(const std::string & input, bool extract_reasoning) {
|
|
884
|
-
static const std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S]*?)<\\|END_THINKING\\|>)([\\s\\S]*)");
|
|
885
|
-
static const std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S]*?)<\\|END_ACTION\\|>");
|
|
886
|
-
static const std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S]*?)<\\|END_RESPONSE\\|>");
|
|
887
|
-
|
|
888
|
-
std::smatch match;
|
|
889
|
-
|
|
890
|
-
common_chat_msg result;
|
|
891
|
-
result.role = "assistant";
|
|
892
|
-
|
|
893
|
-
std::string rest = input;
|
|
894
999
|
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
1000
|
+
static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
|
|
1001
|
+
builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
|
|
1002
|
+
|
|
1003
|
+
static const common_regex start_action_regex("<\\|START_ACTION\\|>");
|
|
1004
|
+
static const common_regex end_action_regex("<\\|END_ACTION\\|>");
|
|
1005
|
+
static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
|
|
1006
|
+
static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
|
|
1007
|
+
|
|
1008
|
+
if (auto res = builder.try_find_regex(start_action_regex)) {
|
|
1009
|
+
// If we didn't extract thoughts, prelude includes them.
|
|
1010
|
+
auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}});
|
|
1011
|
+
for (const auto & tool_call : tool_calls.value) {
|
|
1012
|
+
std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
|
|
1013
|
+
std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
|
|
1014
|
+
std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
|
|
1015
|
+
if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
|
|
1016
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
|
1017
|
+
}
|
|
901
1018
|
}
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
/* .arguments = */ action.at("parameters").dump(),
|
|
911
|
-
/* .id = */ action.at("tool_call_id"),
|
|
912
|
-
});
|
|
1019
|
+
if (tool_calls.is_partial) {
|
|
1020
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
|
1021
|
+
}
|
|
1022
|
+
builder.consume_regex(end_action_regex);
|
|
1023
|
+
} else if (auto res = builder.try_find_regex(start_response_regex)) {
|
|
1024
|
+
if (!builder.try_find_regex(end_response_regex)) {
|
|
1025
|
+
builder.add_content(builder.consume_rest());
|
|
1026
|
+
throw common_chat_msg_partial_exception(end_response_regex.str());
|
|
913
1027
|
}
|
|
914
|
-
} else if (std::regex_match(rest, match, response_regex)) {
|
|
915
|
-
auto response = match[1].str();
|
|
916
|
-
result.content += response;
|
|
917
1028
|
} else {
|
|
918
|
-
|
|
1029
|
+
builder.add_content(builder.consume_rest());
|
|
919
1030
|
}
|
|
920
|
-
return result;
|
|
921
1031
|
}
|
|
922
1032
|
|
|
923
1033
|
static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
|
|
@@ -939,116 +1049,172 @@ static void expect_tool_parameters(const std::string & name, const json & parame
|
|
|
939
1049
|
}
|
|
940
1050
|
}
|
|
941
1051
|
|
|
942
|
-
static common_chat_params
|
|
1052
|
+
static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
|
|
943
1053
|
auto builtin_tools = json::array();
|
|
944
1054
|
common_chat_params data;
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
|
|
950
|
-
if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
|
|
951
|
-
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
|
|
952
|
-
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
|
|
953
|
-
expect_tool_parameters(name, parameters, {"query"});
|
|
954
|
-
} else if (name == "python" || name == "code_interpreter") {
|
|
955
|
-
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
|
|
956
|
-
expect_tool_parameters(name, parameters, {"code"});
|
|
957
|
-
} else {
|
|
958
|
-
return false;
|
|
959
|
-
}
|
|
1055
|
+
if (!inputs.tools.is_null()) {
|
|
1056
|
+
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
1057
|
+
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
1058
|
+
std::vector<std::string> tool_rules;
|
|
960
1059
|
|
|
961
|
-
std::
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
1060
|
+
auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
|
|
1061
|
+
if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
|
|
1062
|
+
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
|
|
1063
|
+
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
|
|
1064
|
+
expect_tool_parameters(name, parameters, {"query"});
|
|
1065
|
+
} else if (name == "python" || name == "code_interpreter") {
|
|
1066
|
+
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
|
|
1067
|
+
expect_tool_parameters(name, parameters, {"code"});
|
|
1068
|
+
} else {
|
|
1069
|
+
return false;
|
|
1070
|
+
}
|
|
965
1071
|
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
name + "-
|
|
969
|
-
|
|
970
|
-
builtin_tools.push_back(name);
|
|
1072
|
+
std::vector<std::string> kvs;
|
|
1073
|
+
for (const auto & [key, value] : parameters.at("properties").items()) {
|
|
1074
|
+
kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT
|
|
1075
|
+
}
|
|
971
1076
|
|
|
972
|
-
|
|
973
|
-
|
|
1077
|
+
tool_rules.push_back(
|
|
1078
|
+
builder.add_rule(
|
|
1079
|
+
name + "-call",
|
|
1080
|
+
"\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
|
|
1081
|
+
builtin_tools.push_back(name);
|
|
974
1082
|
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
1083
|
+
return true;
|
|
1084
|
+
};
|
|
1085
|
+
|
|
1086
|
+
foreach_function(inputs.tools, [&](const json & tool) {
|
|
1087
|
+
const auto & function = tool.at("function");
|
|
1088
|
+
std::string name = function.at("name");
|
|
1089
|
+
auto parameters = function.at("parameters");
|
|
1090
|
+
builder.resolve_refs(parameters);
|
|
980
1091
|
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
1092
|
+
// https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
|
|
1093
|
+
if (allow_python_tag_builtin_tools) {
|
|
1094
|
+
handle_builtin_tool(name, parameters);
|
|
1095
|
+
}
|
|
1096
|
+
tool_rules.push_back(
|
|
1097
|
+
builder.add_rule(
|
|
1098
|
+
name + "-call",
|
|
1099
|
+
"\"{\" space "
|
|
1100
|
+
"( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? "
|
|
1101
|
+
" \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
|
|
1102
|
+
" \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
|
|
1103
|
+
"\"}\" space"));
|
|
1104
|
+
});
|
|
1105
|
+
// Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
|
|
1106
|
+
data.grammar_triggers.push_back({
|
|
1107
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
|
|
1108
|
+
"(\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\")[\\s\\S]*", // + name + "\"[\\s\\S]*",
|
|
1109
|
+
});
|
|
1110
|
+
if (!builtin_tools.empty()) {
|
|
1111
|
+
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
|
|
1112
|
+
data.preserved_tokens.push_back("<|python_tag|>");
|
|
984
1113
|
}
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
"\"{\" space "
|
|
989
|
-
"( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? "
|
|
990
|
-
" \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
|
|
991
|
-
" \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
|
|
992
|
-
"\"}\" space"));
|
|
1114
|
+
// Allow a few empty lines on top of the usual constrained json schema space rule.
|
|
1115
|
+
builder.add_rule("root", string_join(tool_rules, " | "));
|
|
1116
|
+
data.additional_stops.push_back("<|eom_id|>");
|
|
993
1117
|
});
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
|
|
1001
|
-
data.preserved_tokens.push_back("<|python_tag|>");
|
|
1002
|
-
}
|
|
1003
|
-
// Allow a few empty lines on top of the usual constrained json schema space rule.
|
|
1004
|
-
builder.add_rule("root", string_join(tool_rules, " | "));
|
|
1005
|
-
});
|
|
1006
|
-
data.additional_stops.push_back("<|eom_id|>");
|
|
1118
|
+
data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
|
|
1119
|
+
? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
|
|
1120
|
+
: COMMON_CHAT_FORMAT_LLAMA_3_X;
|
|
1121
|
+
} else {
|
|
1122
|
+
data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
|
1123
|
+
}
|
|
1007
1124
|
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {
|
|
1125
|
+
{"date_string", format_time(inputs.now, "%d %b %Y")},
|
|
1008
1126
|
{"tools_in_user_message", false},
|
|
1009
1127
|
{"builtin_tools", builtin_tools.empty() ? json() : builtin_tools},
|
|
1010
1128
|
});
|
|
1011
|
-
data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
|
|
1012
|
-
? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
|
|
1013
|
-
: COMMON_CHAT_FORMAT_LLAMA_3_X;
|
|
1014
1129
|
return data;
|
|
1015
1130
|
}
|
|
1016
|
-
static
|
|
1017
|
-
|
|
1018
|
-
|
|
1131
|
+
static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
|
|
1132
|
+
if (!builder.syntax().parse_tool_calls) {
|
|
1133
|
+
builder.add_content(builder.consume_rest());
|
|
1134
|
+
return;
|
|
1135
|
+
}
|
|
1136
|
+
|
|
1137
|
+
static const common_regex function_regex(
|
|
1019
1138
|
"\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
|
|
1020
|
-
static const
|
|
1021
|
-
|
|
1139
|
+
static const common_regex close_regex("\\}\\s*");
|
|
1140
|
+
|
|
1141
|
+
static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(");
|
|
1142
|
+
static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*");
|
|
1022
1143
|
|
|
1023
1144
|
if (with_builtin_tools) {
|
|
1024
|
-
|
|
1025
|
-
if (
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1145
|
+
static const common_regex builtin_call_regex("<\\|python_tag\\|>");
|
|
1146
|
+
if (auto res = builder.try_find_regex(builtin_call_regex)) {
|
|
1147
|
+
auto fun_res = builder.consume_regex(function_name_regex);
|
|
1148
|
+
auto function_name = builder.str(fun_res.groups[1]);
|
|
1149
|
+
|
|
1150
|
+
common_healing_marker healing_marker;
|
|
1151
|
+
json args = json::object();
|
|
1152
|
+
while (true) {
|
|
1153
|
+
if (auto arg_res = builder.try_consume_regex(arg_name_regex)) {
|
|
1154
|
+
auto arg_name = builder.str(arg_res->groups[1]);
|
|
1155
|
+
auto partial = builder.consume_json();
|
|
1156
|
+
args[arg_name] = partial.json;
|
|
1157
|
+
healing_marker.marker = partial.healing_marker.marker;
|
|
1158
|
+
healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker;
|
|
1159
|
+
builder.consume_spaces();
|
|
1160
|
+
if (!builder.try_consume_literal(",")) {
|
|
1161
|
+
break;
|
|
1162
|
+
}
|
|
1163
|
+
} else {
|
|
1164
|
+
break;
|
|
1165
|
+
}
|
|
1166
|
+
}
|
|
1167
|
+
builder.consume_literal(")");
|
|
1168
|
+
builder.consume_spaces();
|
|
1169
|
+
|
|
1170
|
+
auto arguments = args.dump();
|
|
1171
|
+
if (!builder.add_tool_call(function_name, "", arguments)) {
|
|
1172
|
+
throw common_chat_msg_partial_exception("Incomplete tool call");
|
|
1044
1173
|
}
|
|
1174
|
+
return;
|
|
1045
1175
|
}
|
|
1046
1176
|
}
|
|
1047
|
-
|
|
1177
|
+
parse_json_tool_calls(
|
|
1178
|
+
builder,
|
|
1179
|
+
/* block_open= */ std::nullopt,
|
|
1180
|
+
/* function_regex_start_only= */ function_regex,
|
|
1181
|
+
/* function_regex= */ std::nullopt,
|
|
1182
|
+
close_regex,
|
|
1183
|
+
std::nullopt);
|
|
1184
|
+
|
|
1048
1185
|
}
|
|
1049
1186
|
|
|
1050
1187
|
static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
1051
1188
|
common_chat_params data;
|
|
1189
|
+
auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
|
1190
|
+
|
|
1191
|
+
// Hacks to fix the official (broken) prompt.
|
|
1192
|
+
// It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
|
|
1193
|
+
// until the official template is fixed.
|
|
1194
|
+
if (tmpl.source().find("{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}") != std::string::npos) {
|
|
1195
|
+
// Don't leave the chat dangling after tool results
|
|
1196
|
+
if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) {
|
|
1197
|
+
prompt += "<|end▁of▁sentence|>";
|
|
1198
|
+
if (inputs.add_generation_prompt) {
|
|
1199
|
+
prompt += "<|Assistant|>";
|
|
1200
|
+
}
|
|
1201
|
+
}
|
|
1202
|
+
// Fix up tool call delta example added by Minja
|
|
1203
|
+
prompt = std::regex_replace(
|
|
1204
|
+
prompt,
|
|
1205
|
+
std::regex("(<|tool▁call▁end|>)[\\s\\r\\n]*(<|tool▁outputs▁begin|>|<|User|>)"),
|
|
1206
|
+
"$1<|tool▁calls▁end|><|end▁of▁sentence|>$2");
|
|
1207
|
+
}
|
|
1208
|
+
data.prompt = prompt;
|
|
1209
|
+
data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
|
|
1210
|
+
if (string_ends_with(data.prompt, "<think>\n")) {
|
|
1211
|
+
if (!inputs.enable_thinking) {
|
|
1212
|
+
data.prompt += "</think>";
|
|
1213
|
+
} else {
|
|
1214
|
+
data.thinking_forced_open = true;
|
|
1215
|
+
}
|
|
1216
|
+
}
|
|
1217
|
+
|
|
1052
1218
|
if (inputs.tools.is_array() && !inputs.tools.empty()) {
|
|
1053
1219
|
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
|
|
1054
1220
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
@@ -1059,21 +1225,25 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
|
|
|
1059
1225
|
auto parameters = function.at("parameters");
|
|
1060
1226
|
builder.resolve_refs(parameters);
|
|
1061
1227
|
tool_rules.push_back(builder.add_rule(name + "-call",
|
|
1062
|
-
"\"<|tool▁call▁begin
|
|
1228
|
+
"( \"<|tool▁call▁begin|>\" )? \"function<|tool▁sep|>" + name + "\\n"
|
|
1063
1229
|
"```json\\n\" " + builder.add_schema(name + "-args", parameters) + " "
|
|
1064
1230
|
"\"```<|tool▁call▁end|>\""));
|
|
1065
1231
|
});
|
|
1066
1232
|
// Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
|
|
1067
1233
|
// so we accept common variants (then it's all constrained)
|
|
1068
1234
|
builder.add_rule("root",
|
|
1069
|
-
|
|
1235
|
+
std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
|
|
1236
|
+
"( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) "
|
|
1070
1237
|
"(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
|
|
1071
1238
|
"\"<|tool▁calls▁end|>\""
|
|
1072
1239
|
" space");
|
|
1073
|
-
data.grammar_triggers.push_back({
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1240
|
+
data.grammar_triggers.push_back({
|
|
1241
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
|
|
1242
|
+
// If thinking_forced_open, then we capture the </think> tag in the grammar,
|
|
1243
|
+
// (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
|
|
1244
|
+
std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
|
|
1245
|
+
"(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*"
|
|
1246
|
+
});
|
|
1077
1247
|
data.preserved_tokens = {
|
|
1078
1248
|
"<think>",
|
|
1079
1249
|
"</think>",
|
|
@@ -1085,72 +1255,34 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
|
|
|
1085
1255
|
};
|
|
1086
1256
|
});
|
|
1087
1257
|
}
|
|
1088
|
-
auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
|
1089
|
-
|
|
1090
|
-
// Hacks to fix the official (broken) prompt.
|
|
1091
|
-
// It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
|
|
1092
|
-
// until the official template is fixed.
|
|
1093
|
-
if (tmpl.source().find("{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}") != std::string::npos) {
|
|
1094
|
-
// Don't leave the chat dangling after tool results
|
|
1095
|
-
if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) {
|
|
1096
|
-
prompt += "<|end▁of▁sentence|>";
|
|
1097
|
-
if (inputs.add_generation_prompt) {
|
|
1098
|
-
prompt += "<|Assistant|>";
|
|
1099
|
-
}
|
|
1100
|
-
}
|
|
1101
|
-
// Fix up tool call delta example added by Minja
|
|
1102
|
-
prompt = std::regex_replace(
|
|
1103
|
-
prompt,
|
|
1104
|
-
std::regex("(<|tool▁call▁end|>)[\\s\\r\\n]*(<|tool▁outputs▁begin|>|<|User|>)"),
|
|
1105
|
-
"$1<|tool▁calls▁end|><|end▁of▁sentence|>$2");
|
|
1106
|
-
}
|
|
1107
|
-
data.prompt = prompt;
|
|
1108
|
-
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING : COMMON_CHAT_FORMAT_DEEPSEEK_R1;
|
|
1109
1258
|
return data;
|
|
1110
1259
|
}
|
|
1111
|
-
static
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
return handle_think_tag_prelude(input, extract_reasoning, [](const std::string & input) {
|
|
1131
|
-
static const std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
|
|
1132
|
-
static const std::regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
|
|
1133
|
-
static const std::regex tool_calls_regex("[\\s\\r\\n]*(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>)([\\s\\S\\r\\n]*?)<|tool▁calls▁end|>");
|
|
1134
|
-
|
|
1135
|
-
common_chat_msg msg;
|
|
1136
|
-
msg.role = "assistant";
|
|
1137
|
-
std::smatch match;
|
|
1138
|
-
if (std::regex_search(input, match, tool_calls_regex)) {
|
|
1139
|
-
auto tool_calls = match[1].str();
|
|
1140
|
-
auto msg2 = parse_json_tool_calls(tool_calls, std::nullopt, function_regex, close_regex);
|
|
1141
|
-
msg.tool_calls = std::move(msg2.tool_calls);
|
|
1142
|
-
} else {
|
|
1143
|
-
msg.content = input;
|
|
1144
|
-
}
|
|
1145
|
-
return msg;
|
|
1146
|
-
});
|
|
1260
|
+
static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
|
|
1261
|
+
builder.try_parse_reasoning("<think>", "</think>");
|
|
1262
|
+
if (!builder.syntax().parse_tool_calls) {
|
|
1263
|
+
builder.add_content(builder.consume_rest());
|
|
1264
|
+
return;
|
|
1265
|
+
}
|
|
1266
|
+
|
|
1267
|
+
static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
|
|
1268
|
+
static const common_regex tool_calls_end("<|tool▁calls▁end|>");
|
|
1269
|
+
static const common_regex function_regex("(?:<|tool▁call▁begin|>)?function<|tool▁sep|>([^\n]+)\n```json\n");
|
|
1270
|
+
static const common_regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
|
|
1271
|
+
|
|
1272
|
+
parse_json_tool_calls(
|
|
1273
|
+
builder,
|
|
1274
|
+
/* block_open= */ tool_calls_begin,
|
|
1275
|
+
/* function_regex_start_only= */ std::nullopt,
|
|
1276
|
+
function_regex,
|
|
1277
|
+
close_regex,
|
|
1278
|
+
tool_calls_end);
|
|
1147
1279
|
}
|
|
1148
1280
|
|
|
1149
1281
|
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
1150
1282
|
LOG_DBG("%s\n", __func__);
|
|
1151
1283
|
common_chat_params data;
|
|
1152
1284
|
data.prompt = apply(tmpl, inputs.messages, /* tools= */ nullptr, inputs.add_generation_prompt, {
|
|
1153
|
-
{"datetime", "
|
|
1285
|
+
{"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
|
|
1154
1286
|
{"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
|
|
1155
1287
|
});
|
|
1156
1288
|
if (inputs.tools.is_array() && !inputs.tools.empty()) {
|
|
@@ -1191,13 +1323,19 @@ static common_chat_params common_chat_params_init_firefunction_v2(const common_c
|
|
|
1191
1323
|
}
|
|
1192
1324
|
return data;
|
|
1193
1325
|
}
|
|
1194
|
-
static
|
|
1195
|
-
|
|
1326
|
+
static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
|
|
1327
|
+
if (!builder.syntax().parse_tool_calls) {
|
|
1328
|
+
builder.add_content(builder.consume_rest());
|
|
1329
|
+
return;
|
|
1330
|
+
}
|
|
1331
|
+
static const common_regex prefix(regex_escape(" functools["));
|
|
1332
|
+
parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
|
|
1196
1333
|
}
|
|
1197
1334
|
|
|
1198
1335
|
static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
1199
1336
|
// >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}...
|
|
1200
1337
|
// Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar
|
|
1338
|
+
// If the function is python, we also allow raw python code (if the line after `python\n` doesn't start w/ opening `{`), which the model seems to prefer for multiline code.
|
|
1201
1339
|
common_chat_params data;
|
|
1202
1340
|
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
|
1203
1341
|
data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
|
|
@@ -1211,24 +1349,21 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
|
|
|
1211
1349
|
std::string name = function.at("name");
|
|
1212
1350
|
auto parameters = function.at("parameters");
|
|
1213
1351
|
builder.resolve_refs(parameters);
|
|
1352
|
+
std::string args_pattern = "[\\s\\S]*";
|
|
1214
1353
|
auto args_rule = builder.add_schema(name + "-args", parameters);
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
}
|
|
1225
|
-
data.grammar_triggers.push_back({
|
|
1226
|
-
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
|
|
1227
|
-
regex_escape(">>>" + name + "\n"),
|
|
1228
|
-
});
|
|
1354
|
+
if (name == "python") {
|
|
1355
|
+
args_rule = builder.add_rule(name + "-maybe-raw-args", args_rule + " | [^{] .*");
|
|
1356
|
+
} else {
|
|
1357
|
+
args_pattern = "\\{" + args_pattern;
|
|
1358
|
+
}
|
|
1359
|
+
auto call_rule = builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule);
|
|
1360
|
+
first_tool_rules.push_back(call_rule);
|
|
1361
|
+
if (inputs.parallel_tool_calls) {
|
|
1362
|
+
subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>\" " + call_rule));
|
|
1363
|
+
}
|
|
1229
1364
|
data.grammar_triggers.push_back({
|
|
1230
|
-
|
|
1231
|
-
"
|
|
1365
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
|
|
1366
|
+
"((?:[\\s\\S]+?>>>)?" + regex_escape(name) + "\n)" + args_pattern,
|
|
1232
1367
|
});
|
|
1233
1368
|
});
|
|
1234
1369
|
data.preserved_tokens = {
|
|
@@ -1246,319 +1381,311 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
|
|
|
1246
1381
|
}
|
|
1247
1382
|
return data;
|
|
1248
1383
|
}
|
|
1249
|
-
|
|
1250
|
-
static
|
|
1251
|
-
static const
|
|
1252
|
-
static const
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
} catch (const std::exception & e) {
|
|
1277
|
-
LOG_ERR("Failed to parse functionary v3.2 input: %s\n", e.what());
|
|
1278
|
-
common_chat_msg res;
|
|
1279
|
-
res.role = "assistant";
|
|
1280
|
-
res.content = input;
|
|
1281
|
-
return res;
|
|
1282
|
-
}
|
|
1384
|
+
static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) {
|
|
1385
|
+
static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))");
|
|
1386
|
+
static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))");
|
|
1387
|
+
static const common_regex close_regex(R"(\s*)");
|
|
1388
|
+
|
|
1389
|
+
parse_json_tool_calls(
|
|
1390
|
+
builder,
|
|
1391
|
+
std::nullopt,
|
|
1392
|
+
function_regex_start_only,
|
|
1393
|
+
function_regex,
|
|
1394
|
+
close_regex,
|
|
1395
|
+
std::nullopt,
|
|
1396
|
+
/* allow_raw_python= */ true,
|
|
1397
|
+
/* get_function_name= */ [&](const auto & res) -> std::string {
|
|
1398
|
+
auto at_start = res.groups[0].begin == 0;
|
|
1399
|
+
auto name = builder.str(res.groups[1]);
|
|
1400
|
+
if (!name.empty() && name.back() == '{') {
|
|
1401
|
+
// Unconsume the opening brace '{' to ensure the JSON parsing goes well.
|
|
1402
|
+
builder.move_back(1);
|
|
1403
|
+
}
|
|
1404
|
+
auto idx = name.find_last_not_of("\n{");
|
|
1405
|
+
name = name.substr(0, idx + 1);
|
|
1406
|
+
if (at_start && name == "all") {
|
|
1407
|
+
return "";
|
|
1408
|
+
}
|
|
1409
|
+
return name;
|
|
1410
|
+
});
|
|
1283
1411
|
}
|
|
1284
1412
|
|
|
1285
1413
|
static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
1286
1414
|
// https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
|
|
1287
1415
|
common_chat_params data;
|
|
1288
|
-
json tools = inputs.tools.is_null() ? inputs.tools : json::array();
|
|
1289
|
-
std::string python_code_argument_name;
|
|
1290
|
-
auto has_raw_python = false;
|
|
1291
1416
|
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
std::string
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1417
|
+
if (!inputs.tools.is_null()) {
|
|
1418
|
+
std::string python_code_argument_name;
|
|
1419
|
+
auto has_raw_python = false;
|
|
1420
|
+
|
|
1421
|
+
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
1422
|
+
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
1423
|
+
std::vector<std::string> tool_rules;
|
|
1424
|
+
foreach_function(inputs.tools, [&](const json & tool) {
|
|
1425
|
+
const auto & function = tool.at("function");
|
|
1426
|
+
const auto & parameters = function.at("parameters");
|
|
1427
|
+
std::string name = function.at("name");
|
|
1428
|
+
if (name == "python" || name == "ipython") {
|
|
1429
|
+
if (!parameters.contains("type")) {
|
|
1430
|
+
throw std::runtime_error("Missing type in python tool");
|
|
1431
|
+
}
|
|
1432
|
+
has_raw_python = true;
|
|
1433
|
+
const auto & type = parameters.at("type");
|
|
1434
|
+
if (type == "object") {
|
|
1435
|
+
auto properties = parameters.at("properties");
|
|
1436
|
+
for (auto it = properties.begin(); it != properties.end(); ++it) {
|
|
1437
|
+
if (it.value().at("type") == "string") {
|
|
1438
|
+
if (!python_code_argument_name.empty()) {
|
|
1439
|
+
throw std::runtime_error("Multiple string arguments found in python tool");
|
|
1440
|
+
}
|
|
1441
|
+
python_code_argument_name = it.key();
|
|
1311
1442
|
}
|
|
1312
|
-
python_code_argument_name = it.key();
|
|
1313
1443
|
}
|
|
1444
|
+
if (python_code_argument_name.empty()) {
|
|
1445
|
+
throw std::runtime_error("No string argument found in python tool");
|
|
1446
|
+
}
|
|
1447
|
+
} else if (type != "string") {
|
|
1448
|
+
throw std::runtime_error("Invalid type in python tool: " + type.dump());
|
|
1314
1449
|
}
|
|
1315
|
-
if (python_code_argument_name.empty()) {
|
|
1316
|
-
throw std::runtime_error("No string argument found in python tool");
|
|
1317
|
-
}
|
|
1318
|
-
} else if (type != "string") {
|
|
1319
|
-
throw std::runtime_error("Invalid type in python tool: " + type.dump());
|
|
1320
1450
|
}
|
|
1451
|
+
tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
|
|
1452
|
+
});
|
|
1453
|
+
if (has_raw_python) {
|
|
1454
|
+
tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
|
|
1455
|
+
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
|
|
1456
|
+
data.preserved_tokens.push_back("<|python_tag|>");
|
|
1321
1457
|
}
|
|
1322
|
-
|
|
1458
|
+
auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
|
|
1459
|
+
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
|
|
1460
|
+
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
|
|
1323
1461
|
});
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
}
|
|
1329
|
-
auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
|
|
1330
|
-
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
|
|
1331
|
-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
|
|
1332
|
-
});
|
|
1462
|
+
data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
|
|
1463
|
+
} else {
|
|
1464
|
+
data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
|
1465
|
+
}
|
|
1333
1466
|
|
|
1334
1467
|
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
|
1335
1468
|
// TODO: if (has_raw_python)
|
|
1336
|
-
data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
|
|
1337
1469
|
return data;
|
|
1338
1470
|
}
|
|
1339
|
-
static
|
|
1471
|
+
static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
|
|
1472
|
+
if (!builder.syntax().parse_tool_calls) {
|
|
1473
|
+
builder.add_content(builder.consume_rest());
|
|
1474
|
+
return;
|
|
1475
|
+
}
|
|
1340
1476
|
// This version of Functionary still supports the llama 3.1 tool call format for the python tool.
|
|
1341
|
-
static const
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1477
|
+
static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));
|
|
1478
|
+
|
|
1479
|
+
static const common_regex function_regex(R"(<function=(\w+)>)");
|
|
1480
|
+
static const common_regex close_regex(R"(</function>)");
|
|
1481
|
+
|
|
1482
|
+
parse_json_tool_calls(
|
|
1483
|
+
builder,
|
|
1484
|
+
/* block_open= */ std::nullopt,
|
|
1485
|
+
/* function_regex_start_only= */ std::nullopt,
|
|
1486
|
+
function_regex,
|
|
1487
|
+
close_regex,
|
|
1488
|
+
std::nullopt);
|
|
1489
|
+
|
|
1490
|
+
if (auto res = builder.try_find_regex(python_tag_regex)) {
|
|
1491
|
+
auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
|
|
1492
|
+
builder.add_tool_call("python", "", arguments);
|
|
1493
|
+
return;
|
|
1354
1494
|
}
|
|
1355
|
-
static const std::regex function_regex(R"(<function=(\w+)>)");
|
|
1356
|
-
static const std::regex close_regex(R"(</function>)");
|
|
1357
|
-
// TODO: tighten & simplify.
|
|
1358
|
-
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
|
|
1359
1495
|
}
|
|
1360
1496
|
|
|
1361
1497
|
static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
1362
1498
|
common_chat_params data;
|
|
1363
|
-
// (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
|
|
1364
|
-
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
1365
|
-
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
1366
|
-
std::vector<std::string> tool_rules;
|
|
1367
|
-
std::vector<std::string> tool_call_alts;
|
|
1368
|
-
foreach_function(inputs.tools, [&](const json & tool) {
|
|
1369
|
-
const auto & function = tool.at("function");
|
|
1370
|
-
std::string name = function.at("name");
|
|
1371
|
-
auto parameters = function.at("parameters");
|
|
1372
|
-
builder.resolve_refs(parameters);
|
|
1373
|
-
tool_rules.push_back(builder.add_schema(name + "-call", {
|
|
1374
|
-
{"type", "object"},
|
|
1375
|
-
{"properties", json {
|
|
1376
|
-
{"name", json {{"const", name}}},
|
|
1377
|
-
{"arguments", parameters},
|
|
1378
|
-
}},
|
|
1379
|
-
{"required", json::array({"name", "arguments"})},
|
|
1380
|
-
}));
|
|
1381
|
-
tool_call_alts.push_back(builder.add_rule(
|
|
1382
|
-
name + "-function-tag",
|
|
1383
|
-
"\"<function\" ( \"=" + name + "\" | \" name=\\\"" + name + "\\\"\" ) \">\" space " +
|
|
1384
|
-
builder.add_schema(name + "-args", parameters) + " "
|
|
1385
|
-
"\"</function>\" space"));
|
|
1386
1499
|
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1500
|
+
json additional_context = {
|
|
1501
|
+
{"enable_thinking", inputs.enable_thinking},
|
|
1502
|
+
};
|
|
1503
|
+
|
|
1504
|
+
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, additional_context);
|
|
1505
|
+
data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
|
|
1506
|
+
if (string_ends_with(data.prompt, "<think>\n")) {
|
|
1507
|
+
if (!inputs.enable_thinking) {
|
|
1508
|
+
data.prompt += "</think>";
|
|
1509
|
+
} else {
|
|
1510
|
+
data.thinking_forced_open = true;
|
|
1511
|
+
}
|
|
1512
|
+
}
|
|
1513
|
+
|
|
1514
|
+
if (!inputs.tools.is_null()) {
|
|
1515
|
+
// (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
|
|
1516
|
+
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
1517
|
+
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
1518
|
+
std::vector<std::string> tool_rules;
|
|
1519
|
+
std::vector<std::string> tool_call_alts;
|
|
1520
|
+
std::vector<std::string> escaped_names;
|
|
1521
|
+
foreach_function(inputs.tools, [&](const json & tool) {
|
|
1522
|
+
const auto & function = tool.at("function");
|
|
1523
|
+
std::string name = function.at("name");
|
|
1524
|
+
auto parameters = function.at("parameters");
|
|
1525
|
+
builder.resolve_refs(parameters);
|
|
1526
|
+
tool_rules.push_back(builder.add_schema(name + "-call", {
|
|
1527
|
+
{"type", "object"},
|
|
1528
|
+
{"properties", json {
|
|
1529
|
+
{"name", json {{"const", name}}},
|
|
1530
|
+
{"arguments", parameters},
|
|
1531
|
+
}},
|
|
1532
|
+
{"required", json::array({"name", "arguments"})},
|
|
1533
|
+
}));
|
|
1534
|
+
tool_call_alts.push_back(builder.add_rule(
|
|
1535
|
+
name + "-function-tag",
|
|
1536
|
+
"\"<function\" ( \"=" + name + "\" | \" name=\\\"" + name + "\\\"\" ) \">\" space " +
|
|
1537
|
+
builder.add_schema(name + "-args", parameters) + " "
|
|
1538
|
+
"\"</function>\" space"));
|
|
1539
|
+
|
|
1540
|
+
data.grammar_triggers.push_back({
|
|
1541
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
|
|
1542
|
+
"<function=" + name + ">",
|
|
1543
|
+
});
|
|
1544
|
+
auto escaped_name = regex_escape(name);
|
|
1545
|
+
data.grammar_triggers.push_back({
|
|
1546
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
|
|
1547
|
+
"<function\\s+name\\s*=\\s*\"" + escaped_name + "\"",
|
|
1548
|
+
});
|
|
1549
|
+
escaped_names.push_back(escaped_name);
|
|
1390
1550
|
});
|
|
1391
|
-
auto
|
|
1551
|
+
auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
|
|
1552
|
+
std::vector<std::string> alt_tags {
|
|
1553
|
+
any_tool_call,
|
|
1554
|
+
"\"<tool_call>\" space " + any_tool_call + " \"</tool_call>\"",
|
|
1555
|
+
// The rest is just to accommodate common "good bad" outputs.
|
|
1556
|
+
"\"<function_call>\" space " + any_tool_call + " \"</function_call>\"",
|
|
1557
|
+
"\"<response>\" space " + any_tool_call + " \"</response>\"",
|
|
1558
|
+
"\"<tools>\" space " + any_tool_call + " \"</tools>\"",
|
|
1559
|
+
"\"<json>\" space " + any_tool_call + " \"</json>\"",
|
|
1560
|
+
"\"<xml>\" space " + any_tool_call + " \"</xml>\"",
|
|
1561
|
+
"\"<JSON>\" space " + any_tool_call + " \"</JSON>\"",
|
|
1562
|
+
};
|
|
1563
|
+
auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space");
|
|
1564
|
+
tool_call_alts.push_back(wrappable_tool_call);
|
|
1565
|
+
tool_call_alts.push_back(
|
|
1566
|
+
"( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
|
|
1567
|
+
auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
|
|
1568
|
+
builder.add_rule("root",
|
|
1569
|
+
std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
|
|
1570
|
+
(inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
|
|
1571
|
+
// Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
|
|
1392
1572
|
data.grammar_triggers.push_back({
|
|
1393
|
-
|
|
1394
|
-
|
|
1573
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
|
|
1574
|
+
// If thinking_forced_open, then we capture the </think> tag in the grammar,
|
|
1575
|
+
// (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
|
|
1576
|
+
std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") + (
|
|
1577
|
+
"(\\s*"
|
|
1578
|
+
"(?:<tool_call>"
|
|
1579
|
+
"|<function"
|
|
1580
|
+
"|(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
|
|
1581
|
+
"\\s*\\{\\s*\"name\"\\s*:\\s*\"(?:" + string_join(escaped_names, "|") + ")\""
|
|
1582
|
+
")"
|
|
1583
|
+
")[\\s\\S]*"
|
|
1584
|
+
),
|
|
1395
1585
|
});
|
|
1586
|
+
data.preserved_tokens = {
|
|
1587
|
+
"<think>",
|
|
1588
|
+
"</think>",
|
|
1589
|
+
"<tool_call>",
|
|
1590
|
+
"</tool_call>",
|
|
1591
|
+
"<function",
|
|
1592
|
+
"<tools>",
|
|
1593
|
+
"</tools>",
|
|
1594
|
+
"<response>",
|
|
1595
|
+
"</response>",
|
|
1596
|
+
"<function_call>",
|
|
1597
|
+
"</function_call>",
|
|
1598
|
+
"<json>",
|
|
1599
|
+
"</json>",
|
|
1600
|
+
"<JSON>",
|
|
1601
|
+
"</JSON>",
|
|
1602
|
+
"```",
|
|
1603
|
+
"```json",
|
|
1604
|
+
"```xml",
|
|
1605
|
+
};
|
|
1396
1606
|
});
|
|
1397
|
-
|
|
1398
|
-
std::vector<std::string> alt_tags {
|
|
1399
|
-
any_tool_call,
|
|
1400
|
-
"\"<tool_call>\" space " + any_tool_call + " \"</tool_call>\"",
|
|
1401
|
-
// The rest is just to accommodate common "good bad" outputs.
|
|
1402
|
-
"\"<function_call>\" space " + any_tool_call + " \"</function_call>\"",
|
|
1403
|
-
"\"<response>\" space " + any_tool_call + " \"</response>\"",
|
|
1404
|
-
"\"<tools>\" space " + any_tool_call + " \"</tools>\"",
|
|
1405
|
-
"\"<json>\" space " + any_tool_call + " \"</json>\"",
|
|
1406
|
-
"\"<xml>\" space " + any_tool_call + " \"</xml>\"",
|
|
1407
|
-
"\"<JSON>\" space " + any_tool_call + " \"</JSON>\"",
|
|
1408
|
-
};
|
|
1409
|
-
auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space");
|
|
1410
|
-
tool_call_alts.push_back(wrappable_tool_call);
|
|
1411
|
-
tool_call_alts.push_back(
|
|
1412
|
-
"( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
|
|
1413
|
-
auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
|
|
1414
|
-
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
|
|
1415
|
-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>"});
|
|
1416
|
-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function"});
|
|
1417
|
-
// Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
|
|
1418
|
-
data.grammar_triggers.push_back({
|
|
1419
|
-
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
|
|
1420
|
-
"(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?\\s*\\{\\s*\"", //name\"\\s*:\\s*\"" + escaped_name + "\"",
|
|
1421
|
-
});
|
|
1422
|
-
data.preserved_tokens = {
|
|
1423
|
-
"<think>",
|
|
1424
|
-
"</think>",
|
|
1425
|
-
"<tool_call>",
|
|
1426
|
-
"</tool_call>",
|
|
1427
|
-
"<function",
|
|
1428
|
-
"<tools>",
|
|
1429
|
-
"</tools>",
|
|
1430
|
-
"<response>",
|
|
1431
|
-
"</response>",
|
|
1432
|
-
"<function_call>",
|
|
1433
|
-
"</function_call>",
|
|
1434
|
-
"<json>",
|
|
1435
|
-
"</json>",
|
|
1436
|
-
"<JSON>",
|
|
1437
|
-
"</JSON>",
|
|
1438
|
-
"```",
|
|
1439
|
-
"```json",
|
|
1440
|
-
"```xml",
|
|
1441
|
-
};
|
|
1442
|
-
});
|
|
1607
|
+
}
|
|
1443
1608
|
|
|
1444
|
-
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
|
1445
|
-
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING : COMMON_CHAT_FORMAT_HERMES_2_PRO;
|
|
1446
1609
|
return data;
|
|
1447
1610
|
}
|
|
1448
|
-
static
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
"
|
|
1458
|
-
"
|
|
1459
|
-
|
|
1460
|
-
|
|
1611
|
+
static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
|
|
1612
|
+
builder.try_parse_reasoning("<think>", "</think>");
|
|
1613
|
+
if (!builder.syntax().parse_tool_calls) {
|
|
1614
|
+
builder.add_content(builder.consume_rest());
|
|
1615
|
+
return;
|
|
1616
|
+
}
|
|
1617
|
+
|
|
1618
|
+
static const common_regex open_regex(
|
|
1619
|
+
"(?:"
|
|
1620
|
+
"(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
|
|
1621
|
+
"(" // match 2 (open_tag)
|
|
1622
|
+
"<tool_call>"
|
|
1623
|
+
"|<function_call>"
|
|
1624
|
+
"|<tool>"
|
|
1625
|
+
"|<tools>"
|
|
1626
|
+
"|<response>"
|
|
1627
|
+
"|<json>"
|
|
1628
|
+
"|<xml>"
|
|
1629
|
+
"|<JSON>"
|
|
1461
1630
|
")?"
|
|
1462
|
-
"(\\s*\\{\\s*\"name\"
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1631
|
+
"(\\s*\\{\\s*\"name\")" // match 3 (named tool call)
|
|
1632
|
+
")"
|
|
1633
|
+
"|<function=([^>]+)>" // match 4 (function name)
|
|
1634
|
+
"|<function name=\"([^\"]+)\">" // match 5 (function name again)
|
|
1635
|
+
);
|
|
1636
|
+
|
|
1637
|
+
if (auto res = builder.try_find_regex(open_regex)) {
|
|
1638
|
+
const auto & block_start = res->groups[1];
|
|
1639
|
+
std::string block_end = block_start.empty() ? "" : "```";
|
|
1640
|
+
|
|
1641
|
+
const auto & open_tag = res->groups[2];
|
|
1642
|
+
std::string close_tag;
|
|
1643
|
+
|
|
1644
|
+
if (!res->groups[3].empty()) {
|
|
1645
|
+
builder.move_to(res->groups[3].begin);
|
|
1646
|
+
close_tag = open_tag.empty() ? "" : "</" + builder.str(open_tag).substr(1);
|
|
1647
|
+
|
|
1648
|
+
if (auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}})) {
|
|
1649
|
+
if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
|
|
1650
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
|
1651
|
+
}
|
|
1652
|
+
builder.consume_spaces();
|
|
1653
|
+
builder.consume_literal(close_tag);
|
|
1654
|
+
builder.consume_spaces();
|
|
1655
|
+
if (!block_end.empty()) {
|
|
1656
|
+
builder.consume_literal(block_end);
|
|
1657
|
+
builder.consume_spaces();
|
|
1658
|
+
}
|
|
1659
|
+
builder.add_content(builder.consume_rest());
|
|
1660
|
+
} else {
|
|
1661
|
+
throw common_chat_msg_partial_exception("failed to parse tool call");
|
|
1662
|
+
}
|
|
1663
|
+
} else {
|
|
1664
|
+
auto function_name = builder.str(res->groups[4]);
|
|
1665
|
+
if (function_name.empty()) {
|
|
1666
|
+
function_name = builder.str(res->groups[5]);
|
|
1667
|
+
}
|
|
1668
|
+
GGML_ASSERT(!function_name.empty());
|
|
1494
1669
|
|
|
1495
|
-
|
|
1496
|
-
it = json_it; // Move iterator past parsed JSON
|
|
1670
|
+
close_tag = "</function>";
|
|
1497
1671
|
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
} else {
|
|
1509
|
-
// Not a valid tool call, treat as content
|
|
1510
|
-
msg.content += std::string(match[0].first, match[0].second);
|
|
1511
|
-
it = match[0].second;
|
|
1512
|
-
}
|
|
1513
|
-
} else {
|
|
1514
|
-
auto function_name = match[4].str();
|
|
1515
|
-
if (function_name.empty()) {
|
|
1516
|
-
function_name = match[5].str();
|
|
1517
|
-
}
|
|
1518
|
-
GGML_ASSERT(!function_name.empty());
|
|
1519
|
-
|
|
1520
|
-
close_tag = "</function>";
|
|
1521
|
-
// Start parsing from after the opening tags
|
|
1522
|
-
auto json_it = match[6].first;
|
|
1523
|
-
json arguments;
|
|
1524
|
-
if (parse_json(json_it, end, arguments)) {
|
|
1525
|
-
msg.tool_calls.emplace_back(process_tool_call({
|
|
1526
|
-
{"name", function_name},
|
|
1527
|
-
{"arguments", arguments},
|
|
1528
|
-
}));
|
|
1529
|
-
it = json_it; // Move iterator past parsed JSON
|
|
1530
|
-
|
|
1531
|
-
// Handle close tags
|
|
1532
|
-
consume_spaces(it, end);
|
|
1533
|
-
if (!close_tag.empty() && !parse_literal(it, end, close_tag)) {
|
|
1534
|
-
throw std::runtime_error("Failed to parse closing tag");
|
|
1535
|
-
}
|
|
1536
|
-
consume_spaces(it, end);
|
|
1537
|
-
if (!block_end.empty() && !parse_literal(it, end, block_end)) {
|
|
1538
|
-
throw std::runtime_error("Failed to parse block end");
|
|
1539
|
-
}
|
|
1540
|
-
consume_spaces(it, end);
|
|
1541
|
-
} else {
|
|
1542
|
-
// Not a valid tool call, treat as content
|
|
1543
|
-
msg.content += std::string(match[0].first, match[0].second);
|
|
1544
|
-
it = match[0].second;
|
|
1545
|
-
}
|
|
1546
|
-
}
|
|
1547
|
-
} else {
|
|
1548
|
-
// Add remaining content
|
|
1549
|
-
msg.content += std::string(it, end);
|
|
1550
|
-
break;
|
|
1672
|
+
if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
|
|
1673
|
+
if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) {
|
|
1674
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
|
1675
|
+
}
|
|
1676
|
+
builder.consume_spaces();
|
|
1677
|
+
builder.consume_literal(close_tag);
|
|
1678
|
+
builder.consume_spaces();
|
|
1679
|
+
if (!block_end.empty()) {
|
|
1680
|
+
builder.consume_literal(block_end);
|
|
1681
|
+
builder.consume_spaces();
|
|
1551
1682
|
}
|
|
1552
1683
|
}
|
|
1553
|
-
|
|
1554
|
-
} catch (const std::exception & e) {
|
|
1555
|
-
LOG_ERR("Failed to parse hermes 2 pro input: %s\n", e.what());
|
|
1556
|
-
common_chat_msg msg;
|
|
1557
|
-
msg.role = "assistant";
|
|
1558
|
-
msg.content = input;
|
|
1559
|
-
return msg;
|
|
1684
|
+
builder.add_content(builder.consume_rest());
|
|
1560
1685
|
}
|
|
1561
|
-
}
|
|
1686
|
+
} else {
|
|
1687
|
+
builder.add_content(builder.consume_rest());
|
|
1688
|
+
}
|
|
1562
1689
|
}
|
|
1563
1690
|
|
|
1564
1691
|
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
@@ -1590,9 +1717,10 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
1590
1717
|
const auto & caps = tmpl.original_caps();
|
|
1591
1718
|
params.messages = common_chat_msgs_to_json_oaicompat<json>(inputs.messages, /* concat_text= */ !tmpl.original_caps().requires_typed_content);
|
|
1592
1719
|
params.add_generation_prompt = inputs.add_generation_prompt;
|
|
1593
|
-
params.extract_reasoning = inputs.extract_reasoning;
|
|
1594
1720
|
params.tool_choice = inputs.tool_choice;
|
|
1721
|
+
params.enable_thinking = inputs.enable_thinking;
|
|
1595
1722
|
params.grammar = inputs.grammar;
|
|
1723
|
+
params.now = inputs.now;
|
|
1596
1724
|
if (!inputs.json_schema.empty()) {
|
|
1597
1725
|
params.json_schema = json::parse(inputs.json_schema);
|
|
1598
1726
|
}
|
|
@@ -1624,7 +1752,7 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
1624
1752
|
}
|
|
1625
1753
|
|
|
1626
1754
|
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
|
|
1627
|
-
if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()
|
|
1755
|
+
if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
|
|
1628
1756
|
return common_chat_params_init_hermes_2_pro(tmpl, params);
|
|
1629
1757
|
}
|
|
1630
1758
|
|
|
@@ -1644,21 +1772,21 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
1644
1772
|
return common_chat_params_init_firefunction_v2(tmpl, params);
|
|
1645
1773
|
}
|
|
1646
1774
|
|
|
1647
|
-
// Plain handler (no tools)
|
|
1648
|
-
if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
|
|
1649
|
-
return common_chat_params_init_without_tools(tmpl, params);
|
|
1650
|
-
}
|
|
1651
|
-
|
|
1652
1775
|
// Functionary v3.1 (w/ tools)
|
|
1653
1776
|
if (src.find("<|start_header_id|>") != std::string::npos
|
|
1654
1777
|
&& src.find("<function=") != std::string::npos) {
|
|
1655
1778
|
return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, params);
|
|
1656
1779
|
}
|
|
1657
1780
|
|
|
1658
|
-
// Llama 3.1, 3.2, 3.3 (w/ tools)
|
|
1781
|
+
// Llama 3.1, 3.2, 3.3 (also requires date_string so using it even w/o tools)
|
|
1659
1782
|
if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
|
|
1660
1783
|
auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
|
|
1661
|
-
return
|
|
1784
|
+
return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
|
|
1785
|
+
}
|
|
1786
|
+
|
|
1787
|
+
// Plain handler (no tools)
|
|
1788
|
+
if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
|
|
1789
|
+
return common_chat_params_init_without_tools(tmpl, params);
|
|
1662
1790
|
}
|
|
1663
1791
|
|
|
1664
1792
|
// Mistral Nemo (w/ tools)
|
|
@@ -1738,44 +1866,64 @@ common_chat_params common_chat_templates_apply(
|
|
|
1738
1866
|
: common_chat_templates_apply_legacy(tmpls, inputs);
|
|
1739
1867
|
}
|
|
1740
1868
|
|
|
1741
|
-
static
|
|
1742
|
-
|
|
1743
|
-
msg.role = "assistant";
|
|
1744
|
-
msg.content = input;
|
|
1745
|
-
return msg;
|
|
1869
|
+
static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
|
|
1870
|
+
builder.add_content(builder.consume_rest());
|
|
1746
1871
|
}
|
|
1747
1872
|
|
|
1748
|
-
|
|
1749
|
-
|
|
1873
|
+
static void common_chat_parse(common_chat_msg_parser & builder) {
|
|
1874
|
+
LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str());
|
|
1875
|
+
|
|
1876
|
+
switch (builder.syntax().format) {
|
|
1750
1877
|
case COMMON_CHAT_FORMAT_CONTENT_ONLY:
|
|
1751
|
-
|
|
1878
|
+
common_chat_parse_content_only(builder);
|
|
1879
|
+
break;
|
|
1752
1880
|
case COMMON_CHAT_FORMAT_GENERIC:
|
|
1753
|
-
|
|
1881
|
+
common_chat_parse_generic(builder);
|
|
1882
|
+
break;
|
|
1754
1883
|
case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
|
|
1755
|
-
|
|
1884
|
+
common_chat_parse_mistral_nemo(builder);
|
|
1885
|
+
break;
|
|
1756
1886
|
case COMMON_CHAT_FORMAT_LLAMA_3_X:
|
|
1757
|
-
|
|
1887
|
+
common_chat_parse_llama_3_1(builder);
|
|
1888
|
+
break;
|
|
1758
1889
|
case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
|
|
1759
|
-
|
|
1890
|
+
common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true);
|
|
1891
|
+
break;
|
|
1760
1892
|
case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
|
|
1761
|
-
|
|
1762
|
-
|
|
1763
|
-
return common_chat_parse_deepseek_r1(input, /* extract_reasoning= */ true);
|
|
1893
|
+
common_chat_parse_deepseek_r1(builder);
|
|
1894
|
+
break;
|
|
1764
1895
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
|
|
1765
|
-
|
|
1896
|
+
common_chat_parse_functionary_v3_2(builder);
|
|
1897
|
+
break;
|
|
1766
1898
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
|
|
1767
|
-
|
|
1899
|
+
common_chat_parse_functionary_v3_1_llama_3_1(builder);
|
|
1900
|
+
break;
|
|
1768
1901
|
case COMMON_CHAT_FORMAT_HERMES_2_PRO:
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
return common_chat_parse_hermes_2_pro(input, /* extract_reasoning= */ true);
|
|
1902
|
+
common_chat_parse_hermes_2_pro(builder);
|
|
1903
|
+
break;
|
|
1772
1904
|
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
|
|
1773
|
-
|
|
1905
|
+
common_chat_parse_firefunction_v2(builder);
|
|
1906
|
+
break;
|
|
1774
1907
|
case COMMON_CHAT_FORMAT_COMMAND_R7B:
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
|
-
return common_chat_parse_command_r7b(input, /* extract_reasoning= */ true);
|
|
1908
|
+
common_chat_parse_command_r7b(builder);
|
|
1909
|
+
break;
|
|
1778
1910
|
default:
|
|
1779
|
-
throw std::runtime_error("Unsupported format: " + common_chat_format_name(format));
|
|
1911
|
+
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
|
|
1780
1912
|
}
|
|
1913
|
+
builder.finish();
|
|
1914
|
+
}
|
|
1915
|
+
|
|
1916
|
+
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
|
|
1917
|
+
common_chat_msg_parser builder(input, is_partial, syntax);
|
|
1918
|
+
try {
|
|
1919
|
+
common_chat_parse(builder);
|
|
1920
|
+
} catch (const common_chat_msg_partial_exception & ex) {
|
|
1921
|
+
LOG_DBG("Partial parse: %s\n", ex.what());
|
|
1922
|
+
if (!is_partial) {
|
|
1923
|
+
throw std::runtime_error(ex.what());
|
|
1924
|
+
}
|
|
1925
|
+
}
|
|
1926
|
+
auto msg = builder.result();
|
|
1927
|
+
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
|
|
1928
|
+
return msg;
|
|
1781
1929
|
}
|