cui-llama.rn 1.7.3 → 1.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +217 -17
- package/android/src/main/CMakeLists.txt +34 -15
- package/android/src/main/java/com/rnllama/LlamaContext.java +94 -8
- package/android/src/main/java/com/rnllama/RNLlama.java +247 -0
- package/android/src/main/jni.cpp +213 -14
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
- package/cpp/README.md +1 -1
- package/cpp/chat-parser.cpp +385 -0
- package/cpp/chat-parser.h +120 -0
- package/cpp/chat.cpp +726 -596
- package/cpp/chat.h +71 -6
- package/cpp/common.cpp +56 -38
- package/cpp/common.h +9 -3
- package/cpp/ggml-backend-reg.cpp +5 -0
- package/cpp/ggml-backend.cpp +10 -2
- package/cpp/ggml-common.h +4 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/common.h +4 -3
- package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
- package/cpp/ggml-cpu/ggml-cpu.c +123 -104
- package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
- package/cpp/ggml-cpu/ops.cpp +330 -148
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/quants.c +1158 -0
- package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/ggml-cpu/repack.cpp +1571 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +330 -38
- package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/ggml-cpu/vec.cpp +87 -18
- package/cpp/ggml-cpu/vec.h +249 -94
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +63 -183
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal.m +152 -45
- package/cpp/ggml-quants.c +0 -2
- package/cpp/ggml.c +61 -21
- package/cpp/ggml.h +22 -3
- package/cpp/gguf.cpp +24 -3
- package/cpp/json-partial.cpp +256 -0
- package/cpp/json-partial.h +38 -0
- package/cpp/json-schema-to-grammar.cpp +5 -47
- package/cpp/json-schema-to-grammar.h +4 -4
- package/cpp/llama-arch.cpp +153 -3
- package/cpp/llama-arch.h +27 -1
- package/cpp/llama-batch.cpp +741 -272
- package/cpp/llama-batch.h +112 -54
- package/cpp/llama-chat.cpp +30 -8
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-context.cpp +524 -339
- package/cpp/llama-context.h +38 -17
- package/cpp/llama-cparams.cpp +4 -0
- package/cpp/llama-cparams.h +2 -0
- package/cpp/llama-grammar.cpp +12 -2
- package/cpp/llama-graph.cpp +431 -356
- package/cpp/llama-graph.h +126 -58
- package/cpp/llama-hparams.cpp +10 -2
- package/cpp/llama-hparams.h +19 -2
- package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
- package/cpp/llama-kv-cache-unified-iswa.h +128 -0
- package/cpp/llama-kv-cache-unified.cpp +1841 -0
- package/cpp/llama-kv-cache-unified.h +303 -0
- package/cpp/llama-kv-cells.h +439 -0
- package/cpp/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama-memory-hybrid.h +138 -0
- package/cpp/llama-memory-recurrent.cpp +1112 -0
- package/cpp/llama-memory-recurrent.h +183 -0
- package/cpp/llama-memory.cpp +41 -0
- package/cpp/llama-memory.h +86 -5
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +42 -17
- package/cpp/llama-model-saver.cpp +1 -0
- package/cpp/llama-model.cpp +1639 -513
- package/cpp/llama-model.h +26 -0
- package/cpp/llama-sampling.cpp +2 -2
- package/cpp/llama-vocab.cpp +65 -28
- package/cpp/llama-vocab.h +1 -0
- package/cpp/llama.cpp +11 -7
- package/cpp/llama.h +150 -42
- package/cpp/minja/chat-template.hpp +1 -1
- package/cpp/minja/minja.hpp +1 -1
- package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/cpp/nlohmann/json_fwd.hpp +187 -0
- package/cpp/regex-partial.cpp +204 -0
- package/cpp/regex-partial.h +56 -0
- package/cpp/rn-llama.cpp +646 -35
- package/cpp/rn-llama.h +32 -1
- package/cpp/rn-tts.h +39 -0
- package/cpp/sampling.cpp +7 -8
- package/cpp/tools/mtmd/clip-impl.h +5 -0
- package/cpp/tools/mtmd/clip.cpp +572 -436
- package/cpp/tools/mtmd/clip.h +14 -4
- package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
- package/cpp/tools/mtmd/mtmd-audio.h +2 -17
- package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
- package/cpp/tools/mtmd/mtmd-helper.h +91 -0
- package/cpp/tools/mtmd/mtmd.cpp +368 -248
- package/cpp/tools/mtmd/mtmd.h +6 -70
- package/cpp/unicode.cpp +5 -0
- package/ios/CMakeLists.txt +26 -6
- package/ios/RNLlama.h +1 -1
- package/ios/RNLlama.mm +153 -3
- package/ios/RNLlamaContext.h +9 -1
- package/ios/RNLlamaContext.mm +112 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +24 -0
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +46 -2
- package/src/index.ts +105 -1
- package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/cpp/ggml-cpu/sgemm.cpp +0 -3544
- package/cpp/ggml-cpu/sgemm.h +0 -14
- package/cpp/llama-kv-cache.cpp +0 -2827
- package/cpp/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
- /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
- /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
package/cpp/chat.cpp
CHANGED
@@ -1,8 +1,18 @@
|
|
1
1
|
#include "chat.h"
|
2
|
+
#include "chat-parser.h"
|
3
|
+
#include "common.h"
|
4
|
+
#include "json-partial.h"
|
2
5
|
#include "json-schema-to-grammar.h"
|
3
6
|
#include "log.h"
|
7
|
+
#include "regex-partial.h"
|
4
8
|
|
9
|
+
#include <cstdio>
|
10
|
+
#include <exception>
|
11
|
+
#include <iostream>
|
5
12
|
#include <optional>
|
13
|
+
#include <stdexcept>
|
14
|
+
#include <string>
|
15
|
+
#include <vector>
|
6
16
|
|
7
17
|
static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
|
8
18
|
auto time = std::chrono::system_clock::to_time_t(now);
|
@@ -13,6 +23,101 @@ static std::string format_time(const std::chrono::system_clock::time_point & now
|
|
13
23
|
return res;
|
14
24
|
}
|
15
25
|
|
26
|
+
static std::string string_diff(const std::string & last, const std::string & current) {
|
27
|
+
if (last.empty()) {
|
28
|
+
return current;
|
29
|
+
}
|
30
|
+
if (!string_starts_with(current, last)) {
|
31
|
+
if (string_starts_with(last, current)) {
|
32
|
+
// This happens if the last generation ended on a partial stop word (not erased),
|
33
|
+
// and the current ended on a stop word (erased).
|
34
|
+
return "";
|
35
|
+
}
|
36
|
+
throw std::runtime_error("Invalid diff: '" + last + "' not found at start of '" + current + "'");
|
37
|
+
}
|
38
|
+
return current.substr(last.size());
|
39
|
+
}
|
40
|
+
|
41
|
+
static bool has_content_or_tool_calls(const common_chat_msg & msg) {
|
42
|
+
return !msg.content.empty() || !msg.tool_calls.empty();
|
43
|
+
}
|
44
|
+
|
45
|
+
template <>
|
46
|
+
json common_chat_msg::to_json_oaicompat() const
|
47
|
+
{
|
48
|
+
json message {
|
49
|
+
{"role", "assistant"},
|
50
|
+
};
|
51
|
+
if (!reasoning_content.empty()) {
|
52
|
+
message["reasoning_content"] = reasoning_content;
|
53
|
+
}
|
54
|
+
if (content.empty() && !tool_calls.empty()) {
|
55
|
+
message["content"] = json();
|
56
|
+
} else {
|
57
|
+
message["content"] = content;
|
58
|
+
}
|
59
|
+
if (!tool_calls.empty()) {
|
60
|
+
auto arr = json::array();
|
61
|
+
for (const auto & tc : tool_calls) {
|
62
|
+
arr.push_back({
|
63
|
+
{"type", "function"},
|
64
|
+
{"function", {
|
65
|
+
{"name", tc.name},
|
66
|
+
{"arguments", tc.arguments},
|
67
|
+
}},
|
68
|
+
{"id", tc.id},
|
69
|
+
// // Some templates generate and require an id (sometimes in a very specific format, e.g. Mistral Nemo).
|
70
|
+
// // We only generate a random id for the ones that don't generate one by themselves
|
71
|
+
// // (they also won't get to see it as their template likely doesn't use it, so it's all for the client)
|
72
|
+
// {"id", tc.id.empty() ? gen_tool_call_id() : tc.id},
|
73
|
+
});
|
74
|
+
}
|
75
|
+
message["tool_calls"] = arr;
|
76
|
+
}
|
77
|
+
return message;
|
78
|
+
}
|
79
|
+
|
80
|
+
std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg) {
|
81
|
+
std::vector<common_chat_msg_diff> diffs;
|
82
|
+
if (previous_msg.reasoning_content != new_msg.reasoning_content) {
|
83
|
+
auto & diff = diffs.emplace_back();
|
84
|
+
diff.reasoning_content_delta = string_diff(previous_msg.reasoning_content, new_msg.reasoning_content);
|
85
|
+
}
|
86
|
+
if (previous_msg.content != new_msg.content) {
|
87
|
+
auto & diff = diffs.emplace_back();
|
88
|
+
diff.content_delta = string_diff(previous_msg.content, new_msg.content);
|
89
|
+
}
|
90
|
+
|
91
|
+
if (new_msg.tool_calls.size() < previous_msg.tool_calls.size()) {
|
92
|
+
throw std::runtime_error("Invalid diff: now finding less tool calls!");
|
93
|
+
}
|
94
|
+
|
95
|
+
if (!previous_msg.tool_calls.empty()) {
|
96
|
+
auto idx = previous_msg.tool_calls.size() - 1;
|
97
|
+
const auto & pref = previous_msg.tool_calls[idx];
|
98
|
+
const auto & newf = new_msg.tool_calls[idx];
|
99
|
+
if (pref.name != newf.name) {
|
100
|
+
throw std::runtime_error("Invalid diff: tool call mismatch!");
|
101
|
+
}
|
102
|
+
auto args_diff = string_diff(pref.arguments, newf.arguments);
|
103
|
+
if (!args_diff.empty() || pref.id != newf.id) {
|
104
|
+
auto & diff = diffs.emplace_back();
|
105
|
+
diff.tool_call_index = idx;
|
106
|
+
if (pref.id != newf.id) {
|
107
|
+
diff.tool_call_delta.id = newf.id;
|
108
|
+
diff.tool_call_delta.name = newf.name;
|
109
|
+
}
|
110
|
+
diff.tool_call_delta.arguments = args_diff;
|
111
|
+
}
|
112
|
+
}
|
113
|
+
for (size_t idx = previous_msg.tool_calls.size(); idx < new_msg.tool_calls.size(); ++idx) {
|
114
|
+
auto & diff = diffs.emplace_back();
|
115
|
+
diff.tool_call_index = idx;
|
116
|
+
diff.tool_call_delta = new_msg.tool_calls[idx];
|
117
|
+
}
|
118
|
+
return diffs;
|
119
|
+
}
|
120
|
+
|
16
121
|
struct templates_params {
|
17
122
|
json messages;
|
18
123
|
json tools;
|
@@ -22,7 +127,7 @@ struct templates_params {
|
|
22
127
|
bool stream;
|
23
128
|
std::string grammar;
|
24
129
|
bool add_generation_prompt = true;
|
25
|
-
bool
|
130
|
+
bool enable_thinking = true;
|
26
131
|
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
|
27
132
|
};
|
28
133
|
|
@@ -267,6 +372,32 @@ json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & t
|
|
267
372
|
return result;
|
268
373
|
}
|
269
374
|
|
375
|
+
template <> json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
|
376
|
+
json delta = json::object();
|
377
|
+
if (!diff.reasoning_content_delta.empty()) {
|
378
|
+
delta["reasoning_content"] = diff.reasoning_content_delta;
|
379
|
+
}
|
380
|
+
if (!diff.content_delta.empty()) {
|
381
|
+
delta["content"] = diff.content_delta;
|
382
|
+
}
|
383
|
+
if (diff.tool_call_index != std::string::npos) {
|
384
|
+
json tool_call;
|
385
|
+
tool_call["index"] = diff.tool_call_index;
|
386
|
+
if (!diff.tool_call_delta.id.empty()) {
|
387
|
+
tool_call["id"] = diff.tool_call_delta.id;
|
388
|
+
tool_call["type"] = "function";
|
389
|
+
}
|
390
|
+
json function = json::object();
|
391
|
+
if (!diff.tool_call_delta.name.empty()) {
|
392
|
+
function["name"] = diff.tool_call_delta.name;
|
393
|
+
}
|
394
|
+
function["arguments"] = diff.tool_call_delta.arguments;
|
395
|
+
tool_call["function"] = function;
|
396
|
+
delta["tool_calls"] = json::array({tool_call});
|
397
|
+
}
|
398
|
+
return delta;
|
399
|
+
}
|
400
|
+
|
270
401
|
bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
|
271
402
|
if (use_jinja) {
|
272
403
|
try {
|
@@ -434,7 +565,7 @@ common_chat_templates_ptr common_chat_templates_init(
|
|
434
565
|
return tmpls;
|
435
566
|
}
|
436
567
|
|
437
|
-
|
568
|
+
const char * common_chat_format_name(common_chat_format format) {
|
438
569
|
switch (format) {
|
439
570
|
case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
|
440
571
|
case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
|
@@ -442,182 +573,128 @@ std::string common_chat_format_name(common_chat_format format) {
|
|
442
573
|
case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
|
443
574
|
case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
|
444
575
|
case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
|
445
|
-
case COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING: return "DeepSeek R1 (extract reasoning)";
|
446
576
|
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
|
447
577
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
|
448
578
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
|
449
579
|
case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
|
450
|
-
case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING: return "Hermes 2 Pro (extract reasoning)";
|
451
580
|
case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
|
452
|
-
case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: return "Command R7B (extract reasoning)";
|
453
581
|
default:
|
454
582
|
throw std::runtime_error("Unknown chat format");
|
455
583
|
}
|
456
584
|
}
|
457
585
|
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
bool parse_error(std::size_t position, const std::string &, const json::exception &) override { // NOLINT
|
467
|
-
this->position = position - 1;
|
468
|
-
this->found_error = true;
|
469
|
-
return false;
|
470
|
-
}
|
471
|
-
bool null() override { return true; } // NOLINT
|
472
|
-
bool boolean(bool) override { return true; } // NOLINT
|
473
|
-
bool number_integer(number_integer_t) override { return true; } // NOLINT
|
474
|
-
bool number_unsigned(number_unsigned_t) override { return true; } // NOLINT
|
475
|
-
bool number_float(number_float_t, const string_t &) override { return true; } // NOLINT
|
476
|
-
bool string(string_t &) override { return true; } // NOLINT
|
477
|
-
bool binary(binary_t &) override { return true; } // NOLINT
|
478
|
-
bool start_object(std::size_t) override { return true; } // NOLINT
|
479
|
-
bool key(string_t &) override { return true; } // NOLINT
|
480
|
-
bool end_object() override { return true; }
|
481
|
-
bool start_array(std::size_t) override { return true; } // NOLINT
|
482
|
-
bool end_array() override { return true; }
|
483
|
-
};
|
484
|
-
json_error_locator err_loc;
|
485
|
-
json::sax_parse(it, end, &err_loc);
|
486
|
-
|
487
|
-
std::string::const_iterator temptative_end;
|
488
|
-
if (err_loc.found_error) {
|
489
|
-
temptative_end = it + err_loc.position;
|
490
|
-
} else {
|
491
|
-
temptative_end = end;
|
492
|
-
}
|
493
|
-
std::string json_sub {it, temptative_end};
|
494
|
-
try {
|
495
|
-
out = json::parse(json_sub);
|
496
|
-
it = temptative_end;
|
497
|
-
return true;
|
498
|
-
} catch (const std::exception &) {
|
499
|
-
return false;
|
500
|
-
}
|
501
|
-
}
|
502
|
-
|
503
|
-
static bool parse_literal(std::string::const_iterator & it, const std::string::const_iterator & end, const std::string & expected) {
|
504
|
-
auto expected_it = expected.begin();
|
505
|
-
auto tmp_it = it;
|
506
|
-
while (tmp_it != end && expected_it != expected.end() && *tmp_it == *expected_it) {
|
507
|
-
++tmp_it;
|
508
|
-
++expected_it;
|
509
|
-
}
|
510
|
-
if (expected_it == expected.end()) {
|
511
|
-
it = tmp_it;
|
512
|
-
return true;
|
513
|
-
}
|
514
|
-
return false;
|
515
|
-
}
|
516
|
-
|
517
|
-
static std::optional<std::smatch> parse_pattern(std::string::const_iterator & it, const std::string::const_iterator & end, const std::regex & expected) {
|
518
|
-
std::smatch match;
|
519
|
-
if (std::regex_match(it, end, match, expected)) {
|
520
|
-
it = match.suffix().first;
|
521
|
-
return match;
|
586
|
+
const char * common_reasoning_format_name(common_reasoning_format format) {
|
587
|
+
switch (format) {
|
588
|
+
case COMMON_REASONING_FORMAT_NONE: return "none";
|
589
|
+
case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek";
|
590
|
+
case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return "deepseek-legacy";
|
591
|
+
default:
|
592
|
+
throw std::runtime_error("Unknown reasoning format");
|
522
593
|
}
|
523
|
-
return std::nullopt;
|
524
594
|
}
|
525
595
|
|
526
|
-
static
|
527
|
-
|
528
|
-
|
596
|
+
static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
|
597
|
+
std::string arguments;
|
598
|
+
if (builder.is_partial()) {
|
599
|
+
arguments = (json {{"code", code + builder.healing_marker()}}).dump();
|
600
|
+
auto idx = arguments.find(builder.healing_marker());
|
601
|
+
if (idx != std::string::npos) {
|
602
|
+
arguments.resize(idx);
|
603
|
+
}
|
604
|
+
} else {
|
605
|
+
arguments = (json {{"code", code}}).dump();
|
529
606
|
}
|
607
|
+
return arguments;
|
530
608
|
}
|
531
609
|
|
532
610
|
/**
|
533
611
|
* Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
|
534
612
|
* Aggregates the prefix, suffix and in-between text into the content.
|
535
613
|
*/
|
536
|
-
static
|
537
|
-
|
538
|
-
const std::optional<
|
539
|
-
const std::
|
540
|
-
const std::
|
541
|
-
|
542
|
-
std::
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
614
|
+
static void parse_json_tool_calls(
|
615
|
+
common_chat_msg_parser & builder,
|
616
|
+
const std::optional<common_regex> & block_open,
|
617
|
+
const std::optional<common_regex> & function_regex_start_only,
|
618
|
+
const std::optional<common_regex> & function_regex,
|
619
|
+
const common_regex & close_regex,
|
620
|
+
const std::optional<common_regex> & block_close,
|
621
|
+
bool allow_raw_python = false,
|
622
|
+
const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name = nullptr) {
|
623
|
+
|
624
|
+
auto parse_tool_calls = [&]() {
|
625
|
+
size_t from = std::string::npos;
|
626
|
+
auto first = true;
|
627
|
+
while (true) {
|
628
|
+
auto res = function_regex_start_only && first
|
629
|
+
? builder.try_consume_regex(*function_regex_start_only)
|
630
|
+
: function_regex
|
631
|
+
? builder.try_find_regex(*function_regex, from)
|
632
|
+
: std::nullopt;
|
633
|
+
if (res) {
|
634
|
+
std::string name;
|
635
|
+
if (get_function_name) {
|
636
|
+
name = get_function_name(*res);
|
637
|
+
} else {
|
638
|
+
LM_GGML_ASSERT(res->groups.size() == 2);
|
639
|
+
name = builder.str(res->groups[1]);
|
640
|
+
}
|
641
|
+
first = false;
|
642
|
+
if (name.empty()) {
|
643
|
+
// get_function_name signalled us that we should skip this match and treat it as content.
|
644
|
+
from = res->groups[0].begin + 1;
|
645
|
+
continue;
|
646
|
+
}
|
647
|
+
from = std::string::npos;
|
559
648
|
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
649
|
+
auto maybe_raw_python = name == "python" && allow_raw_python;
|
650
|
+
if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
|
651
|
+
if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
|
652
|
+
if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
|
653
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
654
|
+
}
|
655
|
+
builder.consume_regex(close_regex);
|
656
|
+
}
|
657
|
+
continue;
|
658
|
+
}
|
659
|
+
if (maybe_raw_python) {
|
660
|
+
auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
|
661
|
+
if (!builder.add_tool_call(name, "", arguments)) {
|
662
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
663
|
+
}
|
664
|
+
return;
|
665
|
+
}
|
666
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
667
|
+
}
|
565
668
|
break;
|
566
669
|
}
|
567
|
-
|
568
|
-
|
569
|
-
it = rit->suffix().first;
|
570
|
-
|
571
|
-
json arguments;
|
572
|
-
if (parse_json(it, end, arguments)) {
|
573
|
-
if (!std::regex_search(it, end, match, close_regex)) {
|
574
|
-
throw std::runtime_error("Malformed input, missing closing pattern: " + input);
|
575
|
-
}
|
576
|
-
it = match.suffix().first;
|
577
|
-
result.tool_calls.push_back({name, arguments.is_string() ? arguments.get<std::string>() : arguments.dump(), /* id= */ ""});
|
578
|
-
} else {
|
579
|
-
if (allow_raw_python && name == "python") {
|
580
|
-
result.tool_calls.push_back({name, json({{"code", std::string(it, end)}}).dump(), /* id= */ ""});
|
581
|
-
break;
|
582
|
-
}
|
583
|
-
throw std::runtime_error("Failed to parse json tool call arguments: " + input);
|
670
|
+
if (block_close) {
|
671
|
+
builder.consume_regex(*block_close);
|
584
672
|
}
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
673
|
+
builder.consume_spaces();
|
674
|
+
builder.add_content(builder.consume_rest());
|
675
|
+
};
|
676
|
+
if (block_open) {
|
677
|
+
if (auto res = builder.try_find_regex(*block_open)) {
|
678
|
+
parse_tool_calls();
|
679
|
+
} else {
|
680
|
+
builder.add_content(builder.consume_rest());
|
590
681
|
}
|
591
|
-
|
682
|
+
} else {
|
683
|
+
parse_tool_calls();
|
592
684
|
}
|
593
|
-
return result;
|
594
685
|
}
|
595
686
|
|
596
|
-
static
|
597
|
-
const
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
}
|
604
|
-
static common_chat_msg parse_prefixed_json_tool_call_array(const std::string& input, const std::string & prefix, size_t rstrip_prefix = 0) {
|
605
|
-
auto content_end = input.find(prefix);
|
606
|
-
size_t tc_start = std::string::npos;
|
607
|
-
|
608
|
-
common_chat_msg result;
|
609
|
-
result.role = "assistant";
|
610
|
-
if (content_end == std::string::npos) {
|
611
|
-
result.content = input;
|
612
|
-
} else {
|
613
|
-
tc_start = content_end + prefix.size() - rstrip_prefix;
|
614
|
-
result.content = input.substr(0, content_end);
|
615
|
-
auto tool_calls = json::parse(input.substr(tc_start));
|
616
|
-
for (const auto & tool_call : tool_calls) {
|
617
|
-
result.tool_calls.emplace_back(process_tool_call(tool_call));
|
687
|
+
static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder, const common_regex & prefix, size_t rstrip_prefix = 0) {
|
688
|
+
static const std::vector<std::vector<std::string>> args_paths = {{"arguments"}};
|
689
|
+
if (auto res = builder.try_find_regex(prefix)) {
|
690
|
+
builder.move_back(rstrip_prefix);
|
691
|
+
auto tool_calls = builder.consume_json_with_dumped_args(args_paths);
|
692
|
+
if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) {
|
693
|
+
throw common_chat_msg_partial_exception("incomplete tool call array");
|
618
694
|
}
|
695
|
+
} else {
|
696
|
+
builder.add_content(builder.consume_rest());
|
619
697
|
}
|
620
|
-
return result;
|
621
698
|
}
|
622
699
|
|
623
700
|
static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
|
@@ -744,29 +821,36 @@ static common_chat_params common_chat_params_init_generic(const common_chat_temp
|
|
744
821
|
data.format = COMMON_CHAT_FORMAT_GENERIC;
|
745
822
|
return data;
|
746
823
|
}
|
747
|
-
static
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
824
|
+
static void common_chat_parse_generic(common_chat_msg_parser & builder) {
|
825
|
+
if (!builder.syntax().parse_tool_calls) {
|
826
|
+
builder.add_content(builder.consume_rest());
|
827
|
+
return;
|
828
|
+
}
|
829
|
+
static const std::vector<std::vector<std::string>> content_paths = {
|
830
|
+
{"response"},
|
831
|
+
};
|
832
|
+
static const std::vector<std::vector<std::string>> args_paths = {
|
833
|
+
{"tool_call", "arguments"},
|
834
|
+
{"tool_calls", "arguments"},
|
835
|
+
};
|
836
|
+
auto data = builder.consume_json_with_dumped_args(args_paths, content_paths);
|
837
|
+
if (data.value.contains("tool_calls")) {
|
838
|
+
if (!builder.add_tool_calls(data.value.at("tool_calls")) || data.is_partial) {
|
839
|
+
throw common_chat_msg_partial_exception("incomplete tool calls");
|
758
840
|
}
|
759
|
-
} else if (data.contains("tool_call")) {
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
841
|
+
} else if (data.value.contains("tool_call")) {
|
842
|
+
if (!builder.add_tool_call(data.value.at("tool_call")) || data.is_partial) {
|
843
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
844
|
+
}
|
845
|
+
} else if (data.value.contains("response")) {
|
846
|
+
const auto & response = data.value.at("response");
|
847
|
+
builder.add_content(response.is_string() ? response.template get<std::string>() : response.dump(2));
|
848
|
+
if (data.is_partial) {
|
849
|
+
throw common_chat_msg_partial_exception("incomplete response");
|
850
|
+
}
|
851
|
+
} else {
|
852
|
+
throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON");
|
768
853
|
}
|
769
|
-
return result;
|
770
854
|
}
|
771
855
|
|
772
856
|
static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
@@ -813,12 +897,44 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
|
|
813
897
|
data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
|
814
898
|
return data;
|
815
899
|
}
|
816
|
-
static
|
817
|
-
|
900
|
+
static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
|
901
|
+
if (!builder.syntax().parse_tool_calls) {
|
902
|
+
builder.add_content(builder.consume_rest());
|
903
|
+
return;
|
904
|
+
}
|
905
|
+
|
906
|
+
static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
|
907
|
+
parse_prefixed_json_tool_call_array(builder, prefix);
|
818
908
|
}
|
819
909
|
|
820
910
|
static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
821
911
|
common_chat_params data;
|
912
|
+
|
913
|
+
auto adjusted_messages = json::array();
|
914
|
+
for (const auto & msg : inputs.messages) {
|
915
|
+
auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
|
916
|
+
auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
|
917
|
+
if (has_reasoning_content && has_tool_calls) {
|
918
|
+
auto adjusted_message = msg;
|
919
|
+
adjusted_message["tool_plan"] = msg.at("reasoning_content");
|
920
|
+
adjusted_message.erase("reasoning_content");
|
921
|
+
adjusted_messages.push_back(adjusted_message);
|
922
|
+
} else {
|
923
|
+
adjusted_messages.push_back(msg);
|
924
|
+
}
|
925
|
+
}
|
926
|
+
data.prompt = apply(tmpl, adjusted_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {});
|
927
|
+
data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
|
928
|
+
if (string_ends_with(data.prompt, "<|START_THINKING|>")) {
|
929
|
+
if (!inputs.enable_thinking) {
|
930
|
+
data.prompt += "<|END_THINKING|>";
|
931
|
+
} else {
|
932
|
+
data.thinking_forced_open = true;
|
933
|
+
}
|
934
|
+
} else if (!inputs.enable_thinking && string_ends_with(data.prompt, "<|CHATBOT_TOKEN|>")) {
|
935
|
+
data.prompt += "<|START_THINKING|><|END_THINKING|>";
|
936
|
+
}
|
937
|
+
|
822
938
|
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
823
939
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
824
940
|
auto schemas = json::array();
|
@@ -849,11 +965,16 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
|
|
849
965
|
if (!inputs.parallel_tool_calls) {
|
850
966
|
schema["maxItems"] = 1;
|
851
967
|
}
|
852
|
-
builder.add_rule("root",
|
968
|
+
builder.add_rule("root",
|
969
|
+
std::string(data.thinking_forced_open ? "( \"<|END_THINKING|>\" space )? " : "") +
|
970
|
+
"\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
|
853
971
|
});
|
854
972
|
data.grammar_triggers.push_back({
|
855
|
-
|
856
|
-
|
973
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
|
974
|
+
// If thinking_forced_open, then we capture the </think> tag in the grammar,
|
975
|
+
// (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
|
976
|
+
std::string(data.thinking_forced_open ? "[\\s\\S]*?(<\\|END_THINKING\\|>\\s*)" : "(?:<\\|START_THINKING\\|>[\\s\\S]*?<\\|END_THINKING\\|>\\s*)?") +
|
977
|
+
"(<\\|START_ACTION\\|>)[\\s\\S]*"
|
857
978
|
});
|
858
979
|
data.preserved_tokens = {
|
859
980
|
"<|START_ACTION|>",
|
@@ -863,61 +984,40 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
|
|
863
984
|
"<|START_THINKING|>",
|
864
985
|
"<|END_THINKING|>",
|
865
986
|
};
|
866
|
-
auto adjusted_messages = json::array();
|
867
|
-
for (const auto & msg : inputs.messages) {
|
868
|
-
auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
|
869
|
-
auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
|
870
|
-
if (has_reasoning_content && has_tool_calls) {
|
871
|
-
auto adjusted_message = msg;
|
872
|
-
adjusted_message["tool_plan"] = msg.at("reasoning_content");
|
873
|
-
adjusted_message.erase("reasoning_content");
|
874
|
-
adjusted_messages.push_back(adjusted_message);
|
875
|
-
} else {
|
876
|
-
adjusted_messages.push_back(msg);
|
877
|
-
}
|
878
|
-
}
|
879
|
-
data.prompt = apply(tmpl, adjusted_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {});
|
880
|
-
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING : COMMON_CHAT_FORMAT_COMMAND_R7B;
|
881
987
|
return data;
|
882
988
|
}
|
883
|
-
static common_chat_msg common_chat_parse_command_r7b(const std::string & input, bool extract_reasoning) {
|
884
|
-
static const std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S]*?)<\\|END_THINKING\\|>)([\\s\\S]*)");
|
885
|
-
static const std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S]*?)<\\|END_ACTION\\|>");
|
886
|
-
static const std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S]*?)<\\|END_RESPONSE\\|>");
|
887
|
-
|
888
|
-
std::smatch match;
|
889
|
-
|
890
|
-
common_chat_msg result;
|
891
|
-
result.role = "assistant";
|
892
989
|
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
990
|
+
static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
|
991
|
+
builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
|
992
|
+
|
993
|
+
static const common_regex start_action_regex("<\\|START_ACTION\\|>");
|
994
|
+
static const common_regex end_action_regex("<\\|END_ACTION\\|>");
|
995
|
+
static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
|
996
|
+
static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
|
997
|
+
|
998
|
+
if (auto res = builder.try_find_regex(start_action_regex)) {
|
999
|
+
// If we didn't extract thoughts, prelude includes them.
|
1000
|
+
auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}});
|
1001
|
+
for (const auto & tool_call : tool_calls.value) {
|
1002
|
+
std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
|
1003
|
+
std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
|
1004
|
+
std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
|
1005
|
+
if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
|
1006
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
1007
|
+
}
|
901
1008
|
}
|
902
|
-
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
/* .arguments = */ action.at("parameters").dump(),
|
911
|
-
/* .id = */ action.at("tool_call_id"),
|
912
|
-
});
|
1009
|
+
if (tool_calls.is_partial) {
|
1010
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
1011
|
+
}
|
1012
|
+
builder.consume_regex(end_action_regex);
|
1013
|
+
} else if (auto res = builder.try_find_regex(start_response_regex)) {
|
1014
|
+
if (!builder.try_find_regex(end_response_regex)) {
|
1015
|
+
builder.add_content(builder.consume_rest());
|
1016
|
+
throw common_chat_msg_partial_exception(end_response_regex.str());
|
913
1017
|
}
|
914
|
-
} else if (std::regex_match(rest, match, response_regex)) {
|
915
|
-
auto response = match[1].str();
|
916
|
-
result.content += response;
|
917
1018
|
} else {
|
918
|
-
|
1019
|
+
builder.add_content(builder.consume_rest());
|
919
1020
|
}
|
920
|
-
return result;
|
921
1021
|
}
|
922
1022
|
|
923
1023
|
static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
|
@@ -994,8 +1094,8 @@ static common_chat_params common_chat_params_init_llama_3_x(const common_chat_te
|
|
994
1094
|
});
|
995
1095
|
// Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
|
996
1096
|
data.grammar_triggers.push_back({
|
997
|
-
|
998
|
-
"\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"", // + name + "\"[\\s\\S]*",
|
1097
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
|
1098
|
+
"(\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\")[\\s\\S]*", // + name + "\"[\\s\\S]*",
|
999
1099
|
});
|
1000
1100
|
if (!builtin_tools.empty()) {
|
1001
1101
|
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
|
@@ -1018,42 +1118,93 @@ static common_chat_params common_chat_params_init_llama_3_x(const common_chat_te
|
|
1018
1118
|
});
|
1019
1119
|
return data;
|
1020
1120
|
}
|
1021
|
-
static
|
1022
|
-
|
1023
|
-
|
1121
|
+
static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
|
1122
|
+
if (!builder.syntax().parse_tool_calls) {
|
1123
|
+
builder.add_content(builder.consume_rest());
|
1124
|
+
return;
|
1125
|
+
}
|
1126
|
+
|
1127
|
+
static const common_regex function_regex(
|
1024
1128
|
"\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
|
1025
|
-
static const
|
1026
|
-
|
1129
|
+
static const common_regex close_regex("\\}\\s*");
|
1130
|
+
|
1131
|
+
static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(");
|
1132
|
+
static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*");
|
1027
1133
|
|
1028
1134
|
if (with_builtin_tools) {
|
1029
|
-
|
1030
|
-
if (
|
1031
|
-
|
1032
|
-
|
1033
|
-
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1043
|
-
|
1044
|
-
|
1045
|
-
|
1046
|
-
|
1047
|
-
|
1048
|
-
|
1135
|
+
static const common_regex builtin_call_regex("<\\|python_tag\\|>");
|
1136
|
+
if (auto res = builder.try_find_regex(builtin_call_regex)) {
|
1137
|
+
auto fun_res = builder.consume_regex(function_name_regex);
|
1138
|
+
auto function_name = builder.str(fun_res.groups[1]);
|
1139
|
+
|
1140
|
+
common_healing_marker healing_marker;
|
1141
|
+
json args = json::object();
|
1142
|
+
while (true) {
|
1143
|
+
if (auto arg_res = builder.try_consume_regex(arg_name_regex)) {
|
1144
|
+
auto arg_name = builder.str(arg_res->groups[1]);
|
1145
|
+
auto partial = builder.consume_json();
|
1146
|
+
args[arg_name] = partial.json;
|
1147
|
+
healing_marker.marker = partial.healing_marker.marker;
|
1148
|
+
healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker;
|
1149
|
+
builder.consume_spaces();
|
1150
|
+
if (!builder.try_consume_literal(",")) {
|
1151
|
+
break;
|
1152
|
+
}
|
1153
|
+
} else {
|
1154
|
+
break;
|
1155
|
+
}
|
1156
|
+
}
|
1157
|
+
builder.consume_literal(")");
|
1158
|
+
builder.consume_spaces();
|
1159
|
+
|
1160
|
+
auto arguments = args.dump();
|
1161
|
+
if (!builder.add_tool_call(function_name, "", arguments)) {
|
1162
|
+
throw common_chat_msg_partial_exception("Incomplete tool call");
|
1049
1163
|
}
|
1164
|
+
return;
|
1050
1165
|
}
|
1051
1166
|
}
|
1052
|
-
|
1167
|
+
parse_json_tool_calls(
|
1168
|
+
builder,
|
1169
|
+
/* block_open= */ std::nullopt,
|
1170
|
+
/* function_regex_start_only= */ function_regex,
|
1171
|
+
/* function_regex= */ std::nullopt,
|
1172
|
+
close_regex,
|
1173
|
+
std::nullopt);
|
1174
|
+
|
1053
1175
|
}
|
1054
1176
|
|
1055
1177
|
static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
1056
1178
|
common_chat_params data;
|
1179
|
+
auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
1180
|
+
|
1181
|
+
// Hacks to fix the official (broken) prompt.
|
1182
|
+
// It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
|
1183
|
+
// until the official template is fixed.
|
1184
|
+
if (tmpl.source().find("{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}") != std::string::npos) {
|
1185
|
+
// Don't leave the chat dangling after tool results
|
1186
|
+
if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) {
|
1187
|
+
prompt += "<|end▁of▁sentence|>";
|
1188
|
+
if (inputs.add_generation_prompt) {
|
1189
|
+
prompt += "<|Assistant|>";
|
1190
|
+
}
|
1191
|
+
}
|
1192
|
+
// Fix up tool call delta example added by Minja
|
1193
|
+
prompt = std::regex_replace(
|
1194
|
+
prompt,
|
1195
|
+
std::regex("(<|tool▁call▁end|>)[\\s\\r\\n]*(<|tool▁outputs▁begin|>|<|User|>)"),
|
1196
|
+
"$1<|tool▁calls▁end|><|end▁of▁sentence|>$2");
|
1197
|
+
}
|
1198
|
+
data.prompt = prompt;
|
1199
|
+
data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
|
1200
|
+
if (string_ends_with(data.prompt, "<think>\n")) {
|
1201
|
+
if (!inputs.enable_thinking) {
|
1202
|
+
data.prompt += "</think>";
|
1203
|
+
} else {
|
1204
|
+
data.thinking_forced_open = true;
|
1205
|
+
}
|
1206
|
+
}
|
1207
|
+
|
1057
1208
|
if (inputs.tools.is_array() && !inputs.tools.empty()) {
|
1058
1209
|
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
|
1059
1210
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
@@ -1064,21 +1215,25 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
|
|
1064
1215
|
auto parameters = function.at("parameters");
|
1065
1216
|
builder.resolve_refs(parameters);
|
1066
1217
|
tool_rules.push_back(builder.add_rule(name + "-call",
|
1067
|
-
"\"<|tool▁call▁begin
|
1218
|
+
"( \"<|tool▁call▁begin|>\" )? \"function<|tool▁sep|>" + name + "\\n"
|
1068
1219
|
"```json\\n\" " + builder.add_schema(name + "-args", parameters) + " "
|
1069
1220
|
"\"```<|tool▁call▁end|>\""));
|
1070
1221
|
});
|
1071
1222
|
// Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
|
1072
1223
|
// so we accept common variants (then it's all constrained)
|
1073
1224
|
builder.add_rule("root",
|
1074
|
-
|
1225
|
+
std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
|
1226
|
+
"( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) "
|
1075
1227
|
"(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
|
1076
1228
|
"\"<|tool▁calls▁end|>\""
|
1077
1229
|
" space");
|
1078
|
-
data.grammar_triggers.push_back({
|
1079
|
-
|
1080
|
-
|
1081
|
-
|
1230
|
+
data.grammar_triggers.push_back({
|
1231
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
|
1232
|
+
// If thinking_forced_open, then we capture the </think> tag in the grammar,
|
1233
|
+
// (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
|
1234
|
+
std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
|
1235
|
+
"(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*"
|
1236
|
+
});
|
1082
1237
|
data.preserved_tokens = {
|
1083
1238
|
"<think>",
|
1084
1239
|
"</think>",
|
@@ -1090,65 +1245,27 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
|
|
1090
1245
|
};
|
1091
1246
|
});
|
1092
1247
|
}
|
1093
|
-
auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
1094
|
-
|
1095
|
-
// Hacks to fix the official (broken) prompt.
|
1096
|
-
// It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
|
1097
|
-
// until the official template is fixed.
|
1098
|
-
if (tmpl.source().find("{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}") != std::string::npos) {
|
1099
|
-
// Don't leave the chat dangling after tool results
|
1100
|
-
if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) {
|
1101
|
-
prompt += "<|end▁of▁sentence|>";
|
1102
|
-
if (inputs.add_generation_prompt) {
|
1103
|
-
prompt += "<|Assistant|>";
|
1104
|
-
}
|
1105
|
-
}
|
1106
|
-
// Fix up tool call delta example added by Minja
|
1107
|
-
prompt = std::regex_replace(
|
1108
|
-
prompt,
|
1109
|
-
std::regex("(<|tool▁call▁end|>)[\\s\\r\\n]*(<|tool▁outputs▁begin|>|<|User|>)"),
|
1110
|
-
"$1<|tool▁calls▁end|><|end▁of▁sentence|>$2");
|
1111
|
-
}
|
1112
|
-
data.prompt = prompt;
|
1113
|
-
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING : COMMON_CHAT_FORMAT_DEEPSEEK_R1;
|
1114
1248
|
return data;
|
1115
1249
|
}
|
1116
|
-
static
|
1117
|
-
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
1128
|
-
|
1129
|
-
|
1130
|
-
|
1131
|
-
|
1132
|
-
|
1133
|
-
|
1134
|
-
|
1135
|
-
return handle_think_tag_prelude(input, extract_reasoning, [](const std::string & input) {
|
1136
|
-
static const std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
|
1137
|
-
static const std::regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
|
1138
|
-
static const std::regex tool_calls_regex("[\\s\\r\\n]*(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>)([\\s\\S\\r\\n]*?)<|tool▁calls▁end|>");
|
1139
|
-
|
1140
|
-
common_chat_msg msg;
|
1141
|
-
msg.role = "assistant";
|
1142
|
-
std::smatch match;
|
1143
|
-
if (std::regex_search(input, match, tool_calls_regex)) {
|
1144
|
-
auto tool_calls = match[1].str();
|
1145
|
-
auto msg2 = parse_json_tool_calls(tool_calls, std::nullopt, function_regex, close_regex);
|
1146
|
-
msg.tool_calls = std::move(msg2.tool_calls);
|
1147
|
-
} else {
|
1148
|
-
msg.content = input;
|
1149
|
-
}
|
1150
|
-
return msg;
|
1151
|
-
});
|
1250
|
+
static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
|
1251
|
+
builder.try_parse_reasoning("<think>", "</think>");
|
1252
|
+
if (!builder.syntax().parse_tool_calls) {
|
1253
|
+
builder.add_content(builder.consume_rest());
|
1254
|
+
return;
|
1255
|
+
}
|
1256
|
+
|
1257
|
+
static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
|
1258
|
+
static const common_regex tool_calls_end("<|tool▁calls▁end|>");
|
1259
|
+
static const common_regex function_regex("(?:<|tool▁call▁begin|>)?function<|tool▁sep|>([^\n]+)\n```json\n");
|
1260
|
+
static const common_regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
|
1261
|
+
|
1262
|
+
parse_json_tool_calls(
|
1263
|
+
builder,
|
1264
|
+
/* block_open= */ tool_calls_begin,
|
1265
|
+
/* function_regex_start_only= */ std::nullopt,
|
1266
|
+
function_regex,
|
1267
|
+
close_regex,
|
1268
|
+
tool_calls_end);
|
1152
1269
|
}
|
1153
1270
|
|
1154
1271
|
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
@@ -1196,13 +1313,19 @@ static common_chat_params common_chat_params_init_firefunction_v2(const common_c
|
|
1196
1313
|
}
|
1197
1314
|
return data;
|
1198
1315
|
}
|
1199
|
-
static
|
1200
|
-
|
1316
|
+
static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
|
1317
|
+
if (!builder.syntax().parse_tool_calls) {
|
1318
|
+
builder.add_content(builder.consume_rest());
|
1319
|
+
return;
|
1320
|
+
}
|
1321
|
+
static const common_regex prefix(regex_escape(" functools["));
|
1322
|
+
parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
|
1201
1323
|
}
|
1202
1324
|
|
1203
1325
|
static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
1204
1326
|
// >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}...
|
1205
1327
|
// Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar
|
1328
|
+
// If the function is python, we also allow raw python code (if the line after `python\n` doesn't start w/ opening `{`), which the model seems to prefer for multiline code.
|
1206
1329
|
common_chat_params data;
|
1207
1330
|
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
1208
1331
|
data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
|
@@ -1216,24 +1339,21 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
|
|
1216
1339
|
std::string name = function.at("name");
|
1217
1340
|
auto parameters = function.at("parameters");
|
1218
1341
|
builder.resolve_refs(parameters);
|
1342
|
+
std::string args_pattern = "[\\s\\S]*";
|
1219
1343
|
auto args_rule = builder.add_schema(name + "-args", parameters);
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1224
|
-
|
1225
|
-
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1229
|
-
}
|
1230
|
-
data.grammar_triggers.push_back({
|
1231
|
-
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
|
1232
|
-
regex_escape(">>>" + name + "\n"),
|
1233
|
-
});
|
1344
|
+
if (name == "python") {
|
1345
|
+
args_rule = builder.add_rule(name + "-maybe-raw-args", args_rule + " | [^{] .*");
|
1346
|
+
} else {
|
1347
|
+
args_pattern = "\\{" + args_pattern;
|
1348
|
+
}
|
1349
|
+
auto call_rule = builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule);
|
1350
|
+
first_tool_rules.push_back(call_rule);
|
1351
|
+
if (inputs.parallel_tool_calls) {
|
1352
|
+
subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>\" " + call_rule));
|
1353
|
+
}
|
1234
1354
|
data.grammar_triggers.push_back({
|
1235
|
-
|
1236
|
-
"
|
1355
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
|
1356
|
+
"((?:[\\s\\S]+?>>>)?" + regex_escape(name) + "\n)" + args_pattern,
|
1237
1357
|
});
|
1238
1358
|
});
|
1239
1359
|
data.preserved_tokens = {
|
@@ -1251,40 +1371,33 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
|
|
1251
1371
|
}
|
1252
1372
|
return data;
|
1253
1373
|
}
|
1254
|
-
|
1255
|
-
static
|
1256
|
-
static const
|
1257
|
-
static const
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
|
1264
|
-
|
1265
|
-
|
1266
|
-
|
1267
|
-
|
1268
|
-
|
1269
|
-
|
1270
|
-
|
1271
|
-
|
1272
|
-
|
1273
|
-
|
1274
|
-
|
1275
|
-
|
1276
|
-
|
1277
|
-
|
1278
|
-
|
1279
|
-
|
1280
|
-
|
1281
|
-
} catch (const std::exception & e) {
|
1282
|
-
LOG_ERR("Failed to parse functionary v3.2 input: %s\n", e.what());
|
1283
|
-
common_chat_msg res;
|
1284
|
-
res.role = "assistant";
|
1285
|
-
res.content = input;
|
1286
|
-
return res;
|
1287
|
-
}
|
1374
|
+
static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) {
|
1375
|
+
static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))");
|
1376
|
+
static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))");
|
1377
|
+
static const common_regex close_regex(R"(\s*)");
|
1378
|
+
|
1379
|
+
parse_json_tool_calls(
|
1380
|
+
builder,
|
1381
|
+
std::nullopt,
|
1382
|
+
function_regex_start_only,
|
1383
|
+
function_regex,
|
1384
|
+
close_regex,
|
1385
|
+
std::nullopt,
|
1386
|
+
/* allow_raw_python= */ true,
|
1387
|
+
/* get_function_name= */ [&](const auto & res) -> std::string {
|
1388
|
+
auto at_start = res.groups[0].begin == 0;
|
1389
|
+
auto name = builder.str(res.groups[1]);
|
1390
|
+
if (!name.empty() && name.back() == '{') {
|
1391
|
+
// Unconsume the opening brace '{' to ensure the JSON parsing goes well.
|
1392
|
+
builder.move_back(1);
|
1393
|
+
}
|
1394
|
+
auto idx = name.find_last_not_of("\n{");
|
1395
|
+
name = name.substr(0, idx + 1);
|
1396
|
+
if (at_start && name == "all") {
|
1397
|
+
return "";
|
1398
|
+
}
|
1399
|
+
return name;
|
1400
|
+
});
|
1288
1401
|
}
|
1289
1402
|
|
1290
1403
|
static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
@@ -1345,229 +1458,224 @@ static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(con
|
|
1345
1458
|
// TODO: if (has_raw_python)
|
1346
1459
|
return data;
|
1347
1460
|
}
|
1348
|
-
static
|
1461
|
+
static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
|
1462
|
+
if (!builder.syntax().parse_tool_calls) {
|
1463
|
+
builder.add_content(builder.consume_rest());
|
1464
|
+
return;
|
1465
|
+
}
|
1349
1466
|
// This version of Functionary still supports the llama 3.1 tool call format for the python tool.
|
1350
|
-
static const
|
1351
|
-
|
1352
|
-
|
1353
|
-
|
1354
|
-
|
1355
|
-
|
1356
|
-
|
1357
|
-
|
1358
|
-
|
1359
|
-
|
1360
|
-
|
1361
|
-
|
1362
|
-
|
1467
|
+
static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));
|
1468
|
+
|
1469
|
+
static const common_regex function_regex(R"(<function=(\w+)>)");
|
1470
|
+
static const common_regex close_regex(R"(</function>)");
|
1471
|
+
|
1472
|
+
parse_json_tool_calls(
|
1473
|
+
builder,
|
1474
|
+
/* block_open= */ std::nullopt,
|
1475
|
+
/* function_regex_start_only= */ std::nullopt,
|
1476
|
+
function_regex,
|
1477
|
+
close_regex,
|
1478
|
+
std::nullopt);
|
1479
|
+
|
1480
|
+
if (auto res = builder.try_find_regex(python_tag_regex)) {
|
1481
|
+
auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
|
1482
|
+
builder.add_tool_call("python", "", arguments);
|
1483
|
+
return;
|
1363
1484
|
}
|
1364
|
-
static const std::regex function_regex(R"(<function=(\w+)>)");
|
1365
|
-
static const std::regex close_regex(R"(</function>)");
|
1366
|
-
// TODO: tighten & simplify.
|
1367
|
-
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
|
1368
1485
|
}
|
1369
1486
|
|
1370
1487
|
static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
1371
1488
|
common_chat_params data;
|
1372
|
-
// (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
|
1373
|
-
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
1374
|
-
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
1375
|
-
std::vector<std::string> tool_rules;
|
1376
|
-
std::vector<std::string> tool_call_alts;
|
1377
|
-
foreach_function(inputs.tools, [&](const json & tool) {
|
1378
|
-
const auto & function = tool.at("function");
|
1379
|
-
std::string name = function.at("name");
|
1380
|
-
auto parameters = function.at("parameters");
|
1381
|
-
builder.resolve_refs(parameters);
|
1382
|
-
tool_rules.push_back(builder.add_schema(name + "-call", {
|
1383
|
-
{"type", "object"},
|
1384
|
-
{"properties", json {
|
1385
|
-
{"name", json {{"const", name}}},
|
1386
|
-
{"arguments", parameters},
|
1387
|
-
}},
|
1388
|
-
{"required", json::array({"name", "arguments"})},
|
1389
|
-
}));
|
1390
|
-
tool_call_alts.push_back(builder.add_rule(
|
1391
|
-
name + "-function-tag",
|
1392
|
-
"\"<function\" ( \"=" + name + "\" | \" name=\\\"" + name + "\\\"\" ) \">\" space " +
|
1393
|
-
builder.add_schema(name + "-args", parameters) + " "
|
1394
|
-
"\"</function>\" space"));
|
1395
1489
|
|
1396
|
-
|
1397
|
-
|
1398
|
-
|
1490
|
+
json additional_context = {
|
1491
|
+
{"enable_thinking", inputs.enable_thinking},
|
1492
|
+
};
|
1493
|
+
|
1494
|
+
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, additional_context);
|
1495
|
+
data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
|
1496
|
+
if (string_ends_with(data.prompt, "<think>\n")) {
|
1497
|
+
if (!inputs.enable_thinking) {
|
1498
|
+
data.prompt += "</think>";
|
1499
|
+
} else {
|
1500
|
+
data.thinking_forced_open = true;
|
1501
|
+
}
|
1502
|
+
}
|
1503
|
+
|
1504
|
+
if (!inputs.tools.is_null()) {
|
1505
|
+
// (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
|
1506
|
+
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
1507
|
+
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
1508
|
+
std::vector<std::string> tool_rules;
|
1509
|
+
std::vector<std::string> tool_call_alts;
|
1510
|
+
std::vector<std::string> escaped_names;
|
1511
|
+
foreach_function(inputs.tools, [&](const json & tool) {
|
1512
|
+
const auto & function = tool.at("function");
|
1513
|
+
std::string name = function.at("name");
|
1514
|
+
auto parameters = function.at("parameters");
|
1515
|
+
builder.resolve_refs(parameters);
|
1516
|
+
tool_rules.push_back(builder.add_schema(name + "-call", {
|
1517
|
+
{"type", "object"},
|
1518
|
+
{"properties", json {
|
1519
|
+
{"name", json {{"const", name}}},
|
1520
|
+
{"arguments", parameters},
|
1521
|
+
}},
|
1522
|
+
{"required", json::array({"name", "arguments"})},
|
1523
|
+
}));
|
1524
|
+
tool_call_alts.push_back(builder.add_rule(
|
1525
|
+
name + "-function-tag",
|
1526
|
+
"\"<function\" ( \"=" + name + "\" | \" name=\\\"" + name + "\\\"\" ) \">\" space " +
|
1527
|
+
builder.add_schema(name + "-args", parameters) + " "
|
1528
|
+
"\"</function>\" space"));
|
1529
|
+
|
1530
|
+
data.grammar_triggers.push_back({
|
1531
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
|
1532
|
+
"<function=" + name + ">",
|
1533
|
+
});
|
1534
|
+
auto escaped_name = regex_escape(name);
|
1535
|
+
data.grammar_triggers.push_back({
|
1536
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
|
1537
|
+
"<function\\s+name\\s*=\\s*\"" + escaped_name + "\"",
|
1538
|
+
});
|
1539
|
+
escaped_names.push_back(escaped_name);
|
1399
1540
|
});
|
1400
|
-
auto
|
1541
|
+
auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
|
1542
|
+
std::vector<std::string> alt_tags {
|
1543
|
+
any_tool_call,
|
1544
|
+
"\"<tool_call>\" space " + any_tool_call + " \"</tool_call>\"",
|
1545
|
+
// The rest is just to accommodate common "good bad" outputs.
|
1546
|
+
"\"<function_call>\" space " + any_tool_call + " \"</function_call>\"",
|
1547
|
+
"\"<response>\" space " + any_tool_call + " \"</response>\"",
|
1548
|
+
"\"<tools>\" space " + any_tool_call + " \"</tools>\"",
|
1549
|
+
"\"<json>\" space " + any_tool_call + " \"</json>\"",
|
1550
|
+
"\"<xml>\" space " + any_tool_call + " \"</xml>\"",
|
1551
|
+
"\"<JSON>\" space " + any_tool_call + " \"</JSON>\"",
|
1552
|
+
};
|
1553
|
+
auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space");
|
1554
|
+
tool_call_alts.push_back(wrappable_tool_call);
|
1555
|
+
tool_call_alts.push_back(
|
1556
|
+
"( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
|
1557
|
+
auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
|
1558
|
+
builder.add_rule("root",
|
1559
|
+
std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
|
1560
|
+
(inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
|
1561
|
+
// Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
|
1401
1562
|
data.grammar_triggers.push_back({
|
1402
|
-
|
1403
|
-
|
1563
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
|
1564
|
+
// If thinking_forced_open, then we capture the </think> tag in the grammar,
|
1565
|
+
// (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
|
1566
|
+
std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") + (
|
1567
|
+
"(\\s*"
|
1568
|
+
"(?:<tool_call>"
|
1569
|
+
"|<function"
|
1570
|
+
"|(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
|
1571
|
+
"\\s*\\{\\s*\"name\"\\s*:\\s*\"(?:" + string_join(escaped_names, "|") + ")\""
|
1572
|
+
")"
|
1573
|
+
")[\\s\\S]*"
|
1574
|
+
),
|
1404
1575
|
});
|
1576
|
+
data.preserved_tokens = {
|
1577
|
+
"<think>",
|
1578
|
+
"</think>",
|
1579
|
+
"<tool_call>",
|
1580
|
+
"</tool_call>",
|
1581
|
+
"<function",
|
1582
|
+
"<tools>",
|
1583
|
+
"</tools>",
|
1584
|
+
"<response>",
|
1585
|
+
"</response>",
|
1586
|
+
"<function_call>",
|
1587
|
+
"</function_call>",
|
1588
|
+
"<json>",
|
1589
|
+
"</json>",
|
1590
|
+
"<JSON>",
|
1591
|
+
"</JSON>",
|
1592
|
+
"```",
|
1593
|
+
"```json",
|
1594
|
+
"```xml",
|
1595
|
+
};
|
1405
1596
|
});
|
1406
|
-
|
1407
|
-
std::vector<std::string> alt_tags {
|
1408
|
-
any_tool_call,
|
1409
|
-
"\"<tool_call>\" space " + any_tool_call + " \"</tool_call>\"",
|
1410
|
-
// The rest is just to accommodate common "good bad" outputs.
|
1411
|
-
"\"<function_call>\" space " + any_tool_call + " \"</function_call>\"",
|
1412
|
-
"\"<response>\" space " + any_tool_call + " \"</response>\"",
|
1413
|
-
"\"<tools>\" space " + any_tool_call + " \"</tools>\"",
|
1414
|
-
"\"<json>\" space " + any_tool_call + " \"</json>\"",
|
1415
|
-
"\"<xml>\" space " + any_tool_call + " \"</xml>\"",
|
1416
|
-
"\"<JSON>\" space " + any_tool_call + " \"</JSON>\"",
|
1417
|
-
};
|
1418
|
-
auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space");
|
1419
|
-
tool_call_alts.push_back(wrappable_tool_call);
|
1420
|
-
tool_call_alts.push_back(
|
1421
|
-
"( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
|
1422
|
-
auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
|
1423
|
-
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
|
1424
|
-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>"});
|
1425
|
-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function"});
|
1426
|
-
// Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
|
1427
|
-
data.grammar_triggers.push_back({
|
1428
|
-
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
|
1429
|
-
"(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?\\s*\\{\\s*\"", //name\"\\s*:\\s*\"" + escaped_name + "\"",
|
1430
|
-
});
|
1431
|
-
data.preserved_tokens = {
|
1432
|
-
"<think>",
|
1433
|
-
"</think>",
|
1434
|
-
"<tool_call>",
|
1435
|
-
"</tool_call>",
|
1436
|
-
"<function",
|
1437
|
-
"<tools>",
|
1438
|
-
"</tools>",
|
1439
|
-
"<response>",
|
1440
|
-
"</response>",
|
1441
|
-
"<function_call>",
|
1442
|
-
"</function_call>",
|
1443
|
-
"<json>",
|
1444
|
-
"</json>",
|
1445
|
-
"<JSON>",
|
1446
|
-
"</JSON>",
|
1447
|
-
"```",
|
1448
|
-
"```json",
|
1449
|
-
"```xml",
|
1450
|
-
};
|
1451
|
-
});
|
1597
|
+
}
|
1452
1598
|
|
1453
|
-
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
1454
|
-
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING : COMMON_CHAT_FORMAT_HERMES_2_PRO;
|
1455
1599
|
return data;
|
1456
1600
|
}
|
1457
|
-
static
|
1458
|
-
|
1459
|
-
|
1460
|
-
|
1461
|
-
|
1462
|
-
|
1463
|
-
|
1464
|
-
|
1465
|
-
|
1466
|
-
"
|
1467
|
-
"
|
1468
|
-
|
1469
|
-
|
1601
|
+
static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
|
1602
|
+
builder.try_parse_reasoning("<think>", "</think>");
|
1603
|
+
if (!builder.syntax().parse_tool_calls) {
|
1604
|
+
builder.add_content(builder.consume_rest());
|
1605
|
+
return;
|
1606
|
+
}
|
1607
|
+
|
1608
|
+
static const common_regex open_regex(
|
1609
|
+
"(?:"
|
1610
|
+
"(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
|
1611
|
+
"(" // match 2 (open_tag)
|
1612
|
+
"<tool_call>"
|
1613
|
+
"|<function_call>"
|
1614
|
+
"|<tool>"
|
1615
|
+
"|<tools>"
|
1616
|
+
"|<response>"
|
1617
|
+
"|<json>"
|
1618
|
+
"|<xml>"
|
1619
|
+
"|<JSON>"
|
1470
1620
|
")?"
|
1471
|
-
"(\\s*\\{\\s*\"name\"
|
1472
|
-
|
1473
|
-
|
1474
|
-
|
1475
|
-
|
1476
|
-
|
1477
|
-
|
1478
|
-
|
1479
|
-
|
1480
|
-
|
1481
|
-
|
1482
|
-
|
1483
|
-
|
1484
|
-
|
1485
|
-
|
1486
|
-
|
1487
|
-
|
1488
|
-
|
1489
|
-
|
1490
|
-
|
1491
|
-
|
1492
|
-
|
1493
|
-
|
1494
|
-
|
1495
|
-
|
1496
|
-
|
1497
|
-
|
1498
|
-
|
1499
|
-
|
1500
|
-
|
1501
|
-
|
1502
|
-
|
1621
|
+
"(\\s*\\{\\s*\"name\")" // match 3 (named tool call)
|
1622
|
+
")"
|
1623
|
+
"|<function=([^>]+)>" // match 4 (function name)
|
1624
|
+
"|<function name=\"([^\"]+)\">" // match 5 (function name again)
|
1625
|
+
);
|
1626
|
+
|
1627
|
+
if (auto res = builder.try_find_regex(open_regex)) {
|
1628
|
+
const auto & block_start = res->groups[1];
|
1629
|
+
std::string block_end = block_start.empty() ? "" : "```";
|
1630
|
+
|
1631
|
+
const auto & open_tag = res->groups[2];
|
1632
|
+
std::string close_tag;
|
1633
|
+
|
1634
|
+
if (!res->groups[3].empty()) {
|
1635
|
+
builder.move_to(res->groups[3].begin);
|
1636
|
+
close_tag = open_tag.empty() ? "" : "</" + builder.str(open_tag).substr(1);
|
1637
|
+
|
1638
|
+
if (auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}})) {
|
1639
|
+
if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
|
1640
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
1641
|
+
}
|
1642
|
+
builder.consume_spaces();
|
1643
|
+
builder.consume_literal(close_tag);
|
1644
|
+
builder.consume_spaces();
|
1645
|
+
if (!block_end.empty()) {
|
1646
|
+
builder.consume_literal(block_end);
|
1647
|
+
builder.consume_spaces();
|
1648
|
+
}
|
1649
|
+
builder.add_content(builder.consume_rest());
|
1650
|
+
} else {
|
1651
|
+
throw common_chat_msg_partial_exception("failed to parse tool call");
|
1652
|
+
}
|
1653
|
+
} else {
|
1654
|
+
auto function_name = builder.str(res->groups[4]);
|
1655
|
+
if (function_name.empty()) {
|
1656
|
+
function_name = builder.str(res->groups[5]);
|
1657
|
+
}
|
1658
|
+
LM_GGML_ASSERT(!function_name.empty());
|
1503
1659
|
|
1504
|
-
|
1505
|
-
it = json_it; // Move iterator past parsed JSON
|
1660
|
+
close_tag = "</function>";
|
1506
1661
|
|
1507
|
-
|
1508
|
-
|
1509
|
-
|
1510
|
-
|
1511
|
-
|
1512
|
-
|
1513
|
-
|
1514
|
-
|
1515
|
-
|
1516
|
-
|
1517
|
-
} else {
|
1518
|
-
// Not a valid tool call, treat as content
|
1519
|
-
msg.content += std::string(match[0].first, match[0].second);
|
1520
|
-
it = match[0].second;
|
1521
|
-
}
|
1522
|
-
} else {
|
1523
|
-
auto function_name = match[4].str();
|
1524
|
-
if (function_name.empty()) {
|
1525
|
-
function_name = match[5].str();
|
1526
|
-
}
|
1527
|
-
LM_GGML_ASSERT(!function_name.empty());
|
1528
|
-
|
1529
|
-
close_tag = "</function>";
|
1530
|
-
// Start parsing from after the opening tags
|
1531
|
-
auto json_it = match[6].first;
|
1532
|
-
json arguments;
|
1533
|
-
if (parse_json(json_it, end, arguments)) {
|
1534
|
-
msg.tool_calls.emplace_back(process_tool_call({
|
1535
|
-
{"name", function_name},
|
1536
|
-
{"arguments", arguments},
|
1537
|
-
}));
|
1538
|
-
it = json_it; // Move iterator past parsed JSON
|
1539
|
-
|
1540
|
-
// Handle close tags
|
1541
|
-
consume_spaces(it, end);
|
1542
|
-
if (!close_tag.empty() && !parse_literal(it, end, close_tag)) {
|
1543
|
-
throw std::runtime_error("Failed to parse closing tag");
|
1544
|
-
}
|
1545
|
-
consume_spaces(it, end);
|
1546
|
-
if (!block_end.empty() && !parse_literal(it, end, block_end)) {
|
1547
|
-
throw std::runtime_error("Failed to parse block end");
|
1548
|
-
}
|
1549
|
-
consume_spaces(it, end);
|
1550
|
-
} else {
|
1551
|
-
// Not a valid tool call, treat as content
|
1552
|
-
msg.content += std::string(match[0].first, match[0].second);
|
1553
|
-
it = match[0].second;
|
1554
|
-
}
|
1555
|
-
}
|
1556
|
-
} else {
|
1557
|
-
// Add remaining content
|
1558
|
-
msg.content += std::string(it, end);
|
1559
|
-
break;
|
1662
|
+
if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
|
1663
|
+
if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) {
|
1664
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
1665
|
+
}
|
1666
|
+
builder.consume_spaces();
|
1667
|
+
builder.consume_literal(close_tag);
|
1668
|
+
builder.consume_spaces();
|
1669
|
+
if (!block_end.empty()) {
|
1670
|
+
builder.consume_literal(block_end);
|
1671
|
+
builder.consume_spaces();
|
1560
1672
|
}
|
1561
1673
|
}
|
1562
|
-
|
1563
|
-
} catch (const std::exception & e) {
|
1564
|
-
LOG_ERR("Failed to parse hermes 2 pro input: %s\n", e.what());
|
1565
|
-
common_chat_msg msg;
|
1566
|
-
msg.role = "assistant";
|
1567
|
-
msg.content = input;
|
1568
|
-
return msg;
|
1674
|
+
builder.add_content(builder.consume_rest());
|
1569
1675
|
}
|
1570
|
-
}
|
1676
|
+
} else {
|
1677
|
+
builder.add_content(builder.consume_rest());
|
1678
|
+
}
|
1571
1679
|
}
|
1572
1680
|
|
1573
1681
|
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
@@ -1599,8 +1707,8 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
1599
1707
|
const auto & caps = tmpl.original_caps();
|
1600
1708
|
params.messages = common_chat_msgs_to_json_oaicompat<json>(inputs.messages, /* concat_text= */ !tmpl.original_caps().requires_typed_content);
|
1601
1709
|
params.add_generation_prompt = inputs.add_generation_prompt;
|
1602
|
-
params.extract_reasoning = inputs.extract_reasoning;
|
1603
1710
|
params.tool_choice = inputs.tool_choice;
|
1711
|
+
params.enable_thinking = inputs.enable_thinking;
|
1604
1712
|
params.grammar = inputs.grammar;
|
1605
1713
|
params.now = inputs.now;
|
1606
1714
|
if (!inputs.json_schema.empty()) {
|
@@ -1634,7 +1742,7 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
1634
1742
|
}
|
1635
1743
|
|
1636
1744
|
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
|
1637
|
-
if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()
|
1745
|
+
if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
|
1638
1746
|
return common_chat_params_init_hermes_2_pro(tmpl, params);
|
1639
1747
|
}
|
1640
1748
|
|
@@ -1719,7 +1827,7 @@ static common_chat_params common_chat_templates_apply_legacy(
|
|
1719
1827
|
if (res < 0) {
|
1720
1828
|
// if the custom "tmpl" is not supported, we throw an error
|
1721
1829
|
// this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template()
|
1722
|
-
throw std::runtime_error("this custom template is not supported");
|
1830
|
+
throw std::runtime_error("this custom template is not supported, try using --jinja");
|
1723
1831
|
}
|
1724
1832
|
|
1725
1833
|
// if it turns out that our buffer is too small, we resize it
|
@@ -1748,44 +1856,66 @@ common_chat_params common_chat_templates_apply(
|
|
1748
1856
|
: common_chat_templates_apply_legacy(tmpls, inputs);
|
1749
1857
|
}
|
1750
1858
|
|
1751
|
-
static
|
1752
|
-
|
1753
|
-
msg.role = "assistant";
|
1754
|
-
msg.content = input;
|
1755
|
-
return msg;
|
1859
|
+
static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
|
1860
|
+
builder.add_content(builder.consume_rest());
|
1756
1861
|
}
|
1757
1862
|
|
1758
|
-
|
1759
|
-
|
1863
|
+
static void common_chat_parse(common_chat_msg_parser & builder) {
|
1864
|
+
LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str());
|
1865
|
+
|
1866
|
+
switch (builder.syntax().format) {
|
1760
1867
|
case COMMON_CHAT_FORMAT_CONTENT_ONLY:
|
1761
|
-
|
1868
|
+
common_chat_parse_content_only(builder);
|
1869
|
+
break;
|
1762
1870
|
case COMMON_CHAT_FORMAT_GENERIC:
|
1763
|
-
|
1871
|
+
common_chat_parse_generic(builder);
|
1872
|
+
break;
|
1764
1873
|
case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
|
1765
|
-
|
1874
|
+
common_chat_parse_mistral_nemo(builder);
|
1875
|
+
break;
|
1766
1876
|
case COMMON_CHAT_FORMAT_LLAMA_3_X:
|
1767
|
-
|
1877
|
+
common_chat_parse_llama_3_1(builder);
|
1878
|
+
break;
|
1768
1879
|
case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
|
1769
|
-
|
1880
|
+
common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true);
|
1881
|
+
break;
|
1770
1882
|
case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
|
1771
|
-
|
1772
|
-
|
1773
|
-
return common_chat_parse_deepseek_r1(input, /* extract_reasoning= */ true);
|
1883
|
+
common_chat_parse_deepseek_r1(builder);
|
1884
|
+
break;
|
1774
1885
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
|
1775
|
-
|
1886
|
+
common_chat_parse_functionary_v3_2(builder);
|
1887
|
+
break;
|
1776
1888
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
|
1777
|
-
|
1889
|
+
common_chat_parse_functionary_v3_1_llama_3_1(builder);
|
1890
|
+
break;
|
1778
1891
|
case COMMON_CHAT_FORMAT_HERMES_2_PRO:
|
1779
|
-
|
1780
|
-
|
1781
|
-
return common_chat_parse_hermes_2_pro(input, /* extract_reasoning= */ true);
|
1892
|
+
common_chat_parse_hermes_2_pro(builder);
|
1893
|
+
break;
|
1782
1894
|
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
|
1783
|
-
|
1895
|
+
common_chat_parse_firefunction_v2(builder);
|
1896
|
+
break;
|
1784
1897
|
case COMMON_CHAT_FORMAT_COMMAND_R7B:
|
1785
|
-
|
1786
|
-
|
1787
|
-
return common_chat_parse_command_r7b(input, /* extract_reasoning= */ true);
|
1898
|
+
common_chat_parse_command_r7b(builder);
|
1899
|
+
break;
|
1788
1900
|
default:
|
1789
|
-
throw std::runtime_error("Unsupported format: " + common_chat_format_name(format));
|
1901
|
+
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
|
1902
|
+
}
|
1903
|
+
builder.finish();
|
1904
|
+
}
|
1905
|
+
|
1906
|
+
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
|
1907
|
+
common_chat_msg_parser builder(input, is_partial, syntax);
|
1908
|
+
try {
|
1909
|
+
common_chat_parse(builder);
|
1910
|
+
} catch (const common_chat_msg_partial_exception & ex) {
|
1911
|
+
LOG_DBG("Partial parse: %s\n", ex.what());
|
1912
|
+
if (!is_partial) {
|
1913
|
+
builder.clear_tools();
|
1914
|
+
builder.move_to(0);
|
1915
|
+
common_chat_parse_content_only(builder);
|
1916
|
+
}
|
1790
1917
|
}
|
1918
|
+
auto msg = builder.result();
|
1919
|
+
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
|
1920
|
+
return msg;
|
1791
1921
|
}
|