@novastera-oss/llamarn 0.4.1 → 0.4.3-beta4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/RNLlamaCpp.podspec +3 -0
- package/android/CMakeLists.txt +2 -0
- package/android/src/main/cpp/include/llama.h +44 -21
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +12 -0
- package/cpp/llama.cpp/CODEOWNERS +116 -10
- package/cpp/llama.cpp/CONTRIBUTING.md +30 -3
- package/cpp/llama.cpp/README.md +13 -5
- package/cpp/llama.cpp/build-xcframework.sh +5 -0
- package/cpp/llama.cpp/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- package/cpp/llama.cpp/common/CMakeLists.txt +12 -2
- package/cpp/llama.cpp/common/arg.cpp +303 -795
- package/cpp/llama.cpp/common/arg.h +2 -3
- package/cpp/llama.cpp/common/chat-parser-xml-toolcall.cpp +861 -0
- package/cpp/llama.cpp/common/chat-parser-xml-toolcall.h +45 -0
- package/cpp/llama.cpp/common/chat-parser.cpp +156 -15
- package/cpp/llama.cpp/common/chat-parser.h +13 -0
- package/cpp/llama.cpp/common/chat.cpp +1147 -88
- package/cpp/llama.cpp/common/chat.h +16 -3
- package/cpp/llama.cpp/common/common.cpp +70 -15
- package/cpp/llama.cpp/common/common.h +57 -19
- package/cpp/llama.cpp/common/download.cpp +1072 -0
- package/cpp/llama.cpp/common/download.h +55 -0
- package/cpp/llama.cpp/common/http.h +73 -0
- package/cpp/llama.cpp/common/json-partial.cpp +70 -2
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +61 -22
- package/cpp/llama.cpp/common/json-schema-to-grammar.h +2 -0
- package/cpp/llama.cpp/common/log.cpp +59 -2
- package/cpp/llama.cpp/common/log.h +12 -4
- package/cpp/llama.cpp/common/sampling.cpp +84 -8
- package/cpp/llama.cpp/common/sampling.h +3 -1
- package/cpp/llama.cpp/common/speculative.cpp +1 -1
- package/cpp/llama.cpp/convert_hf_to_gguf.py +1608 -233
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +6 -1
- package/cpp/llama.cpp/convert_lora_to_gguf.py +37 -5
- package/cpp/llama.cpp/ggml/CMakeLists.txt +47 -28
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +19 -1
- package/cpp/llama.cpp/ggml/include/ggml-cpu.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-hexagon.h +19 -0
- package/cpp/llama.cpp/ggml/include/ggml-metal.h +1 -6
- package/cpp/llama.cpp/ggml/include/ggml-rpc.h +7 -9
- package/cpp/llama.cpp/ggml/include/ggml-zdnn.h +2 -1
- package/cpp/llama.cpp/ggml/include/ggml.h +199 -6
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +38 -0
- package/cpp/llama.cpp/ggml/src/ggml-alloc.c +299 -130
- package/cpp/llama.cpp/ggml/src/ggml-backend-impl.h +4 -4
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +21 -5
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +99 -2
- package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +57 -45
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +138 -47
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +1584 -1773
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +201 -317
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +146 -187
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +771 -713
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +135 -77
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +5 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +16 -17
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +318 -145
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +155 -60
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +8 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +14 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +10 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +108 -64
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +14 -4
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +530 -87
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +37 -45
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +349 -127
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +947 -1218
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +5 -4
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +143 -29
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +82 -76
- package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +233 -28
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +326 -66
- package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +12 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/argsort.cu +102 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/binbcast.cu +110 -76
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +167 -38
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d.cu +6 -11
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +12 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +245 -151
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cuh +1 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +341 -289
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile.cu +49 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile.cuh +1233 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec.cuh +586 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +123 -220
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +41 -39
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +715 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +150 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cuh +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +321 -24
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmf.cu +93 -351
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmf.cuh +828 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmid.cu +164 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmid.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +3 -166
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvf.cu +371 -78
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvf.cuh +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cu +279 -147
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +97 -85
- package/cpp/llama.cpp/ggml/src/ggml-cuda/pad.cu +46 -23
- package/cpp/llama.cpp/ggml/src/ggml-cuda/pad_reflect_1d.cu +63 -54
- package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +12 -10
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +192 -77
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +10 -9
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cu +137 -75
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set.cu +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set.cuh +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +40 -19
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/topk-moe.cu +336 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/topk-moe.cuh +16 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/tsembd.cu +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +105 -11
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +36 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +87 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +28 -12
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/CMakeLists.txt +68 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3807 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +40 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/act-ops.c +442 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +40 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-dma.c +69 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-dma.h +119 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +156 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +64 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-exp.c +93 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.c +60 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-utils.c +960 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +1032 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/main.c +829 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2223 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +418 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +255 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp-utils.c +448 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp-utils.h +220 -0
- package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +8 -13
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +110 -12
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +6 -5
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +446 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-context.h +33 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-context.m +599 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1662 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.h +251 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.m +1527 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +244 -39
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +3844 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.h +90 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.cpp +723 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +3453 -1907
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +10 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1331 -109
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/cvt.cl +126 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +31 -4
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +35 -7
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +31 -4
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +123 -10
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +341 -161
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +6 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +74 -15
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +50 -30
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +10 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +166 -99
- package/cpp/llama.cpp/ggml/src/ggml-sycl/count-equal.cpp +79 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/count-equal.hpp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +72 -94
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +67 -49
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +21 -31
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +252 -316
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +6 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +9 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +359 -142
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +80 -60
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +201 -132
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +230 -55
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.hpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/pad.cpp +97 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/pad.hpp +24 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/pad_reflect_1d.cpp +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/pad_reflect_1d.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/roll.cpp +122 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/roll.hpp +20 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +50 -41
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set.cpp +73 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set.hpp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +45 -36
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +330 -165
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +12 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +16 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +4184 -2159
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +53 -30
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +13 -6
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +138 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +7 -6
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +5 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +52 -14
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +50 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +61 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +54 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +5 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +21 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +28 -18
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +15 -7
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +125 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +229 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +33 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +3 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +3 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +3 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +140 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +106 -634
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +118 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +556 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +70 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +77 -214
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +589 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +25 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +55 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +45 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +227 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +5 -52
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +5 -35
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +5 -35
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +5 -41
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +6 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +140 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +5 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +171 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +79 -29
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +471 -196
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1690 -383
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +45 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +930 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +57 -10
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +874 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +25 -912
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/{set_rows.wgsl → set_rows.tmpl.wgsl} +38 -8
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- package/cpp/llama.cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
- package/cpp/llama.cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +96 -314
- package/cpp/llama.cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- package/cpp/llama.cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- package/cpp/llama.cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
- package/cpp/llama.cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +440 -17
- package/cpp/llama.cpp/ggml/src/gguf.cpp +104 -29
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +363 -13
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +64 -0
- package/cpp/llama.cpp/gguf-py/gguf/lazy.py +8 -3
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_convert_endian.py +6 -0
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +156 -18
- package/cpp/llama.cpp/gguf-py/gguf/utility.py +80 -0
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +4 -4
- package/cpp/llama.cpp/include/llama.h +44 -21
- package/cpp/llama.cpp/media/llama1-icon-transparent.png +0 -0
- package/cpp/llama.cpp/media/llama1-icon-transparent.svg +77 -0
- package/cpp/llama.cpp/media/llama1-icon.png +0 -0
- package/cpp/llama.cpp/media/llama1-icon.svg +87 -0
- package/cpp/llama.cpp/requirements/requirements-all.txt +2 -0
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -3
- package/cpp/llama.cpp/requirements/requirements-convert_legacy_llama.txt +3 -1
- package/cpp/llama.cpp/requirements/requirements-tool_bench.txt +1 -1
- package/cpp/llama.cpp/src/CMakeLists.txt +101 -0
- package/cpp/llama.cpp/src/llama-adapter.cpp +33 -0
- package/cpp/llama.cpp/src/llama-adapter.h +3 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +344 -14
- package/cpp/llama.cpp/src/llama-arch.h +50 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +63 -31
- package/cpp/llama.cpp/src/llama-batch.h +13 -2
- package/cpp/llama.cpp/src/llama-chat.cpp +85 -3
- package/cpp/llama.cpp/src/llama-chat.h +4 -0
- package/cpp/llama.cpp/src/llama-context.cpp +300 -45
- package/cpp/llama.cpp/src/llama-context.h +16 -6
- package/cpp/llama.cpp/src/llama-cparams.h +2 -1
- package/cpp/llama.cpp/src/llama-grammar.cpp +17 -9
- package/cpp/llama.cpp/src/llama-graph.cpp +226 -64
- package/cpp/llama.cpp/src/llama-graph.h +27 -5
- package/cpp/llama.cpp/src/llama-hparams.cpp +53 -2
- package/cpp/llama.cpp/src/llama-hparams.h +48 -8
- package/cpp/llama.cpp/src/llama-impl.cpp +3 -3
- package/cpp/llama.cpp/src/llama-impl.h +2 -0
- package/cpp/llama.cpp/src/llama-kv-cache-iswa.cpp +13 -3
- package/cpp/llama.cpp/src/llama-kv-cache-iswa.h +2 -0
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +120 -62
- package/cpp/llama.cpp/src/llama-kv-cache.h +13 -4
- package/cpp/llama.cpp/src/llama-kv-cells.h +44 -2
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +19 -9
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +2 -0
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +38 -17
- package/cpp/llama.cpp/src/llama-memory-recurrent.h +5 -2
- package/cpp/llama.cpp/src/llama-memory.h +3 -0
- package/cpp/llama.cpp/src/llama-model-loader.cpp +2 -0
- package/cpp/llama.cpp/src/llama-model.cpp +1070 -12614
- package/cpp/llama.cpp/src/llama-model.h +40 -4
- package/cpp/llama.cpp/src/llama-quant.cpp +14 -6
- package/cpp/llama.cpp/src/llama-sampling.cpp +243 -136
- package/cpp/llama.cpp/src/llama-vocab.cpp +43 -3
- package/cpp/llama.cpp/src/llama-vocab.h +43 -39
- package/cpp/llama.cpp/src/llama.cpp +69 -10
- package/cpp/llama.cpp/src/models/afmoe.cpp +187 -0
- package/cpp/llama.cpp/src/models/apertus.cpp +125 -0
- package/cpp/llama.cpp/src/models/arcee.cpp +135 -0
- package/cpp/llama.cpp/src/models/arctic.cpp +138 -0
- package/cpp/llama.cpp/src/models/arwkv7.cpp +86 -0
- package/cpp/llama.cpp/src/models/baichuan.cpp +122 -0
- package/cpp/llama.cpp/src/models/bailingmoe.cpp +144 -0
- package/cpp/llama.cpp/src/models/bailingmoe2.cpp +135 -0
- package/cpp/llama.cpp/src/models/bert.cpp +176 -0
- package/cpp/llama.cpp/src/models/bitnet.cpp +160 -0
- package/cpp/llama.cpp/src/models/bloom.cpp +101 -0
- package/cpp/llama.cpp/src/models/chameleon.cpp +178 -0
- package/cpp/llama.cpp/src/models/chatglm.cpp +132 -0
- package/cpp/llama.cpp/src/models/codeshell.cpp +111 -0
- package/cpp/llama.cpp/src/models/cogvlm.cpp +100 -0
- package/cpp/llama.cpp/src/models/cohere2-iswa.cpp +131 -0
- package/cpp/llama.cpp/src/models/command-r.cpp +122 -0
- package/cpp/llama.cpp/src/models/dbrx.cpp +123 -0
- package/cpp/llama.cpp/src/models/deci.cpp +135 -0
- package/cpp/llama.cpp/src/models/deepseek.cpp +144 -0
- package/cpp/llama.cpp/src/models/deepseek2.cpp +237 -0
- package/cpp/llama.cpp/src/models/dots1.cpp +134 -0
- package/cpp/llama.cpp/src/models/dream.cpp +105 -0
- package/cpp/llama.cpp/src/models/ernie4-5-moe.cpp +150 -0
- package/cpp/llama.cpp/src/models/ernie4-5.cpp +110 -0
- package/cpp/llama.cpp/src/models/exaone.cpp +114 -0
- package/cpp/llama.cpp/src/models/exaone4.cpp +123 -0
- package/cpp/llama.cpp/src/models/falcon-h1.cpp +113 -0
- package/cpp/llama.cpp/src/models/falcon.cpp +120 -0
- package/cpp/llama.cpp/src/models/gemma-embedding.cpp +120 -0
- package/cpp/llama.cpp/src/models/gemma.cpp +112 -0
- package/cpp/llama.cpp/src/models/gemma2-iswa.cpp +125 -0
- package/cpp/llama.cpp/src/models/gemma3-iswa.cpp +131 -0
- package/cpp/llama.cpp/src/models/gemma3n-iswa.cpp +377 -0
- package/cpp/llama.cpp/src/models/glm4-moe.cpp +153 -0
- package/cpp/llama.cpp/src/models/glm4.cpp +127 -0
- package/cpp/llama.cpp/src/models/gpt2.cpp +105 -0
- package/cpp/llama.cpp/src/models/gptneox.cpp +144 -0
- package/cpp/llama.cpp/src/models/granite-hybrid.cpp +196 -0
- package/cpp/llama.cpp/src/models/granite.cpp +211 -0
- package/cpp/llama.cpp/src/models/graph-context-mamba.cpp +283 -0
- package/cpp/llama.cpp/src/models/grok.cpp +159 -0
- package/cpp/llama.cpp/src/models/grovemoe.cpp +141 -0
- package/cpp/llama.cpp/src/models/hunyuan-dense.cpp +132 -0
- package/cpp/llama.cpp/src/models/hunyuan-moe.cpp +154 -0
- package/cpp/llama.cpp/src/models/internlm2.cpp +120 -0
- package/cpp/llama.cpp/src/models/jais.cpp +86 -0
- package/cpp/llama.cpp/src/models/jamba.cpp +106 -0
- package/cpp/llama.cpp/src/models/lfm2.cpp +173 -0
- package/cpp/llama.cpp/src/models/llada-moe.cpp +122 -0
- package/cpp/llama.cpp/src/models/llada.cpp +99 -0
- package/cpp/llama.cpp/src/models/llama-iswa.cpp +174 -0
- package/cpp/llama.cpp/src/models/llama.cpp +155 -0
- package/cpp/llama.cpp/src/models/mamba.cpp +55 -0
- package/cpp/llama.cpp/src/models/minicpm3.cpp +199 -0
- package/cpp/llama.cpp/src/models/minimax-m2.cpp +124 -0
- package/cpp/llama.cpp/src/models/models.h +485 -0
- package/cpp/llama.cpp/src/models/mpt.cpp +126 -0
- package/cpp/llama.cpp/src/models/nemotron-h.cpp +121 -0
- package/cpp/llama.cpp/src/models/nemotron.cpp +122 -0
- package/cpp/llama.cpp/src/models/neo-bert.cpp +104 -0
- package/cpp/llama.cpp/src/models/olmo.cpp +121 -0
- package/cpp/llama.cpp/src/models/olmo2.cpp +150 -0
- package/cpp/llama.cpp/src/models/olmoe.cpp +124 -0
- package/cpp/llama.cpp/src/models/openai-moe-iswa.cpp +124 -0
- package/cpp/llama.cpp/src/models/openelm.cpp +124 -0
- package/cpp/llama.cpp/src/models/orion.cpp +123 -0
- package/cpp/llama.cpp/src/models/pangu-embedded.cpp +121 -0
- package/cpp/llama.cpp/src/models/phi2.cpp +121 -0
- package/cpp/llama.cpp/src/models/phi3.cpp +152 -0
- package/cpp/llama.cpp/src/models/plamo.cpp +110 -0
- package/cpp/llama.cpp/src/models/plamo2.cpp +316 -0
- package/cpp/llama.cpp/src/models/plm.cpp +168 -0
- package/cpp/llama.cpp/src/models/qwen.cpp +108 -0
- package/cpp/llama.cpp/src/models/qwen2.cpp +117 -0
- package/cpp/llama.cpp/src/models/qwen2moe.cpp +151 -0
- package/cpp/llama.cpp/src/models/qwen2vl.cpp +117 -0
- package/cpp/llama.cpp/src/models/qwen3.cpp +117 -0
- package/cpp/llama.cpp/src/models/qwen3moe.cpp +124 -0
- package/cpp/llama.cpp/src/models/qwen3vl-moe.cpp +149 -0
- package/cpp/llama.cpp/src/models/qwen3vl.cpp +141 -0
- package/cpp/llama.cpp/src/models/refact.cpp +94 -0
- package/cpp/llama.cpp/src/models/rwkv6-base.cpp +162 -0
- package/cpp/llama.cpp/src/models/rwkv6.cpp +94 -0
- package/cpp/llama.cpp/src/models/rwkv6qwen2.cpp +86 -0
- package/cpp/llama.cpp/src/models/rwkv7-base.cpp +135 -0
- package/cpp/llama.cpp/src/models/rwkv7.cpp +90 -0
- package/cpp/llama.cpp/src/models/seed-oss.cpp +124 -0
- package/cpp/llama.cpp/src/models/smallthinker.cpp +120 -0
- package/cpp/llama.cpp/src/models/smollm3.cpp +128 -0
- package/cpp/llama.cpp/src/models/stablelm.cpp +146 -0
- package/cpp/llama.cpp/src/models/starcoder.cpp +100 -0
- package/cpp/llama.cpp/src/models/starcoder2.cpp +121 -0
- package/cpp/llama.cpp/src/models/t5-dec.cpp +166 -0
- package/cpp/llama.cpp/src/models/t5-enc.cpp +96 -0
- package/cpp/llama.cpp/src/models/wavtokenizer-dec.cpp +149 -0
- package/cpp/llama.cpp/src/models/xverse.cpp +108 -0
- package/cpp/llama.cpp/src/unicode.cpp +77 -0
- package/cpp/llama.cpp/src/unicode.h +43 -0
- package/cpp/llama.cpp/vendor/cpp-httplib/CMakeLists.txt +94 -0
- package/cpp/llama.cpp/vendor/cpp-httplib/httplib.cpp +9339 -0
- package/cpp/llama.cpp/vendor/cpp-httplib/httplib.h +433 -8222
- package/cpp/llama.cpp/vendor/cpp-httplib/patch-boringssl.cmake +6 -0
- package/cpp/llama.cpp/vendor/miniaudio/miniaudio.h +4179 -1900
- package/cpp/llama.cpp/vendor/minja/chat-template.hpp +9 -2
- package/cpp/llama.cpp/vendor/minja/minja.hpp +101 -22
- package/ios/include/chat.h +16 -3
- package/ios/include/common/minja/chat-template.hpp +9 -2
- package/ios/include/common/minja/minja.hpp +101 -22
- package/ios/include/common.h +57 -19
- package/ios/include/json-schema-to-grammar.h +2 -0
- package/ios/include/llama.h +44 -21
- package/ios/include/log.h +12 -4
- package/ios/include/sampling.h +3 -1
- package/ios/libs/llama.xcframework/Info.plist +20 -20
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6399 -5557
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +19 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-metal.h +1 -6
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +199 -6
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +44 -21
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6362 -5520
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4813 -4241
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +19 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +1 -6
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +199 -6
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +44 -21
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +10 -4
- package/cpp/llama.cpp/ggml/src/ggml-cann/Doxyfile +0 -2579
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +0 -371
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh +0 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +0 -379
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh +0 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +0 -495
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +0 -486
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +0 -6886
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +0 -154
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
- package/cpp/llama.cpp/ggml/src/ggml-zdnn/ggml-zdnn-impl.h +0 -97
- package/cpp/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
- package/cpp/llama.cpp/models/templates/ByteDance-Seed-OSS.jinja +0 -171
- package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja +0 -202
- package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja +0 -156
- package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +0 -124
- package/cpp/llama.cpp/models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja +0 -152
- package/cpp/llama.cpp/models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja +0 -152
- package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +0 -62
- package/cpp/llama.cpp/models/templates/Qwen-Qwen2.5-7B-Instruct.jinja +0 -54
- package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +0 -85
- package/cpp/llama.cpp/models/templates/README.md +0 -25
- package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja +0 -1
- package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja +0 -1
- package/cpp/llama.cpp/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja +0 -57
- package/cpp/llama.cpp/models/templates/google-gemma-2-2b-it.jinja +0 -4
- package/cpp/llama.cpp/models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja +0 -59
- package/cpp/llama.cpp/models/templates/llama-cpp-deepseek-r1.jinja +0 -76
- package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja +0 -34
- package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.1.jinja +0 -58
- package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.2.jinja +0 -287
- package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja +0 -109
- package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja +0 -93
- package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja +0 -109
- package/cpp/llama.cpp/models/templates/microsoft-Phi-3.5-mini-instruct.jinja +0 -8
- package/cpp/llama.cpp/models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja +0 -87
- package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja +0 -43
- package/cpp/llama.cpp/models/templates/openai-gpt-oss-120b.jinja +0 -331
- package/cpp/llama.cpp/models/templates/unsloth-mistral-Devstral-Small-2507.jinja +0 -105
- package/cpp/llama.cpp/prompts/LLM-questions.txt +0 -49
- package/cpp/llama.cpp/prompts/alpaca.txt +0 -1
- package/cpp/llama.cpp/prompts/assistant.txt +0 -31
- package/cpp/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
- package/cpp/llama.cpp/prompts/chat-with-bob.txt +0 -7
- package/cpp/llama.cpp/prompts/chat-with-qwen.txt +0 -1
- package/cpp/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
- package/cpp/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
- package/cpp/llama.cpp/prompts/chat.txt +0 -28
- package/cpp/llama.cpp/prompts/dan-modified.txt +0 -1
- package/cpp/llama.cpp/prompts/dan.txt +0 -1
- package/cpp/llama.cpp/prompts/mnemonics.txt +0 -93
- package/cpp/llama.cpp/prompts/parallel-questions.txt +0 -43
- package/cpp/llama.cpp/prompts/reason-act.txt +0 -18
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5524
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +0 -4247
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-alloc.h +0 -76
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +0 -354
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-blas.h +0 -25
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +0 -145
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-metal.h +0 -66
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +0 -256
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +0 -2492
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/gguf.h +0 -202
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +0 -1391
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Modules/module.modulemap +0 -17
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Resources/Info.plist +0 -32
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-alloc.h +0 -76
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +0 -354
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-blas.h +0 -25
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +0 -145
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-metal.h +0 -66
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +0 -256
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +0 -2492
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/gguf.h +0 -202
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +0 -1391
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Modules/module.modulemap +0 -17
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Resources/Info.plist +0 -32
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-alloc.h +0 -76
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +0 -354
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-blas.h +0 -25
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +0 -145
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-metal.h +0 -66
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +0 -256
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +0 -2492
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/gguf.h +0 -202
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +0 -1391
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Modules/module.modulemap +0 -17
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Resources/Info.plist +0 -32
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5561
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-alloc.h +0 -76
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +0 -354
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-blas.h +0 -25
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +0 -145
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-metal.h +0 -66
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +0 -256
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +0 -2492
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/gguf.h +0 -202
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +0 -1391
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Info.plist +0 -35
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Modules/module.modulemap +0 -17
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5524
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +0 -4246
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +0 -76
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +0 -354
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +0 -25
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +0 -145
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +0 -66
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +0 -256
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +0 -2492
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +0 -202
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -1391
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Info.plist +0 -35
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +0 -17
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5558
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-alloc.h +0 -76
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +0 -354
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-blas.h +0 -25
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +0 -145
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-metal.h +0 -66
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +0 -256
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +0 -2492
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/gguf.h +0 -202
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +0 -1391
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Info.plist +0 -32
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Modules/module.modulemap +0 -17
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5520
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +0 -4243
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +0 -76
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +0 -354
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +0 -25
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +0 -145
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +0 -66
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +0 -256
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +0 -2492
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +0 -202
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -1391
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Info.plist +0 -32
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +0 -17
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
|
@@ -9,8 +9,11 @@
|
|
|
9
9
|
#include <minja/chat-template.hpp>
|
|
10
10
|
#include <minja/minja.hpp>
|
|
11
11
|
|
|
12
|
+
#include <algorithm>
|
|
12
13
|
#include <cstdio>
|
|
14
|
+
#include <cctype>
|
|
13
15
|
#include <exception>
|
|
16
|
+
#include <functional>
|
|
14
17
|
#include <iostream>
|
|
15
18
|
#include <optional>
|
|
16
19
|
#include <stdexcept>
|
|
@@ -163,6 +166,19 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
|
|
|
163
166
|
throw std::runtime_error("Invalid tool_choice: " + tool_choice);
|
|
164
167
|
}
|
|
165
168
|
|
|
169
|
+
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
|
|
170
|
+
common_chat_templates_inputs dummy_inputs;
|
|
171
|
+
common_chat_msg msg;
|
|
172
|
+
msg.role = "user";
|
|
173
|
+
msg.content = "test";
|
|
174
|
+
dummy_inputs.messages = {msg};
|
|
175
|
+
dummy_inputs.enable_thinking = false;
|
|
176
|
+
const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
|
|
177
|
+
dummy_inputs.enable_thinking = true;
|
|
178
|
+
const auto rendered_with_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
|
|
179
|
+
return rendered_no_thinking.prompt != rendered_with_thinking.prompt;
|
|
180
|
+
}
|
|
181
|
+
|
|
166
182
|
template <>
|
|
167
183
|
std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messages) {
|
|
168
184
|
std::vector<common_chat_msg> msgs;
|
|
@@ -297,7 +313,6 @@ json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msg
|
|
|
297
313
|
}
|
|
298
314
|
if (!msg.reasoning_content.empty()) {
|
|
299
315
|
jmsg["reasoning_content"] = msg.reasoning_content;
|
|
300
|
-
jmsg["thinking"] = msg.reasoning_content; // gpt-oss
|
|
301
316
|
}
|
|
302
317
|
if (!msg.tool_name.empty()) {
|
|
303
318
|
jmsg["name"] = msg.tool_name;
|
|
@@ -612,17 +627,28 @@ const char * common_chat_format_name(common_chat_format format) {
|
|
|
612
627
|
case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
|
|
613
628
|
case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
|
|
614
629
|
case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo";
|
|
630
|
+
case COMMON_CHAT_FORMAT_MAGISTRAL: return "Magistral";
|
|
615
631
|
case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
|
|
616
632
|
case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
|
|
617
633
|
case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
|
|
618
634
|
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
|
|
619
635
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
|
|
620
636
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
|
|
637
|
+
case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1";
|
|
621
638
|
case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
|
|
622
639
|
case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
|
|
623
640
|
case COMMON_CHAT_FORMAT_GRANITE: return "Granite";
|
|
624
641
|
case COMMON_CHAT_FORMAT_GPT_OSS: return "GPT-OSS";
|
|
625
642
|
case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";
|
|
643
|
+
case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
|
|
644
|
+
case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
|
|
645
|
+
case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
|
|
646
|
+
case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax-M2";
|
|
647
|
+
case COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5";
|
|
648
|
+
case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
|
|
649
|
+
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
|
|
650
|
+
case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
|
|
651
|
+
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
|
|
626
652
|
default:
|
|
627
653
|
throw std::runtime_error("Unknown chat format");
|
|
628
654
|
}
|
|
@@ -684,11 +710,13 @@ static void parse_json_tool_calls(
|
|
|
684
710
|
size_t from = std::string::npos;
|
|
685
711
|
auto first = true;
|
|
686
712
|
while (true) {
|
|
713
|
+
auto start_pos = builder.pos();
|
|
687
714
|
auto res = function_regex_start_only && first
|
|
688
715
|
? builder.try_consume_regex(*function_regex_start_only)
|
|
689
716
|
: function_regex
|
|
690
717
|
? builder.try_find_regex(*function_regex, from)
|
|
691
718
|
: std::nullopt;
|
|
719
|
+
|
|
692
720
|
if (res) {
|
|
693
721
|
std::string name;
|
|
694
722
|
if (get_function_name) {
|
|
@@ -723,6 +751,8 @@ static void parse_json_tool_calls(
|
|
|
723
751
|
return;
|
|
724
752
|
}
|
|
725
753
|
throw common_chat_msg_partial_exception("incomplete tool call");
|
|
754
|
+
} else {
|
|
755
|
+
builder.move_to(start_pos);
|
|
726
756
|
}
|
|
727
757
|
break;
|
|
728
758
|
}
|
|
@@ -782,6 +812,7 @@ static std::string apply(
|
|
|
782
812
|
}
|
|
783
813
|
tmpl_inputs.add_generation_prompt = inputs.add_generation_prompt;
|
|
784
814
|
tmpl_inputs.extra_context = inputs.extra_context;
|
|
815
|
+
tmpl_inputs.extra_context["enable_thinking"] = inputs.enable_thinking;
|
|
785
816
|
if (additional_context) {
|
|
786
817
|
tmpl_inputs.extra_context.merge_patch(*additional_context);
|
|
787
818
|
}
|
|
@@ -963,6 +994,185 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
|
|
|
963
994
|
data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
|
|
964
995
|
return data;
|
|
965
996
|
}
|
|
997
|
+
|
|
998
|
+
|
|
999
|
+
// Case-insensitive find.
//
// Returns the index of the first occurrence of `needle` in `haystack` at or after `pos`,
// comparing byte by byte through std::tolower (no multi-byte / Unicode case folding).
// Returns std::string::npos when there is no match or when `pos` is past the end of `haystack`.
// Note: an empty `needle` matches at `pos`, except when `pos == haystack.size()`, which yields npos.
static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) {
    // haystack.begin() + pos would be an out-of-range iterator for pos > size()
    if (pos > haystack.size()) {
        return std::string::npos;
    }

    const auto first = haystack.begin() + static_cast<std::string::difference_type>(pos);
    const auto it = std::search(
        first, haystack.end(),
        needle.begin(), needle.end(),
        // std::tolower requires a value representable as unsigned char (or EOF);
        // taking the arguments as unsigned char keeps bytes >= 0x80 well-defined
        [](unsigned char a, unsigned char b) { return std::tolower(a) == std::tolower(b); }
    );
    return (it == haystack.end())
        ? std::string::npos
        : static_cast<size_t>(std::distance(haystack.begin(), it));
}
|
|
1008
|
+
|
|
1009
|
+
// Builds the chat params (prompt, optional grammar, triggers, preserved tokens) for LFM2 templates.
//
// Behaviour by input combination:
//   - tools + ("json_schema" or "grammar")        -> throws std::runtime_error
//   - tools + marker in the leading system message -> lazy grammar forcing a JSON tool-call array,
//                                                     format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS
//   - tools only                                   -> no grammar, tool-call tokens are preserved
//   - "json_schema" only                           -> grammar built from the schema
//   - "grammar" only                               -> grammar passed through
//   - none of the above                            -> plain template rendering
//
// The marker is the text "force json schema." (matched case-insensitively, optionally followed by a newline).
// It is removed from the system message before the prompt is rendered.
static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
    const auto is_json_schema_provided = !inputs.json_schema.is_null();
    const auto is_grammar_provided = !inputs.grammar.empty();
    const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty();

    // the logic requires potentially modifying the messages
    auto tweaked_messages = inputs.messages;

    // Strips the first marker occurrence from the leading system message.
    // Returns true when a marker was found and removed, false otherwise.
    auto replace_json_schema_marker = [](json & messages) -> bool {
        // the newline-terminated form is tried first so the line break is removed together with the marker
        static const std::string markers[] = {
            "force json schema.\n",
            "force json schema.",
        };

        if (messages.empty() || messages.at(0).at("role") != "system") {
            return false;
        }

        auto & system_msg = messages.at(0);
        // a missing or structured (non-string) content cannot carry the marker;
        // report "not found" instead of throwing a json type error
        if (!system_msg.contains("content") || !system_msg.at("content").is_string()) {
            return false;
        }

        std::string content = system_msg.at("content").get<std::string>();

        for (const auto & marker : markers) {
            const auto pos = ifind_string(content, marker);
            if (pos != std::string::npos) {
                content.replace(pos, marker.length(), "");
                // inject modified content back into the messages
                system_msg.at("content") = content;
                return true;
            }
        }

        return false;
    };

    // Lfm2 model does not natively work with json, but can generally understand the tools structure
    //
    // Example of the pytorch dialog structure:
    //     <|startoftext|><|im_start|>system
    //     List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|>
    //     <|im_start|>user
    //     What is the current status of candidate ID 12345?<|im_end|>
    //     <|im_start|>assistant
    //     <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|>
    //     <|im_start|>tool
    //     <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|>
    //     <|im_start|>assistant
    //     The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|>
    //
    // For the llama server compatibility with json tools semantic,
    // the client can add a "force json schema." line into the system message prompt to force the json output.
    //
    if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) {
        // server/utils.hpp prohibits that branch for the custom grammar anyways
        throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar");
    } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) {
        LOG_INF("%s: Using tools to build a grammar\n", __func__);

        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            auto schemas = json::array();
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                schemas.push_back({
                    {"type", "object"},
                    {"properties", {
                        {"name", {
                            {"type", "string"},
                            {"const", function.at("name")},
                        }},
                        {"arguments", function.at("parameters")},
                    }},
                    // only the declared properties are required; this format has no "id" property
                    {"required", json::array({"name", "arguments"})},
                });
            });
            auto schema = json {
                {"type", "array"},
                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
                {"minItems", 1},
            };
            if (!inputs.parallel_tool_calls) {
                schema["maxItems"] = 1;
            }

            builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\"");
        });
        // model has no concept of tool selection mode choice,
        // if the system prompt rendered correctly it will produce a tool call
        // the grammar goes inside the tool call body
        data.grammar_lazy = true;
        data.grammar_triggers = {{COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, "\\s*<\\|tool_call_start\\|>\\s*\\["}};
        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
        data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS;
    } else if (are_tools_provided) {
        // reaching this branch with tools implies neither json schema nor grammar was given
        // (that combination throws above)
        LOG_INF("%s: Using tools without json schema or grammar\n", __func__);
        // output those tokens
        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
    } else if (is_json_schema_provided) {
        LOG_INF("%s: Using provided json schema to build a grammar\n", __func__);
        data.grammar = json_schema_to_grammar(inputs.json_schema);
    } else if (is_grammar_provided) {
        LOG_INF("%s: Using provided grammar\n", __func__);
        data.grammar = inputs.grammar;
    } else {
        LOG_INF("%s: Using content relying on the template\n", __func__);
    }

    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
    LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str());

    return data;
}
|
|
1117
|
+
|
|
1118
|
+
// Builds the chat params for Magistral templates.
//
// The prompt is rendered from the template and the [THINK] / [/THINK] tokens are always preserved.
// With tools: a grammar for `[TOOL_CALLS]` followed by a JSON array of {name, arguments, id} objects is built;
// it is lazy unless tool_choice is COMMON_CHAT_TOOL_CHOICE_REQUIRED, and is triggered by the word "[TOOL_CALLS]".
// Without tools: the grammar comes from "json_schema" or "grammar".
// Throws std::runtime_error when both "json_schema" and "grammar" are given without tools.
static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_MAGISTRAL;
    data.preserved_tokens = {
        "[THINK]",
        "[/THINK]",
    };

    const bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();

    if (!has_tools) {
        // no tools: the grammar (if any) applies from the first token
        data.grammar_lazy = false;
        if (inputs.json_schema.is_null()) {
            data.grammar = inputs.grammar;
            return data;
        }
        if (!inputs.grammar.empty()) {
            throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
        }
        data.grammar = json_schema_to_grammar(inputs.json_schema);
        return data;
    }

    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
        auto tool_schemas = json::array();
        foreach_function(inputs.tools, [&](const json & tool) {
            const auto & fn = tool.at("function");

            // one object schema per tool: fixed name, the tool's own parameter schema, 9-char alphanumeric id
            json name_schema = {
                {"type", "string"},
                {"const", fn.at("name")},
            };
            json id_schema = {
                {"type", "string"},
                {"pattern", "^[a-zA-Z0-9]{9}$"},
            };
            json properties = {
                {"name", name_schema},
                {"arguments", fn.at("parameters")},
                {"id", id_schema},
            };
            tool_schemas.push_back({
                {"type", "object"},
                {"properties", properties},
                {"required", json::array({"name", "arguments", "id"})},
            });
        });

        json item_schema = tool_schemas.size() == 1 ? tool_schemas[0] : json {{"anyOf", tool_schemas}};
        auto calls_schema = json {
            {"type", "array"},
            {"items", item_schema},
            {"minItems", 1},
        };
        if (!inputs.parallel_tool_calls) {
            calls_schema["maxItems"] = 1;
        }

        builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", calls_schema));
    });
    data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
    data.preserved_tokens.push_back("[TOOL_CALLS]");

    return data;
}
|
|
1175
|
+
|
|
966
1176
|
static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
|
|
967
1177
|
if (!builder.syntax().parse_tool_calls) {
|
|
968
1178
|
builder.add_content(builder.consume_rest());
|
|
@@ -973,6 +1183,18 @@ static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
|
|
|
973
1183
|
parse_prefixed_json_tool_call_array(builder, prefix);
|
|
974
1184
|
}
|
|
975
1185
|
|
|
1186
|
+
// Parses a Magistral model response.
//
// First attempts to parse a reasoning section delimited by [THINK] ... [/THINK].
// Then, if tool-call parsing is enabled in the builder's syntax, hands over to
// parse_prefixed_json_tool_call_array() with the "[TOOL_CALLS]" prefix;
// otherwise the remaining input is added as plain content.
static void common_chat_parse_magistral(common_chat_msg_parser & builder) {
    builder.try_parse_reasoning("[THINK]", "[/THINK]");

    if (builder.syntax().parse_tool_calls) {
        static const common_regex tool_calls_prefix(regex_escape("[TOOL_CALLS]"));
        parse_prefixed_json_tool_call_array(builder, tool_calls_prefix);
    } else {
        builder.add_content(builder.consume_rest());
    }
}
|
|
1197
|
+
|
|
976
1198
|
static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
977
1199
|
common_chat_params data;
|
|
978
1200
|
|
|
@@ -1184,7 +1406,139 @@ static common_chat_params common_chat_params_init_llama_3_x(const common_chat_te
|
|
|
1184
1406
|
});
|
|
1185
1407
|
return data;
|
|
1186
1408
|
}
|
|
1409
|
+
|
|
1410
|
+
// Builds the chat params for Nemotron V2 templates.
//
// The prompt is rendered from the template. If it ends with "<think>\n", the thinking block is either
// closed right away (enable_thinking == false) or marked as forced open (enable_thinking == true).
// With tools, a lazy grammar for `<TOOLCALL>[ {name, arguments}, ... ]</TOOLCALL>` is built
// (no tool call id), together with a full-pattern trigger.
static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    // Render the prompt through the template
    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_NEMOTRON_V2;

    // The template left a thinking block open: keep it open or close it depending on enable_thinking
    if (string_ends_with(data.prompt, "<think>\n")) {
        if (inputs.enable_thinking) {
            data.thinking_forced_open = true;
        } else {
            data.prompt += "</think>";
        }
    }

    // Nothing more to do without tools
    const bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();
    if (!has_tools) {
        return data;
    }

    // Grammar for the <TOOLCALL> format, similar to CommandR, but without tool call ID
    data.grammar_lazy = true;
    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
        auto tool_schemas = json::array();
        foreach_function(inputs.tools, [&](const json & tool) {
            const auto & fn = tool.at("function");
            json name_schema = {
                { "type", "string" },
                { "const", fn.at("name") },
            };
            json properties = {
                { "name", name_schema },
                { "arguments", fn.at("parameters") },
            };
            tool_schemas.push_back({
                { "type", "object" },
                { "properties", properties },
                { "required", json::array({ "name", "arguments" }) },
            });
        });

        json item_schema = tool_schemas.size() == 1 ? tool_schemas[0] : json{ { "anyOf", tool_schemas } };
        auto calls_schema = json{
            { "type", "array" },
            { "items", item_schema },
            { "minItems", 1 },
        };
        if (!inputs.parallel_tool_calls) {
            calls_schema["maxItems"] = 1;
        }

        // With a forced-open thinking block the grammar also accepts the closing </think> tag
        std::string root_rule;
        if (data.thinking_forced_open) {
            root_rule += "( \"</think>\" space )? ";
        }
        root_rule += "\"<TOOLCALL>\" ";
        root_rule += builder.add_schema("tool_calls", calls_schema);
        root_rule += " \"</TOOLCALL>\"";
        builder.add_rule("root", root_rule);
    });

    // If thinking_forced_open, then we capture the </think> tag in the grammar,
    // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
    std::string trigger_pattern = data.thinking_forced_open
        ? "[\\s\\S]*?(</think>\\s*)"
        : "(?:<think>[\\s\\S]*?</think>\\s*)?";
    trigger_pattern += "(<TOOLCALL>)[\\s\\S]*";
    data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, trigger_pattern });

    return data;
}
|
|
1470
|
+
|
|
1471
|
+
// Prepares prompt, format and (when tools are supplied) the lazy tool-call grammar
// for the Apertus chat format.
//   tmpl   - chat template used to render the prompt
//   inputs - messages, tools and flags (enable_thinking, parallel_tool_calls, ...)
// Returns the populated common_chat_params.
static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    // Render the conversation through the template.
    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_APERTUS;

    // A prompt ending with the reasoning opener is either closed immediately
    // (thinking disabled) or recorded as an already-open reasoning block.
    if (string_ends_with(data.prompt, "<|inner_prefix|>")) {
        if (inputs.enable_thinking) {
            data.thinking_forced_open = true;
        } else {
            data.prompt += "<|inner_suffix|>";
        }
    }

    // Without a non-empty tool array there is no grammar to build.
    if (!inputs.tools.is_array() || inputs.tools.empty()) {
        return data;
    }

    data.grammar_lazy = true;
    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
        // One object schema per tool: { "<tool name>": <tool parameters> }, name required.
        auto per_tool_schemas = json::array();
        foreach_function(inputs.tools, [&](const json & tool) {
            const auto & fn = tool.at("function");
            per_tool_schemas.push_back({
                { "type", "object" },
                { "properties",
                  {
                      { fn.at("name"), fn.at("parameters") }
                  } },
                { "required", json::array({ fn.at("name") }) },
            });
        });

        // The tool-call payload is a non-empty array of such objects.
        auto calls_schema = json{
            { "type", "array" },
            { "items", per_tool_schemas.size() == 1 ? per_tool_schemas[0] : json{ { "anyOf", per_tool_schemas } } },
            { "minItems", 1 },
        };
        if (!inputs.parallel_tool_calls) {
            // Only a single call is allowed when parallel calls are disabled.
            calls_schema["maxItems"] = 1;
        }

        // When reasoning was forced open, the grammar may start with the closing reasoning tag.
        std::string root_rule = data.thinking_forced_open ? "( \"<|inner_suffix|>\" space )? " : "";
        root_rule += "\"<|tools_prefix|>\"";
        root_rule += builder.add_schema("tool_calls", calls_schema);
        root_rule += "\"<|tools_suffix|>\"";
        builder.add_rule("root", root_rule);
    });

    // If thinking_forced_open, then we capture the <|inner_suffix|> tag in the grammar,
    // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
    const std::string reasoning_part = data.thinking_forced_open
        ? "[\\s\\S]*?(<\\|inner_suffix\\|>\\s*)"
        : "(?:<\\|inner_prefix\\|>[\\s\\S]*?<\\|inner_suffix\\|>\\s*)?";
    data.grammar_triggers.push_back({
        COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
        reasoning_part + "(<\\|tools_prefix\\|>)[\\s\\S]*",
    });

    // Special tokens listed as preserved for this format.
    data.preserved_tokens = {
        "<|system_start|>",
        "<|system_end|>",
        "<|developer_start|>",
        "<|developer_end|>",
        "<|user_start|>",
        "<|user_end|>",
        "<|assistant_start|>",
        "<|assistant_end|>",
        "<|inner_prefix|>",
        "<|inner_suffix|>",
        "<|tools_prefix|>",
        "<|tools_suffix|>",
    };
    return data;
}
|
|
1187
1539
|
static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
|
|
1540
|
+
builder.try_parse_reasoning("<think>", "</think>");
|
|
1541
|
+
|
|
1188
1542
|
if (!builder.syntax().parse_tool_calls) {
|
|
1189
1543
|
builder.add_content(builder.consume_rest());
|
|
1190
1544
|
return;
|
|
@@ -1313,6 +1667,71 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
|
|
|
1313
1667
|
}
|
|
1314
1668
|
return data;
|
|
1315
1669
|
}
|
|
1670
|
+
|
|
1671
|
+
// Prepares prompt, format and (when tools are supplied) the tool-call grammar
// for the DeepSeek V3.1 chat format.
//   tmpl   - chat template used to render the prompt
//   inputs - messages, tools and flags (enable_thinking, tool_choice, json_schema, ...)
// Returns the populated common_chat_params.
//
// The DeepSeek tool tags are written with the fullwidth vertical bar U+FF5C ("｜"),
// not ASCII '|': with ASCII '|' the trigger regex would treat the bars as alternation
// and the literals would not be the model's tag strings.
static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    // Pass thinking context for DeepSeek V3.1 template
    json additional_context = {
        {"thinking", inputs.enable_thinking},
    };

    auto prompt = apply(tmpl, inputs,
                        /* messages_override= */ inputs.messages,
                        /* tools_override= */ std::nullopt,
                        additional_context);
    data.prompt = prompt;
    data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;

    // A prompt ending with "<think>" is either closed immediately (thinking disabled)
    // or recorded as an already-open reasoning block.
    if (string_ends_with(data.prompt, "<think>")) {
        if (!inputs.enable_thinking) {
            data.prompt += "</think>";
        } else {
            data.thinking_forced_open = true;
        }
    }

    if (inputs.tools.is_array() && !inputs.tools.empty()) {
        // The grammar is lazy unless a tool call is required or a JSON schema was supplied.
        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
        // NOTE(review): as in the original, the trigger and preserved tokens are registered from
        // inside the grammar callback; this relies on build_grammar invoking the callback before
        // it returns - confirm.
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            // One "<name>-call" rule per tool: optional call-begin tag, name, separator, JSON args, call-end tag.
            std::vector<std::string> tool_rules;
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                std::string name = function.at("name");
                auto parameters = function.at("parameters");
                builder.resolve_refs(parameters);
                tool_rules.push_back(builder.add_rule(name + "-call",
                    "( \"<｜tool▁call▁begin｜>\" )? \"" + name + "<｜tool▁sep｜>"
                    "\" " + builder.add_schema(name + "-args", parameters) + " "
                    "\"<｜tool▁call▁end｜>\""));
            });
            // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
            // so we accept common variants (then it's all constrained)
            builder.add_rule("root",
                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
                "( \"<｜tool▁calls▁begin｜>\" | \"<｜tool_calls_begin｜>\" | \"<｜tool calls begin｜>\" | \"<｜tool\\\\_calls\\\\_begin｜>\" | \"<｜tool▁calls｜>\" ) "
                "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
                "\"<｜tool▁calls▁end｜>\""
                " space");
            data.grammar_triggers.push_back({
                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
                // If thinking_forced_open, then we capture the </think> tag in the grammar,
                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
                    "(<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)[\\s\\S]*"
            });
            data.preserved_tokens = {
                "<think>",
                "</think>",
                "<｜tool▁calls▁begin｜>",
                "<｜tool▁call▁begin｜>",
                "<｜tool▁sep｜>",
                "<｜tool▁call▁end｜>",
                "<｜tool▁calls▁end｜>",
            };
        });
    }
    return data;
}
|
|
1734
|
+
|
|
1316
1735
|
static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
|
|
1317
1736
|
builder.try_parse_reasoning("<think>", "</think>");
|
|
1318
1737
|
if (!builder.syntax().parse_tool_calls) {
|
|
@@ -1334,9 +1753,357 @@ static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
|
|
|
1334
1753
|
tool_calls_end);
|
|
1335
1754
|
}
|
|
1336
1755
|
|
|
1756
|
+
// Parses the non-reasoning part of a DeepSeek V3.1 response.
// When tool-call parsing is disabled, the remaining input is added as plain content;
// otherwise it is handed to parse_json_tool_calls with the DeepSeek tag patterns.
//
// The tags use the fullwidth vertical bar U+FF5C ("｜"). With an ASCII '|' the bars would be
// regex alternation (e.g. "(?:<|x|>)?" means '<' or 'x' or '>'), so the patterns would not
// match the tags at all.
static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
    // Optional call-begin tag, then the function name (captured), then the separator tag.
    static const common_regex function_regex("(?:<｜tool▁call▁begin｜>)?([^\\n<]+)(?:<｜tool▁sep｜>)");

    // Optional whitespace followed by the call-end tag.
    static const common_regex close_regex("(?:[\\s]*)?<｜tool▁call▁end｜>");
    // Accepted spellings of the tool-calls section opening tag.
    static const common_regex tool_calls_begin("(?:<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)");
    static const common_regex tool_calls_end("<｜tool▁calls▁end｜>");

    if (!builder.syntax().parse_tool_calls) {
        LOG_DBG("%s: not parse_tool_calls\n", __func__);
        builder.add_content(builder.consume_rest());
        return;
    }

    LOG_DBG("%s: parse_tool_calls\n", __func__);

    parse_json_tool_calls(
        builder,
        /* block_open= */ tool_calls_begin,
        /* function_regex_start_only= */ std::nullopt,
        function_regex,
        close_regex,
        tool_calls_end);
}
|
|
1779
|
+
|
|
1780
|
+
// Parses a DeepSeek V3.1 response: reasoning between "<think>" and "</think>" (when present),
// followed by regular content and/or tool calls.
static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
    const bool forced_open = builder.syntax().thinking_forced_open;
    LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(forced_open).c_str());

    // Look ahead for the closing reasoning tag, then rewind so nothing is consumed.
    const auto saved_pos = builder.pos();
    const auto end_think = builder.try_find_literal("</think>");
    builder.move_to(saved_pos);

    // Reasoning was forced open, the input is complete, and it never closes the block:
    // hand everything to the content parser.
    if (forced_open && !builder.is_partial() && !end_think) {
        LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
        common_chat_parse_deepseek_v3_1_content(builder);
        return;
    }

    // Standard path: reasoning parsed, whatever follows is content / tool calls.
    if (builder.try_parse_reasoning("<think>", "</think>")) {
        LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
        common_chat_parse_deepseek_v3_1_content(builder);
        return;
    }

    // No reasoning was parsed from here on.
    if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
        LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
        common_chat_parse_deepseek_v3_1_content(builder);
        return;
    }

    if (forced_open) {
        // Thinking is forced open but no tags were found: everything is reasoning.
        LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
        builder.add_reasoning_content(builder.consume_rest());
        return;
    }

    // Plain content, possibly with tool calls.
    LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
    common_chat_parse_deepseek_v3_1_content(builder);
}
|
|
1815
|
+
|
|
1816
|
+
|
|
1817
|
+
// Prepares prompt, format, preserved tokens and the XML tool-call grammar for MiniMax-M2.
//   tmpl   - chat template used to render the prompt
//   params - messages, tools and flags (enable_thinking, tool_choice, ...)
// Returns the populated common_chat_params.
static common_chat_params common_chat_params_init_minimax_m2(const common_chat_template & tmpl, const struct templates_params & params) {
    common_chat_params data;

    // The grammar is lazy only when tools exist and a tool call is not required.
    const bool has_tools = params.tools.is_array() && !params.tools.empty();
    data.grammar_lazy = has_tools && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;

    data.prompt = apply(tmpl, params);
    data.format = COMMON_CHAT_FORMAT_MINIMAX_M2;

    // A prompt ending with an opened thinking tag is either closed right away
    // (thinking disabled) or recorded as an already-open reasoning block.
    if (string_ends_with(data.prompt, "<think>\n")) {
        if (params.enable_thinking) {
            data.thinking_forced_open = true;
        } else {
            data.prompt += "</think>\n\n";
        }
    }

    // MiniMax-M2 special tokens
    data.preserved_tokens = {
        "<think>",
        "</think>",
        "<minimax:tool_call>",
        "</minimax:tool_call>",
    };

    // Delimiters of the tool-call syntax used to build the grammar.
    static const xml_tool_call_format grammar_form = [] {
        xml_tool_call_format f {};
        f.scope_start = "<minimax:tool_call>\n";
        f.tool_start  = "<invoke name=\"";
        f.tool_sep    = "\">\n";
        f.key_start   = "<parameter name=\"";
        f.key_val_sep = "\">";
        f.val_end     = "</parameter>\n";
        f.tool_end    = "</invoke>\n";
        f.scope_end   = "</minimax:tool_call>";
        return f;
    }();
    build_grammar_xml_tool_call(data, params.tools, grammar_form);

    return data;
}
|
|
1858
|
+
|
|
1859
|
+
// Parses a MiniMax-M2 response: "<think>"/"</think>" reasoning plus XML-style tool calls.
static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
    // Delimiters of the tool-call syntax as matched by the parser (no trailing newlines).
    static const xml_tool_call_format parse_form = [] {
        xml_tool_call_format f {};
        f.scope_start = "<minimax:tool_call>";
        f.tool_start  = "<invoke name=\"";
        f.tool_sep    = "\">";
        f.key_start   = "<parameter name=\"";
        f.key_val_sep = "\">";
        f.val_end     = "</parameter>";
        f.tool_end    = "</invoke>";
        f.scope_end   = "</minimax:tool_call>";
        return f;
    }();
    builder.consume_reasoning_with_xml_tool_calls(parse_form, "<think>", "</think>");
}
|
|
1872
|
+
|
|
1873
|
+
// Prepares prompt, format, preserved tokens and the XML tool-call grammar for Qwen3-Coder.
//   tmpl   - chat template used to render the prompt
//   params - messages, tools and flags (tool_choice, ...)
// Returns the populated common_chat_params.
static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_chat_template & tmpl, const struct templates_params & params) {
    common_chat_params data;

    // The grammar is lazy only when tools exist and a tool call is not required.
    const bool has_tools = params.tools.is_array() && !params.tools.empty();
    data.grammar_lazy = has_tools && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;

    data.prompt = apply(tmpl, params);
    data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML;

    data.preserved_tokens = {
        "<tool_call>",
        "</tool_call>",
        "<function=",
        "</function>",
        "<parameter=",
        "</parameter>",
    };

    // Delimiters of the tool-call syntax used to build the grammar.
    static const xml_tool_call_format grammar_form = [] {
        xml_tool_call_format f {};
        f.scope_start = "<tool_call>\n";
        f.tool_start  = "<function=";
        f.tool_sep    = ">\n";
        f.key_start   = "<parameter=";
        f.key_val_sep = ">\n";
        f.val_end     = "\n</parameter>\n";
        f.tool_end    = "</function>\n";
        f.scope_end   = "</tool_call>";
        return f;
    }();
    build_grammar_xml_tool_call(data, params.tools, grammar_form);

    return data;
}
|
|
1904
|
+
|
|
1905
|
+
// Parses a Qwen3-Coder response containing XML-style tool calls.
// No reasoning tags are passed to the parser for this format.
static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
    // Delimiters of the tool-call syntax as matched by the parser.
    static const xml_tool_call_format parse_form = [] {
        xml_tool_call_format f {};
        f.scope_start     = "<tool_call>";
        f.tool_start      = "<function=";
        f.tool_sep        = ">";
        f.key_start       = "<parameter=";
        f.key_val_sep     = ">";
        f.val_end         = "</parameter>";
        f.tool_end        = "</function>";
        f.scope_end       = "</tool_call>";
        f.trim_raw_argval = true;
        return f;
    }();
    builder.consume_reasoning_with_xml_tool_calls(parse_form);
}
|
|
1921
|
+
|
|
1922
|
+
// Prepares prompt, format, preserved tokens, extra stop strings and the tool-call grammar for Kimi K2.
//   tmpl   - chat template used to render the prompt
//   params - messages, tools and flags (tool_choice, ...)
// Returns the populated common_chat_params.
static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
    common_chat_params data;

    // The grammar is lazy only when tools exist and a tool call is not required.
    const bool has_tools = params.tools.is_array() && !params.tools.empty();
    data.grammar_lazy = has_tools && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;

    data.prompt = apply(tmpl, params);
    data.format = COMMON_CHAT_FORMAT_KIMI_K2;

    data.preserved_tokens = {
        "<think>",
        "</think>",
        "<|tool_calls_section_begin|>",
        "<|tool_call_begin|>",
        "<|tool_call_argument_begin|>",
        "<|tool_call_end|>",
        "<|tool_calls_section_end|>",
        "<|im_end|>",
        "<|im_system|>",
        "<|im_middle|>",
    };

    // Extra stop strings, appended after any already present.
    data.additional_stops.push_back("<|im_end|>");
    data.additional_stops.push_back("<|im_middle|>");

    // Delimiters of the tool-call syntax used to build the grammar
    // (arguments are a JSON-like object between the argument-begin and call-end tags).
    static const xml_tool_call_format grammar_form = [] {
        xml_tool_call_format f {};
        f.scope_start  = "<|tool_calls_section_begin|>";
        f.tool_start   = "<|tool_call_begin|>";
        f.tool_sep     = "<|tool_call_argument_begin|>{";
        f.key_start    = "\"";
        f.key_val_sep  = "\": ";
        f.val_end      = ", ";
        f.tool_end     = "}<|tool_call_end|>";
        f.scope_end    = "<|tool_calls_section_end|>";
        f.raw_argval   = false;
        f.last_val_end = "";
        return f;
    }();
    build_grammar_xml_tool_call(data, params.tools, grammar_form);

    return data;
}
|
|
1965
|
+
|
|
1966
|
+
// Parses a Kimi K2 response: "<think>"/"</think>" reasoning plus tool calls in the
// <|tool_calls_section_begin|> ... <|tool_calls_section_end|> syntax.
static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
    // Delimiters of the tool-call syntax as matched by the parser.
    static const xml_tool_call_format parse_form = [] {
        xml_tool_call_format f {};
        f.scope_start  = "<|tool_calls_section_begin|>";
        f.tool_start   = "<|tool_call_begin|>";
        f.tool_sep     = "<|tool_call_argument_begin|>{";
        f.key_start    = "\"";
        f.key_val_sep  = "\": ";
        f.val_end      = ", ";
        f.tool_end     = "}<|tool_call_end|>";
        f.scope_end    = "<|tool_calls_section_end|>";
        f.raw_argval   = false;
        f.last_val_end = "";
        return f;
    }();
    builder.consume_reasoning_with_xml_tool_calls(parse_form, "<think>", "</think>");
}
|
|
1983
|
+
|
|
1984
|
+
// Prepares prompt, format, preserved tokens and the tool-call grammar for Apriel 1.5.
//   tmpl   - chat template used to render the prompt
//   params - messages, tools and flags (tool_choice, ...)
// Returns the populated common_chat_params.
static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_template & tmpl, const struct templates_params & params) {
    common_chat_params data;

    // The grammar is lazy only when tools exist and a tool call is not required.
    const bool has_tools = params.tools.is_array() && !params.tools.empty();
    data.grammar_lazy = has_tools && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;

    data.prompt = apply(tmpl, params);
    data.format = COMMON_CHAT_FORMAT_APRIEL_1_5;

    data.preserved_tokens = {
        "<thinking>",
        "</thinking>",
        "<tool_calls>",
        "</tool_calls>",
    };

    // Delimiters of the tool-call syntax used to build the grammar:
    // a JSON-like array of {"name": ..., "arguments": {...}} wrapped in <tool_calls> tags.
    static const xml_tool_call_format grammar_form = [] {
        xml_tool_call_format f {};
        f.scope_start   = "<tool_calls>[";
        f.tool_start    = "{\"name\": \"";
        f.tool_sep      = "\", \"arguments\": {";
        f.key_start     = "\"";
        f.key_val_sep   = "\": ";
        f.val_end       = ", ";
        f.tool_end      = "}, ";
        f.scope_end     = "]</tool_calls>";
        f.raw_argval    = false;
        f.last_val_end  = "";
        f.last_tool_end = "}";
        return f;
    }();
    build_grammar_xml_tool_call(data, params.tools, grammar_form);

    return data;
}
|
|
2018
|
+
|
|
2019
|
+
// Parses an Apriel 1.5 response: "<thinking>"/"</thinking>" reasoning plus tool calls
// wrapped in <tool_calls>[ ... ]</tool_calls>.
static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) {
    // Delimiters of the tool-call syntax as matched by the parser.
    static const xml_tool_call_format parse_form = [] {
        xml_tool_call_format f {};
        f.scope_start   = "<tool_calls>[";
        f.tool_start    = "{\"name\": \"";
        f.tool_sep      = "\", \"arguments\": {";
        f.key_start     = "\"";
        f.key_val_sep   = "\": ";
        f.val_end       = ", ";
        f.tool_end      = "}, ";
        f.scope_end     = "]</tool_calls>";
        f.raw_argval    = false;
        f.last_val_end  = "";
        f.last_tool_end = "}";
        return f;
    }();
    builder.consume_reasoning_with_xml_tool_calls(parse_form, "<thinking>", "</thinking>");
}
|
|
2037
|
+
|
|
2038
|
+
// Prepares prompt, format, preserved tokens and the tool-call grammar for Xiaomi MiMo.
//   tmpl   - chat template used to render the prompt
//   params - messages, tools and flags (tool_choice, ...)
// Returns the populated common_chat_params.
static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_template & tmpl, const struct templates_params & params) {
    common_chat_params data;

    // The grammar is lazy only when tools exist and a tool call is not required.
    const bool has_tools = params.tools.is_array() && !params.tools.empty();
    data.grammar_lazy = has_tools && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;

    data.prompt = apply(tmpl, params);
    data.format = COMMON_CHAT_FORMAT_XIAOMI_MIMO;

    data.preserved_tokens = {
        "<tool_call>",
        "</tool_call>",
    };

    // Delimiters of the tool-call syntax used to build the grammar:
    // each call is a JSON-like object inside its own <tool_call> element.
    static const xml_tool_call_format grammar_form = [] {
        xml_tool_call_format f {};
        f.scope_start  = "\n";
        f.tool_start   = "<tool_call>\n{\"name\": \"";
        f.tool_sep     = "\", \"arguments\": {";
        f.key_start    = "\"";
        f.key_val_sep  = "\": ";
        f.val_end      = ", ";
        f.tool_end     = "}\n</tool_call>";
        f.scope_end    = "";
        f.raw_argval   = false;
        f.last_val_end = "";
        return f;
    }();
    build_grammar_xml_tool_call(data, params.tools, grammar_form);

    return data;
}
|
|
2069
|
+
|
|
2070
|
+
// Parses a Xiaomi MiMo response containing <tool_call> elements.
// No reasoning tags are passed to the parser for this format.
static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) {
    // Delimiters of the tool-call syntax as matched by the parser (empty scope markers).
    static const xml_tool_call_format parse_form = [] {
        xml_tool_call_format f {};
        f.scope_start  = "";
        f.tool_start   = "<tool_call>\n{\"name\": \"";
        f.tool_sep     = "\", \"arguments\": {";
        f.key_start    = "\"";
        f.key_val_sep  = "\": ";
        f.val_end      = ", ";
        f.tool_end     = "}\n</tool_call>";
        f.scope_end    = "";
        f.raw_argval   = false;
        f.last_val_end = "";
        return f;
    }();
    builder.consume_reasoning_with_xml_tool_calls(parse_form);
}
|
|
2087
|
+
|
|
1337
2088
|
static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
1338
2089
|
common_chat_params data;
|
|
1339
|
-
|
|
2090
|
+
|
|
2091
|
+
// Copy reasoning to the "thinking" field as expected by the gpt-oss template
|
|
2092
|
+
auto adjusted_messages = json::array();
|
|
2093
|
+
for (const auto & msg : inputs.messages) {
|
|
2094
|
+
auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
|
|
2095
|
+
auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
|
|
2096
|
+
|
|
2097
|
+
if (has_reasoning_content && has_tool_calls) {
|
|
2098
|
+
auto adjusted_message = msg;
|
|
2099
|
+
adjusted_message["thinking"] = msg.at("reasoning_content");
|
|
2100
|
+
adjusted_messages.push_back(adjusted_message);
|
|
2101
|
+
} else {
|
|
2102
|
+
adjusted_messages.push_back(msg);
|
|
2103
|
+
}
|
|
2104
|
+
}
|
|
2105
|
+
|
|
2106
|
+
auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
|
|
1340
2107
|
|
|
1341
2108
|
// Check if we need to replace the return token with end token during
|
|
1342
2109
|
// inference and without generation prompt. For more details see:
|
|
@@ -1411,17 +2178,36 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
|
|
|
1411
2178
|
);
|
|
1412
2179
|
});
|
|
1413
2180
|
|
|
1414
|
-
auto recipient_in_role = builder.add_rule("recipient_in_role",
|
|
1415
|
-
"\"<|start|>assistant\"? \" to=functions.\" ( " +
|
|
1416
|
-
string_join(tool_rules_recipient_in_role, " | ") + " )"
|
|
1417
|
-
);
|
|
1418
|
-
|
|
1419
2181
|
auto recipient_in_channel = builder.add_rule("recipient_in_channel",
|
|
1420
2182
|
channel + " \" to=functions.\" ( " +
|
|
1421
2183
|
string_join(tool_rules_recipient_in_channel, " | ") + " )"
|
|
1422
2184
|
);
|
|
1423
2185
|
|
|
1424
|
-
|
|
2186
|
+
if (data.grammar_lazy) {
|
|
2187
|
+
auto recipient_in_role = builder.add_rule("recipient_in_role",
|
|
2188
|
+
"\"<|start|>assistant\"? \" to=functions.\" ( " +
|
|
2189
|
+
string_join(tool_rules_recipient_in_role, " | ") + " )"
|
|
2190
|
+
);
|
|
2191
|
+
|
|
2192
|
+
builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
|
|
2193
|
+
} else {
|
|
2194
|
+
auto not_end = builder.add_rule("not-end",
|
|
2195
|
+
"[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
|
|
2196
|
+
auto analysis = builder.add_rule("analysis",
|
|
2197
|
+
"\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
|
|
2198
|
+
auto commentary = builder.add_rule("commentary",
|
|
2199
|
+
"\"<|channel|>commentary<|message|>\" ( " + not_end + " )* \"<|end|>\"");
|
|
2200
|
+
|
|
2201
|
+
auto recipient_in_role = builder.add_rule("recipient_in_role",
|
|
2202
|
+
"\" to=functions.\" ( " + string_join(tool_rules_recipient_in_role, " | ") + " )"
|
|
2203
|
+
);
|
|
2204
|
+
|
|
2205
|
+
builder.add_rule("root",
|
|
2206
|
+
"( " + analysis + " \"<|start|>assistant\" )? " +
|
|
2207
|
+
"( " + commentary + " \"<|start|>assistant\" )? " +
|
|
2208
|
+
"( " + recipient_in_role + " | " + recipient_in_channel + " )"
|
|
2209
|
+
);
|
|
2210
|
+
}
|
|
1425
2211
|
|
|
1426
2212
|
// Trigger on tool calls that appear in the commentary channel
|
|
1427
2213
|
data.grammar_triggers.push_back({
|
|
@@ -1533,13 +2319,109 @@ static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
|
|
|
1533
2319
|
}
|
|
1534
2320
|
}
|
|
1535
2321
|
|
|
2322
|
+
// Prepares prompt, format, preserved tokens, extra stop strings and the tool-call grammar for GLM 4.5.
//   tmpl   - chat template used to render the prompt (also supplies the BOS/EOS token strings)
//   inputs - messages, tools and flags (add_bos, add_eos, enable_thinking, tool_choice, ...)
// Returns the populated common_chat_params.
static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    // The grammar is lazy only when tools exist and a tool call is not required.
    const bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();
    data.grammar_lazy = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;

    std::string rendered = apply(tmpl, inputs);

    // Strip a leading BOS / trailing EOS token from the rendered prompt when the
    // corresponding add_bos / add_eos flag is set (matches the existing trimming behavior).
    if (inputs.add_bos && string_starts_with(rendered, tmpl.bos_token())) {
        rendered.erase(0, tmpl.bos_token().size());
    }
    if (inputs.add_eos && string_ends_with(rendered, tmpl.eos_token())) {
        rendered.erase(rendered.size() - tmpl.eos_token().size());
    }

    // A prompt ending with "<think>" is either closed right away (thinking disabled)
    // or recorded as an already-open reasoning block.
    if (string_ends_with(rendered, "<think>")) {
        if (inputs.enable_thinking) {
            data.thinking_forced_open = true;
        } else {
            rendered += "</think>";
        }
    }

    // GLM preserved tokens
    data.preserved_tokens = {
        "<|endoftext|>",
        "[MASK]",
        "[gMASK]",
        "[sMASK]",
        "<sop>",
        "<eop>",
        "<|system|>",
        "<|user|>",
        "<|assistant|>",
        "<|observation|>",
        "<|begin_of_image|>",
        "<|end_of_image|>",
        "<|begin_of_video|>",
        "<|end_of_video|>",
        "<|begin_of_audio|>",
        "<|end_of_audio|>",
        "<|begin_of_transcription|>",
        "<|end_of_transcription|>",
        "<|code_prefix|>",
        "<|code_middle|>",
        "<|code_suffix|>",
        "/nothink",
        "<think>",
        "</think>",
        "<tool_call>",
        "</tool_call>",
        "<arg_key>",
        "</arg_key>",
        "<arg_value>",
        "</arg_value>"
    };

    // Extra GLM 4.5 stop strings, appended after any already present.
    data.additional_stops.push_back("<|user|>");
    data.additional_stops.push_back("<|observation|>");

    // Delimiters of the tool-call syntax used to build the grammar.
    static const xml_tool_call_format grammar_form = [] {
        xml_tool_call_format f {};
        f.scope_start = "";
        f.tool_start  = "\n<tool_call>";
        f.tool_sep    = "\n";
        f.key_start   = "<arg_key>";
        f.key_val_sep = "</arg_key>\n<arg_value>";
        f.val_end     = "</arg_value>\n";
        f.tool_end    = "</tool_call>\n";
        f.scope_end   = "";
        return f;
    }();
    build_grammar_xml_tool_call(data, inputs.tools, grammar_form);

    // Prompt and format are stored last, after the grammar has been built.
    data.prompt = rendered;
    data.format = COMMON_CHAT_FORMAT_GLM_4_5;
    return data;
}
|
|
2400
|
+
|
|
2401
|
+
// Parses a GLM 4.5 response: "<think>"/"</think>" reasoning plus <tool_call> elements
// whose arguments are <arg_key>/<arg_value> pairs.
static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
    // Delimiters of the tool-call syntax as matched by the parser; the key/value
    // separator is split in two parts (key_val_sep followed by key_val_sep2).
    static const xml_tool_call_format parse_form = [] {
        xml_tool_call_format f {};
        f.scope_start  = "";
        f.tool_start   = "<tool_call>";
        f.tool_sep     = "";
        f.key_start    = "<arg_key>";
        f.key_val_sep  = "</arg_key>";
        f.val_end      = "</arg_value>";
        f.tool_end     = "</tool_call>";
        f.scope_end    = "";
        f.key_val_sep2 = "<arg_value>";
        return f;
    }();
    builder.consume_reasoning_with_xml_tool_calls(parse_form, "<think>", "</think>");
}
|
|
2415
|
+
|
|
1536
2416
|
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
1537
2417
|
LOG_DBG("%s\n", __func__);
|
|
1538
2418
|
common_chat_params data;
|
|
1539
|
-
|
|
2419
|
+
const std::optional<json> tools_override = json();
|
|
2420
|
+
const std::optional<json> additional_context = json {
|
|
1540
2421
|
{"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
|
|
1541
2422
|
{"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
|
|
1542
|
-
}
|
|
2423
|
+
};
|
|
2424
|
+
data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, tools_override, additional_context);
|
|
1543
2425
|
if (inputs.tools.is_array() && !inputs.tools.empty()) {
|
|
1544
2426
|
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
1545
2427
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
@@ -1830,7 +2712,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
|
|
|
1830
2712
|
// If thinking_forced_open, then we capture the </think> tag in the grammar,
|
|
1831
2713
|
// (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
|
|
1832
2714
|
std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") + (
|
|
1833
|
-
"
|
|
2715
|
+
"\\s*("
|
|
1834
2716
|
"(?:<tool_call>"
|
|
1835
2717
|
"|<function"
|
|
1836
2718
|
"|(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
|
|
@@ -2025,15 +2907,28 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp
|
|
|
2025
2907
|
|
|
2026
2908
|
static void common_chat_parse_granite(common_chat_msg_parser & builder) {
|
|
2027
2909
|
// Parse thinking tags
|
|
2910
|
+
static const common_regex start_think_regex(regex_escape("<think>"));
|
|
2911
|
+
static const common_regex end_think_regex(regex_escape("</think>"));
|
|
2912
|
+
// Granite models output partial tokens such as "<" and "<think".
|
|
2913
|
+
// By leveraging try_consume_regex()/try_find_regex() throwing
|
|
2914
|
+
// common_chat_msg_partial_exception for these partial tokens,
|
|
2915
|
+
// processing is interrupted and the tokens are not passed to add_content().
|
|
2916
|
+
if (auto res = builder.try_consume_regex(start_think_regex)) {
|
|
2917
|
+
// Restore position for try_parse_reasoning()
|
|
2918
|
+
builder.move_to(res->groups[0].begin);
|
|
2919
|
+
builder.try_find_regex(end_think_regex, std::string::npos, false);
|
|
2920
|
+
// Restore position for try_parse_reasoning()
|
|
2921
|
+
builder.move_to(res->groups[0].begin);
|
|
2922
|
+
}
|
|
2028
2923
|
builder.try_parse_reasoning("<think>", "</think>");
|
|
2029
2924
|
|
|
2030
|
-
// Parse response tags
|
|
2031
|
-
static const common_regex
|
|
2032
|
-
|
|
2033
|
-
|
|
2034
|
-
|
|
2035
|
-
|
|
2036
|
-
builder.
|
|
2925
|
+
// Parse response tags
|
|
2926
|
+
static const common_regex start_response_regex(regex_escape("<response>"));
|
|
2927
|
+
static const common_regex end_response_regex(regex_escape("</response>"));
|
|
2928
|
+
// Granite models output partial tokens such as "<" and "<response".
|
|
2929
|
+
// Same hack as reasoning parsing.
|
|
2930
|
+
if (builder.try_consume_regex(start_response_regex)) {
|
|
2931
|
+
builder.try_find_regex(end_response_regex);
|
|
2037
2932
|
}
|
|
2038
2933
|
|
|
2039
2934
|
if (!builder.syntax().parse_tool_calls) {
|
|
@@ -2046,108 +2941,154 @@ static void common_chat_parse_granite(common_chat_msg_parser & builder) {
|
|
|
2046
2941
|
if (auto res = builder.try_find_regex(tool_call_regex)) {
|
|
2047
2942
|
builder.move_to(res->groups[0].end);
|
|
2048
2943
|
|
|
2944
|
+
// Expect JSON array of tool calls
|
|
2945
|
+
if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
|
|
2946
|
+
if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
|
|
2947
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
|
2948
|
+
}
|
|
2949
|
+
}
|
|
2950
|
+
} else {
|
|
2951
|
+
builder.add_content(builder.consume_rest());
|
|
2952
|
+
}
|
|
2953
|
+
}
|
|
2954
|
+
|
|
2955
|
+
static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
|
|
2956
|
+
// Parse thinking tags
|
|
2957
|
+
builder.try_parse_reasoning("<think>", "</think>");
|
|
2958
|
+
if (!builder.syntax().parse_tool_calls) {
|
|
2959
|
+
builder.add_content(builder.consume_rest());
|
|
2960
|
+
return;
|
|
2961
|
+
}
|
|
2962
|
+
|
|
2963
|
+
// Look for tool calls
|
|
2964
|
+
static const common_regex tool_call_regex(regex_escape("<TOOLCALL>"));
|
|
2965
|
+
if (auto res = builder.try_find_regex(tool_call_regex)) {
|
|
2966
|
+
builder.move_to(res->groups[0].end);
|
|
2967
|
+
|
|
2049
2968
|
// Expect JSON array of tool calls
|
|
2050
2969
|
auto tool_calls_data = builder.consume_json();
|
|
2051
2970
|
if (tool_calls_data.json.is_array()) {
|
|
2052
|
-
if (!builder.
|
|
2053
|
-
|
|
2971
|
+
if (!builder.try_consume_literal("</TOOLCALL>")) {
|
|
2972
|
+
throw common_chat_msg_partial_exception("Incomplete tool call");
|
|
2054
2973
|
}
|
|
2974
|
+
builder.add_tool_calls(tool_calls_data.json);
|
|
2055
2975
|
} else {
|
|
2056
|
-
|
|
2976
|
+
throw common_chat_msg_partial_exception("Incomplete tool call");
|
|
2057
2977
|
}
|
|
2058
|
-
}
|
|
2978
|
+
}
|
|
2979
|
+
builder.add_content(builder.consume_rest());
|
|
2980
|
+
}
|
|
2981
|
+
|
|
2982
|
+
static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
|
|
2983
|
+
// Parse thinking tags
|
|
2984
|
+
builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>");
|
|
2985
|
+
if (!builder.syntax().parse_tool_calls) {
|
|
2059
2986
|
builder.add_content(builder.consume_rest());
|
|
2987
|
+
return;
|
|
2060
2988
|
}
|
|
2989
|
+
|
|
2990
|
+
// Look for tool calls
|
|
2991
|
+
static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>"));
|
|
2992
|
+
if (auto res = builder.try_find_regex(tool_call_regex)) {
|
|
2993
|
+
builder.move_to(res->groups[0].end);
|
|
2994
|
+
|
|
2995
|
+
auto tool_calls_data = builder.consume_json();
|
|
2996
|
+
if (tool_calls_data.json.is_array()) {
|
|
2997
|
+
builder.consume_spaces();
|
|
2998
|
+
if (!builder.try_consume_literal("<|tools_suffix|>")) {
|
|
2999
|
+
throw common_chat_msg_partial_exception("Incomplete tool call");
|
|
3000
|
+
}
|
|
3001
|
+
for (const auto & value : tool_calls_data.json) {
|
|
3002
|
+
if (value.is_object()) {
|
|
3003
|
+
builder.add_tool_call_short_form(value);
|
|
3004
|
+
}
|
|
3005
|
+
}
|
|
3006
|
+
} else {
|
|
3007
|
+
throw common_chat_msg_partial_exception("Incomplete tool call");
|
|
3008
|
+
}
|
|
3009
|
+
}
|
|
3010
|
+
builder.add_content(builder.consume_rest());
|
|
2061
3011
|
}
|
|
2062
3012
|
|
|
2063
|
-
static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
|
|
2064
|
-
// Parse thinking tags first - this handles the main reasoning content
|
|
2065
|
-
builder.try_parse_reasoning("<seed:think>", "</seed:think>");
|
|
2066
3013
|
|
|
3014
|
+
static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
|
|
2067
3015
|
if (!builder.syntax().parse_tool_calls) {
|
|
2068
3016
|
builder.add_content(builder.consume_rest());
|
|
2069
3017
|
return;
|
|
2070
3018
|
}
|
|
2071
3019
|
|
|
2072
|
-
//
|
|
2073
|
-
static const common_regex
|
|
2074
|
-
static const common_regex tool_call_end_regex("
|
|
2075
|
-
static const common_regex function_regex("<function=([^>]+)>");
|
|
2076
|
-
static const common_regex param_regex("<parameter=([^>]+)>");
|
|
3020
|
+
// LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
|
|
3021
|
+
static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
|
|
3022
|
+
static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));
|
|
2077
3023
|
|
|
2078
|
-
|
|
2079
|
-
|
|
3024
|
+
// Loop through all tool calls
|
|
3025
|
+
while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
|
|
3026
|
+
builder.move_to(res->groups[0].end);
|
|
2080
3027
|
|
|
2081
|
-
//
|
|
2082
|
-
|
|
2083
|
-
auto function_name = builder.str(func_res->groups[1]);
|
|
3028
|
+
// Parse JSON array format: [{"name": "...", "arguments": {...}}]
|
|
3029
|
+
auto tool_calls_data = builder.consume_json();
|
|
2084
3030
|
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
if (
|
|
2095
|
-
|
|
2096
|
-
builder.move_to(savedPos);
|
|
2097
|
-
try {
|
|
2098
|
-
if (auto param_res = builder.try_consume_json()) {
|
|
2099
|
-
args[param_name] = param_res->json;
|
|
2100
|
-
} else {
|
|
2101
|
-
args[param_name] = param;
|
|
2102
|
-
}
|
|
2103
|
-
} catch (json::exception &) {
|
|
2104
|
-
args[param_name] = param;
|
|
2105
|
-
}
|
|
2106
|
-
} else {
|
|
2107
|
-
throw common_chat_msg_partial_exception("Incomplete tool parameter");
|
|
3031
|
+
// Consume end marker
|
|
3032
|
+
builder.consume_spaces();
|
|
3033
|
+
if (!builder.try_consume_regex(tool_call_end_regex)) {
|
|
3034
|
+
throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
|
|
3035
|
+
}
|
|
3036
|
+
|
|
3037
|
+
// Process each tool call in the array
|
|
3038
|
+
if (tool_calls_data.json.is_array()) {
|
|
3039
|
+
for (const auto & tool_call : tool_calls_data.json) {
|
|
3040
|
+
if (!tool_call.is_object()) {
|
|
3041
|
+
throw common_chat_msg_partial_exception("Tool call must be an object");
|
|
2108
3042
|
}
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
|
|
2114
|
-
|
|
2115
|
-
|
|
2116
|
-
|
|
2117
|
-
|
|
2118
|
-
|
|
2119
|
-
|
|
2120
|
-
if (
|
|
2121
|
-
|
|
3043
|
+
|
|
3044
|
+
if (!tool_call.contains("name")) {
|
|
3045
|
+
throw common_chat_msg_partial_exception("Tool call missing 'name' field");
|
|
3046
|
+
}
|
|
3047
|
+
|
|
3048
|
+
std::string function_name = tool_call.at("name");
|
|
3049
|
+
std::string arguments = "{}";
|
|
3050
|
+
|
|
3051
|
+
if (tool_call.contains("arguments")) {
|
|
3052
|
+
if (tool_call.at("arguments").is_object()) {
|
|
3053
|
+
arguments = tool_call.at("arguments").dump();
|
|
3054
|
+
} else if (tool_call.at("arguments").is_string()) {
|
|
3055
|
+
arguments = tool_call.at("arguments");
|
|
2122
3056
|
}
|
|
2123
|
-
}
|
|
3057
|
+
}
|
|
3058
|
+
|
|
3059
|
+
if (!builder.add_tool_call(function_name, "", arguments)) {
|
|
2124
3060
|
throw common_chat_msg_partial_exception("Incomplete tool call");
|
|
2125
3061
|
}
|
|
2126
|
-
} else {
|
|
2127
|
-
throw common_chat_msg_partial_exception("Incomplete tool call");
|
|
2128
|
-
}
|
|
2129
|
-
// Look for closing tool call tag
|
|
2130
|
-
if (auto end_tool = builder.try_find_regex(tool_call_end_regex, std::string::npos, false)) {
|
|
2131
|
-
builder.move_to(end_tool->groups[0].end);
|
|
2132
|
-
builder.consume_spaces(); // Consume trailing whitespace after tool call
|
|
2133
|
-
} else {
|
|
2134
|
-
throw common_chat_msg_partial_exception("Incomplete tool call");
|
|
2135
3062
|
}
|
|
2136
3063
|
} else {
|
|
2137
|
-
|
|
2138
|
-
break;
|
|
3064
|
+
throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
|
|
2139
3065
|
}
|
|
3066
|
+
|
|
3067
|
+
// Consume any trailing whitespace after this tool call
|
|
3068
|
+
builder.consume_spaces();
|
|
2140
3069
|
}
|
|
2141
3070
|
|
|
2142
|
-
// Consume any remaining
|
|
2143
|
-
builder.consume_spaces();
|
|
3071
|
+
// Consume any remaining content after all tool calls
|
|
2144
3072
|
auto remaining = builder.consume_rest();
|
|
2145
|
-
// If there's any non-whitespace content remaining, add it as content
|
|
2146
3073
|
if (!string_strip(remaining).empty()) {
|
|
2147
3074
|
builder.add_content(remaining);
|
|
2148
3075
|
}
|
|
2149
3076
|
}
|
|
2150
3077
|
|
|
3078
|
+
static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
|
|
3079
|
+
static const xml_tool_call_format form {
|
|
3080
|
+
/* form.scope_start = */ "<seed:tool_call>",
|
|
3081
|
+
/* form.tool_start = */ "<function=",
|
|
3082
|
+
/* form.tool_sep = */ ">",
|
|
3083
|
+
/* form.key_start = */ "<parameter=",
|
|
3084
|
+
/* form.key_val_sep = */ ">",
|
|
3085
|
+
/* form.val_end = */ "</parameter>",
|
|
3086
|
+
/* form.tool_end = */ "</function>",
|
|
3087
|
+
/* form.scope_end = */ "</seed:tool_call>",
|
|
3088
|
+
};
|
|
3089
|
+
builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
|
|
3090
|
+
}
|
|
3091
|
+
|
|
2151
3092
|
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
2152
3093
|
common_chat_params data;
|
|
2153
3094
|
data.prompt = apply(tmpl, inputs);
|
|
@@ -2263,6 +3204,12 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
2263
3204
|
}
|
|
2264
3205
|
}
|
|
2265
3206
|
|
|
3207
|
+
// DeepSeek V3.1: detect based on specific patterns in the template
|
|
3208
|
+
if (src.find("message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos &&
|
|
3209
|
+
params.json_schema.is_null()) {
|
|
3210
|
+
return common_chat_params_init_deepseek_v3_1(tmpl, params);
|
|
3211
|
+
}
|
|
3212
|
+
|
|
2266
3213
|
// DeepSeek R1: use handler in all cases except json schema (thinking / tools).
|
|
2267
3214
|
if (src.find("<|tool▁calls▁begin|>") != std::string::npos && params.json_schema.is_null()) {
|
|
2268
3215
|
return common_chat_params_init_deepseek_r1(tmpl, params);
|
|
@@ -2278,6 +3225,35 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
2278
3225
|
return common_chat_params_init_granite(tmpl, params);
|
|
2279
3226
|
}
|
|
2280
3227
|
|
|
3228
|
+
// GLM 4.5: detect by <arg_key> and <arg_value> tags (check before Hermes since both use <tool_call>)
|
|
3229
|
+
if (src.find("[gMASK]<sop>") != std::string::npos &&
|
|
3230
|
+
src.find("<arg_key>") != std::string::npos &&
|
|
3231
|
+
src.find("<arg_value>") != std::string::npos &&
|
|
3232
|
+
params.json_schema.is_null()) {
|
|
3233
|
+
return common_chat_params_init_glm_4_5(tmpl, params);
|
|
3234
|
+
}
|
|
3235
|
+
|
|
3236
|
+
// Qwen3-Coder XML format detection (must come before Hermes 2 Pro)
|
|
3237
|
+
// Detect via explicit XML markers unique to Qwen3-Coder to avoid false positives in other templates.
|
|
3238
|
+
// Require presence of <tool_call>, <function=...>, and <parameter=...> blocks.
|
|
3239
|
+
if (src.find("<tool_call>") != std::string::npos &&
|
|
3240
|
+
src.find("<function>") != std::string::npos &&
|
|
3241
|
+
src.find("<function=") != std::string::npos &&
|
|
3242
|
+
src.find("<parameters>") != std::string::npos &&
|
|
3243
|
+
src.find("<parameter=") != std::string::npos) {
|
|
3244
|
+
return common_chat_params_init_qwen3_coder_xml(tmpl, params);
|
|
3245
|
+
}
|
|
3246
|
+
|
|
3247
|
+
// Xiaomi MiMo format detection (must come before Hermes 2 Pro)
|
|
3248
|
+
if (src.find("<tools>") != std::string::npos &&
|
|
3249
|
+
src.find("# Tools") != std::string::npos &&
|
|
3250
|
+
src.find("</tools>") != std::string::npos &&
|
|
3251
|
+
src.find("<tool_calls>") != std::string::npos &&
|
|
3252
|
+
src.find("</tool_calls>") != std::string::npos &&
|
|
3253
|
+
src.find("<tool_response>") != std::string::npos) {
|
|
3254
|
+
return common_chat_params_init_xiaomi_mimo(tmpl, params);
|
|
3255
|
+
}
|
|
3256
|
+
|
|
2281
3257
|
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
|
|
2282
3258
|
if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
|
|
2283
3259
|
return common_chat_params_init_hermes_2_pro(tmpl, params);
|
|
@@ -2293,6 +3269,45 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
2293
3269
|
return common_chat_params_init_seed_oss(tmpl, params, inputs);
|
|
2294
3270
|
}
|
|
2295
3271
|
|
|
3272
|
+
// Nemotron v2
|
|
3273
|
+
if (src.find("<SPECIAL_10>") != std::string::npos) {
|
|
3274
|
+
return common_chat_params_init_nemotron_v2(tmpl, params);
|
|
3275
|
+
}
|
|
3276
|
+
|
|
3277
|
+
// Apertus format detection
|
|
3278
|
+
if (src.find("<|system_start|>") != std::string::npos && src.find("<|tools_prefix|>") != std::string::npos) {
|
|
3279
|
+
return common_chat_params_init_apertus(tmpl, params);
|
|
3280
|
+
}
|
|
3281
|
+
|
|
3282
|
+
// LFM2 (w/ tools)
|
|
3283
|
+
if (src.find("List of tools: <|tool_list_start|>[") != std::string::npos &&
|
|
3284
|
+
src.find("]<|tool_list_end|>") != std::string::npos) {
|
|
3285
|
+
return common_chat_params_init_lfm2(tmpl, params);
|
|
3286
|
+
}
|
|
3287
|
+
|
|
3288
|
+
// MiniMax-M2 format detection
|
|
3289
|
+
if (src.find("]~!b[") != std::string::npos && src.find("]~b]") != std::string::npos) {
|
|
3290
|
+
return common_chat_params_init_minimax_m2(tmpl, params);
|
|
3291
|
+
}
|
|
3292
|
+
|
|
3293
|
+
// Kimi K2 format detection
|
|
3294
|
+
if (src.find("<|im_system|>tool_declare<|im_middle|>") != std::string::npos &&
|
|
3295
|
+
src.find("<|tool_calls_section_begin|>") != std::string::npos &&
|
|
3296
|
+
src.find("## Return of") != std::string::npos) {
|
|
3297
|
+
return common_chat_params_init_kimi_k2(tmpl, params);
|
|
3298
|
+
}
|
|
3299
|
+
|
|
3300
|
+
// Apriel 1.5 format detection
|
|
3301
|
+
if (src.find("<thinking>") != std::string::npos &&
|
|
3302
|
+
src.find("</thinking>") != std::string::npos &&
|
|
3303
|
+
src.find("<available_tools>") != std::string::npos &&
|
|
3304
|
+
src.find("<|assistant|>") != std::string::npos &&
|
|
3305
|
+
src.find("<|tool_result|>") != std::string::npos &&
|
|
3306
|
+
src.find("<tool_calls>[") != std::string::npos &&
|
|
3307
|
+
src.find("]</tool_calls>") != std::string::npos) {
|
|
3308
|
+
return common_chat_params_init_apriel_1_5(tmpl, params);
|
|
3309
|
+
}
|
|
3310
|
+
|
|
2296
3311
|
// Use generic handler when mixing tools + JSON schema.
|
|
2297
3312
|
// TODO: support that mix in handlers below.
|
|
2298
3313
|
if ((params.tools.is_array() && params.json_schema.is_object())) {
|
|
@@ -2321,6 +3336,10 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
2321
3336
|
return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
|
|
2322
3337
|
}
|
|
2323
3338
|
|
|
3339
|
+
if (src.find("[THINK]") != std::string::npos && src.find("[/THINK]") != std::string::npos) {
|
|
3340
|
+
return common_chat_params_init_magistral(tmpl, params);
|
|
3341
|
+
}
|
|
3342
|
+
|
|
2324
3343
|
// Plain handler (no tools)
|
|
2325
3344
|
if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
|
|
2326
3345
|
return common_chat_params_init_without_tools(tmpl, params);
|
|
@@ -2340,7 +3359,7 @@ static common_chat_params common_chat_templates_apply_legacy(
|
|
|
2340
3359
|
const struct common_chat_templates * tmpls,
|
|
2341
3360
|
const struct common_chat_templates_inputs & inputs)
|
|
2342
3361
|
{
|
|
2343
|
-
|
|
3362
|
+
size_t alloc_size = 0;
|
|
2344
3363
|
std::vector<llama_chat_message> chat;
|
|
2345
3364
|
std::vector<std::string> contents;
|
|
2346
3365
|
|
|
@@ -2362,7 +3381,8 @@ static common_chat_params common_chat_templates_apply_legacy(
|
|
|
2362
3381
|
const auto & msg = inputs.messages[i];
|
|
2363
3382
|
const auto & content = contents[i];
|
|
2364
3383
|
chat.push_back({msg.role.c_str(), content.c_str()});
|
|
2365
|
-
|
|
3384
|
+
size_t msg_size = msg.role.size() + content.size();
|
|
3385
|
+
alloc_size += msg_size + (msg_size / 4); // == msg_size * 1.25 but avoiding float ops
|
|
2366
3386
|
}
|
|
2367
3387
|
|
|
2368
3388
|
std::vector<char> buf(alloc_size);
|
|
@@ -2384,6 +3404,11 @@ static common_chat_params common_chat_templates_apply_legacy(
|
|
|
2384
3404
|
res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
|
|
2385
3405
|
}
|
|
2386
3406
|
|
|
3407
|
+
// for safety, we check the result again
|
|
3408
|
+
if (res < 0 || (size_t) res > buf.size()) {
|
|
3409
|
+
throw std::runtime_error("failed to apply chat template, try using --jinja");
|
|
3410
|
+
}
|
|
3411
|
+
|
|
2387
3412
|
common_chat_params params;
|
|
2388
3413
|
params.prompt = std::string(buf.data(), res);
|
|
2389
3414
|
if (!inputs.json_schema.empty()) {
|
|
@@ -2405,6 +3430,7 @@ common_chat_params common_chat_templates_apply(
|
|
|
2405
3430
|
}
|
|
2406
3431
|
|
|
2407
3432
|
static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
|
|
3433
|
+
builder.try_parse_reasoning("<think>", "</think>");
|
|
2408
3434
|
builder.add_content(builder.consume_rest());
|
|
2409
3435
|
}
|
|
2410
3436
|
|
|
@@ -2421,6 +3447,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
|
|
|
2421
3447
|
case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
|
|
2422
3448
|
common_chat_parse_mistral_nemo(builder);
|
|
2423
3449
|
break;
|
|
3450
|
+
case COMMON_CHAT_FORMAT_MAGISTRAL:
|
|
3451
|
+
common_chat_parse_magistral(builder);
|
|
3452
|
+
break;
|
|
2424
3453
|
case COMMON_CHAT_FORMAT_LLAMA_3_X:
|
|
2425
3454
|
common_chat_parse_llama_3_1(builder);
|
|
2426
3455
|
break;
|
|
@@ -2430,6 +3459,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
|
|
|
2430
3459
|
case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
|
|
2431
3460
|
common_chat_parse_deepseek_r1(builder);
|
|
2432
3461
|
break;
|
|
3462
|
+
case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
|
|
3463
|
+
common_chat_parse_deepseek_v3_1(builder);
|
|
3464
|
+
break;
|
|
2433
3465
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
|
|
2434
3466
|
common_chat_parse_functionary_v3_2(builder);
|
|
2435
3467
|
break;
|
|
@@ -2454,6 +3486,33 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
|
|
|
2454
3486
|
case COMMON_CHAT_FORMAT_SEED_OSS:
|
|
2455
3487
|
common_chat_parse_seed_oss(builder);
|
|
2456
3488
|
break;
|
|
3489
|
+
case COMMON_CHAT_FORMAT_NEMOTRON_V2:
|
|
3490
|
+
common_chat_parse_nemotron_v2(builder);
|
|
3491
|
+
break;
|
|
3492
|
+
case COMMON_CHAT_FORMAT_APERTUS:
|
|
3493
|
+
common_chat_parse_apertus(builder);
|
|
3494
|
+
break;
|
|
3495
|
+
case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
|
|
3496
|
+
common_chat_parse_lfm2(builder);
|
|
3497
|
+
break;
|
|
3498
|
+
case COMMON_CHAT_FORMAT_MINIMAX_M2:
|
|
3499
|
+
common_chat_parse_minimax_m2(builder);
|
|
3500
|
+
break;
|
|
3501
|
+
case COMMON_CHAT_FORMAT_GLM_4_5:
|
|
3502
|
+
common_chat_parse_glm_4_5(builder);
|
|
3503
|
+
break;
|
|
3504
|
+
case COMMON_CHAT_FORMAT_KIMI_K2:
|
|
3505
|
+
common_chat_parse_kimi_k2(builder);
|
|
3506
|
+
break;
|
|
3507
|
+
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML:
|
|
3508
|
+
common_chat_parse_qwen3_coder_xml(builder);
|
|
3509
|
+
break;
|
|
3510
|
+
case COMMON_CHAT_FORMAT_APRIEL_1_5:
|
|
3511
|
+
common_chat_parse_apriel_1_5(builder);
|
|
3512
|
+
break;
|
|
3513
|
+
case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
|
|
3514
|
+
common_chat_parse_xiaomi_mimo(builder);
|
|
3515
|
+
break;
|
|
2457
3516
|
default:
|
|
2458
3517
|
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
|
|
2459
3518
|
}
|