cui-llama.rn 1.7.3 → 1.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +217 -17
- package/android/src/main/CMakeLists.txt +34 -15
- package/android/src/main/java/com/rnllama/LlamaContext.java +94 -8
- package/android/src/main/java/com/rnllama/RNLlama.java +247 -0
- package/android/src/main/jni.cpp +213 -14
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
- package/cpp/README.md +1 -1
- package/cpp/chat-parser.cpp +385 -0
- package/cpp/chat-parser.h +120 -0
- package/cpp/chat.cpp +726 -596
- package/cpp/chat.h +71 -6
- package/cpp/common.cpp +56 -38
- package/cpp/common.h +9 -3
- package/cpp/ggml-backend-reg.cpp +5 -0
- package/cpp/ggml-backend.cpp +10 -2
- package/cpp/ggml-common.h +4 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/common.h +4 -3
- package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
- package/cpp/ggml-cpu/ggml-cpu.c +123 -104
- package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
- package/cpp/ggml-cpu/ops.cpp +330 -148
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/quants.c +1158 -0
- package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/ggml-cpu/repack.cpp +1571 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +330 -38
- package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/ggml-cpu/vec.cpp +87 -18
- package/cpp/ggml-cpu/vec.h +249 -94
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +63 -183
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal.m +152 -45
- package/cpp/ggml-quants.c +0 -2
- package/cpp/ggml.c +61 -21
- package/cpp/ggml.h +22 -3
- package/cpp/gguf.cpp +24 -3
- package/cpp/json-partial.cpp +256 -0
- package/cpp/json-partial.h +38 -0
- package/cpp/json-schema-to-grammar.cpp +5 -47
- package/cpp/json-schema-to-grammar.h +4 -4
- package/cpp/llama-arch.cpp +153 -3
- package/cpp/llama-arch.h +27 -1
- package/cpp/llama-batch.cpp +741 -272
- package/cpp/llama-batch.h +112 -54
- package/cpp/llama-chat.cpp +30 -8
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-context.cpp +524 -339
- package/cpp/llama-context.h +38 -17
- package/cpp/llama-cparams.cpp +4 -0
- package/cpp/llama-cparams.h +2 -0
- package/cpp/llama-grammar.cpp +12 -2
- package/cpp/llama-graph.cpp +431 -356
- package/cpp/llama-graph.h +126 -58
- package/cpp/llama-hparams.cpp +10 -2
- package/cpp/llama-hparams.h +19 -2
- package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
- package/cpp/llama-kv-cache-unified-iswa.h +128 -0
- package/cpp/llama-kv-cache-unified.cpp +1841 -0
- package/cpp/llama-kv-cache-unified.h +303 -0
- package/cpp/llama-kv-cells.h +439 -0
- package/cpp/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama-memory-hybrid.h +138 -0
- package/cpp/llama-memory-recurrent.cpp +1112 -0
- package/cpp/llama-memory-recurrent.h +183 -0
- package/cpp/llama-memory.cpp +41 -0
- package/cpp/llama-memory.h +86 -5
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +42 -17
- package/cpp/llama-model-saver.cpp +1 -0
- package/cpp/llama-model.cpp +1639 -513
- package/cpp/llama-model.h +26 -0
- package/cpp/llama-sampling.cpp +2 -2
- package/cpp/llama-vocab.cpp +65 -28
- package/cpp/llama-vocab.h +1 -0
- package/cpp/llama.cpp +11 -7
- package/cpp/llama.h +150 -42
- package/cpp/minja/chat-template.hpp +1 -1
- package/cpp/minja/minja.hpp +1 -1
- package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/cpp/nlohmann/json_fwd.hpp +187 -0
- package/cpp/regex-partial.cpp +204 -0
- package/cpp/regex-partial.h +56 -0
- package/cpp/rn-llama.cpp +646 -35
- package/cpp/rn-llama.h +32 -1
- package/cpp/rn-tts.h +39 -0
- package/cpp/sampling.cpp +7 -8
- package/cpp/tools/mtmd/clip-impl.h +5 -0
- package/cpp/tools/mtmd/clip.cpp +572 -436
- package/cpp/tools/mtmd/clip.h +14 -4
- package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
- package/cpp/tools/mtmd/mtmd-audio.h +2 -17
- package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
- package/cpp/tools/mtmd/mtmd-helper.h +91 -0
- package/cpp/tools/mtmd/mtmd.cpp +368 -248
- package/cpp/tools/mtmd/mtmd.h +6 -70
- package/cpp/unicode.cpp +5 -0
- package/ios/CMakeLists.txt +26 -6
- package/ios/RNLlama.h +1 -1
- package/ios/RNLlama.mm +153 -3
- package/ios/RNLlamaContext.h +9 -1
- package/ios/RNLlamaContext.mm +112 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +24 -0
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +46 -2
- package/src/index.ts +105 -1
- package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/cpp/ggml-cpu/sgemm.cpp +0 -3544
- package/cpp/ggml-cpu/sgemm.h +0 -14
- package/cpp/llama-kv-cache.cpp +0 -2827
- package/cpp/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
- /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
- /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
@@ -0,0 +1,187 @@
|
|
1
|
+
// __ _____ _____ _____
|
2
|
+
// __| | __| | | | JSON for Modern C++
|
3
|
+
// | | |__ | | | | | | version 3.12.0
|
4
|
+
// |_____|_____|_____|_|___| https://github.com/nlohmann/json
|
5
|
+
//
|
6
|
+
// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann <https://nlohmann.me>
|
7
|
+
// SPDX-License-Identifier: MIT
|
8
|
+
|
9
|
+
#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_
|
10
|
+
#define INCLUDE_NLOHMANN_JSON_FWD_HPP_
|
11
|
+
|
12
|
+
#include <cstdint> // int64_t, uint64_t
|
13
|
+
#include <map> // map
|
14
|
+
#include <memory> // allocator
|
15
|
+
#include <string> // string
|
16
|
+
#include <vector> // vector
|
17
|
+
|
18
|
+
// #include <nlohmann/detail/abi_macros.hpp>
|
19
|
+
// __ _____ _____ _____
|
20
|
+
// __| | __| | | | JSON for Modern C++
|
21
|
+
// | | |__ | | | | | | version 3.12.0
|
22
|
+
// |_____|_____|_____|_|___| https://github.com/nlohmann/json
|
23
|
+
//
|
24
|
+
// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann <https://nlohmann.me>
|
25
|
+
// SPDX-License-Identifier: MIT
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
// This file contains all macro definitions affecting or depending on the ABI
|
30
|
+
|
31
|
+
#ifndef JSON_SKIP_LIBRARY_VERSION_CHECK
|
32
|
+
#if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH)
|
33
|
+
#if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 12 || NLOHMANN_JSON_VERSION_PATCH != 0
|
34
|
+
#warning "Already included a different version of the library!"
|
35
|
+
#endif
|
36
|
+
#endif
|
37
|
+
#endif
|
38
|
+
|
39
|
+
#define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum)
|
40
|
+
#define NLOHMANN_JSON_VERSION_MINOR 12 // NOLINT(modernize-macro-to-enum)
|
41
|
+
#define NLOHMANN_JSON_VERSION_PATCH 0 // NOLINT(modernize-macro-to-enum)
|
42
|
+
|
43
|
+
#ifndef JSON_DIAGNOSTICS
|
44
|
+
#define JSON_DIAGNOSTICS 0
|
45
|
+
#endif
|
46
|
+
|
47
|
+
#ifndef JSON_DIAGNOSTIC_POSITIONS
|
48
|
+
#define JSON_DIAGNOSTIC_POSITIONS 0
|
49
|
+
#endif
|
50
|
+
|
51
|
+
#ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON
|
52
|
+
#define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0
|
53
|
+
#endif
|
54
|
+
|
55
|
+
#if JSON_DIAGNOSTICS
|
56
|
+
#define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS _diag
|
57
|
+
#else
|
58
|
+
#define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS
|
59
|
+
#endif
|
60
|
+
|
61
|
+
#if JSON_DIAGNOSTIC_POSITIONS
|
62
|
+
#define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS _dp
|
63
|
+
#else
|
64
|
+
#define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS
|
65
|
+
#endif
|
66
|
+
|
67
|
+
#if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON
|
68
|
+
#define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp
|
69
|
+
#else
|
70
|
+
#define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON
|
71
|
+
#endif
|
72
|
+
|
73
|
+
#ifndef NLOHMANN_JSON_NAMESPACE_NO_VERSION
|
74
|
+
#define NLOHMANN_JSON_NAMESPACE_NO_VERSION 0
|
75
|
+
#endif
|
76
|
+
|
77
|
+
// Construct the namespace ABI tags component
|
78
|
+
#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) json_abi ## a ## b ## c
|
79
|
+
#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b, c) \
|
80
|
+
NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c)
|
81
|
+
|
82
|
+
#define NLOHMANN_JSON_ABI_TAGS \
|
83
|
+
NLOHMANN_JSON_ABI_TAGS_CONCAT( \
|
84
|
+
NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \
|
85
|
+
NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON, \
|
86
|
+
NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS)
|
87
|
+
|
88
|
+
// Construct the namespace version component
|
89
|
+
#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \
|
90
|
+
_v ## major ## _ ## minor ## _ ## patch
|
91
|
+
#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(major, minor, patch) \
|
92
|
+
NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch)
|
93
|
+
|
94
|
+
#if NLOHMANN_JSON_NAMESPACE_NO_VERSION
|
95
|
+
#define NLOHMANN_JSON_NAMESPACE_VERSION
|
96
|
+
#else
|
97
|
+
#define NLOHMANN_JSON_NAMESPACE_VERSION \
|
98
|
+
NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(NLOHMANN_JSON_VERSION_MAJOR, \
|
99
|
+
NLOHMANN_JSON_VERSION_MINOR, \
|
100
|
+
NLOHMANN_JSON_VERSION_PATCH)
|
101
|
+
#endif
|
102
|
+
|
103
|
+
// Combine namespace components
|
104
|
+
#define NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) a ## b
|
105
|
+
#define NLOHMANN_JSON_NAMESPACE_CONCAT(a, b) \
|
106
|
+
NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b)
|
107
|
+
|
108
|
+
#ifndef NLOHMANN_JSON_NAMESPACE
|
109
|
+
#define NLOHMANN_JSON_NAMESPACE \
|
110
|
+
nlohmann::NLOHMANN_JSON_NAMESPACE_CONCAT( \
|
111
|
+
NLOHMANN_JSON_ABI_TAGS, \
|
112
|
+
NLOHMANN_JSON_NAMESPACE_VERSION)
|
113
|
+
#endif
|
114
|
+
|
115
|
+
#ifndef NLOHMANN_JSON_NAMESPACE_BEGIN
|
116
|
+
#define NLOHMANN_JSON_NAMESPACE_BEGIN \
|
117
|
+
namespace nlohmann \
|
118
|
+
{ \
|
119
|
+
inline namespace NLOHMANN_JSON_NAMESPACE_CONCAT( \
|
120
|
+
NLOHMANN_JSON_ABI_TAGS, \
|
121
|
+
NLOHMANN_JSON_NAMESPACE_VERSION) \
|
122
|
+
{
|
123
|
+
#endif
|
124
|
+
|
125
|
+
#ifndef NLOHMANN_JSON_NAMESPACE_END
|
126
|
+
#define NLOHMANN_JSON_NAMESPACE_END \
|
127
|
+
} /* namespace (inline namespace) NOLINT(readability/namespace) */ \
|
128
|
+
} // namespace nlohmann
|
129
|
+
#endif
|
130
|
+
|
131
|
+
|
132
|
+
/*!
|
133
|
+
@brief namespace for Niels Lohmann
|
134
|
+
@see https://github.com/nlohmann
|
135
|
+
@since version 1.0.0
|
136
|
+
*/
|
137
|
+
NLOHMANN_JSON_NAMESPACE_BEGIN
|
138
|
+
|
139
|
+
/*!
|
140
|
+
@brief default JSONSerializer template argument
|
141
|
+
|
142
|
+
This serializer ignores the template arguments and uses ADL
|
143
|
+
([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl))
|
144
|
+
for serialization.
|
145
|
+
*/
|
146
|
+
template<typename T = void, typename SFINAE = void>
|
147
|
+
struct adl_serializer;
|
148
|
+
|
149
|
+
/// a class to store JSON values
|
150
|
+
/// @sa https://json.nlohmann.me/api/basic_json/
|
151
|
+
template<template<typename U, typename V, typename... Args> class ObjectType =
|
152
|
+
std::map,
|
153
|
+
template<typename U, typename... Args> class ArrayType = std::vector,
|
154
|
+
class StringType = std::string, class BooleanType = bool,
|
155
|
+
class NumberIntegerType = std::int64_t,
|
156
|
+
class NumberUnsignedType = std::uint64_t,
|
157
|
+
class NumberFloatType = double,
|
158
|
+
template<typename U> class AllocatorType = std::allocator,
|
159
|
+
template<typename T, typename SFINAE = void> class JSONSerializer =
|
160
|
+
adl_serializer,
|
161
|
+
class BinaryType = std::vector<std::uint8_t>, // cppcheck-suppress syntaxError
|
162
|
+
class CustomBaseClass = void>
|
163
|
+
class basic_json;
|
164
|
+
|
165
|
+
/// @brief JSON Pointer defines a string syntax for identifying a specific value within a JSON document
|
166
|
+
/// @sa https://json.nlohmann.me/api/json_pointer/
|
167
|
+
template<typename RefStringType>
|
168
|
+
class json_pointer;
|
169
|
+
|
170
|
+
/*!
|
171
|
+
@brief default specialization
|
172
|
+
@sa https://json.nlohmann.me/api/json/
|
173
|
+
*/
|
174
|
+
using json = basic_json<>;
|
175
|
+
|
176
|
+
/// @brief a minimal map-like container that preserves insertion order
|
177
|
+
/// @sa https://json.nlohmann.me/api/ordered_map/
|
178
|
+
template<class Key, class T, class IgnoredLess, class Allocator>
|
179
|
+
struct ordered_map;
|
180
|
+
|
181
|
+
/// @brief specialization that maintains the insertion order of object keys
|
182
|
+
/// @sa https://json.nlohmann.me/api/ordered_json/
|
183
|
+
using ordered_json = basic_json<nlohmann::ordered_map>;
|
184
|
+
|
185
|
+
NLOHMANN_JSON_NAMESPACE_END
|
186
|
+
|
187
|
+
#endif // INCLUDE_NLOHMANN_JSON_FWD_HPP_
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include <cstddef>
#include <regex>
|
4
|
+
#include <stdexcept>
#include <string>
#include <vector>
|
5
|
+
|
6
|
+
// Classification stored in common_regex_match::type.
// NOTE(review): the exact meaning of each value is decided by the search
// implementation, which is not visible in this header. Presumably PARTIAL means
// the input ended while a match was still possible - confirm in regex-partial.cpp.
enum common_regex_match_type {
|
7
|
+
COMMON_REGEX_MATCH_TYPE_NONE, // default value of common_regex_match::type
|
8
|
+
COMMON_REGEX_MATCH_TYPE_PARTIAL,
|
9
|
+
COMMON_REGEX_MATCH_TYPE_FULL,
|
10
|
+
};
|
11
|
+
|
12
|
+
// A [begin, end] pair of positions inside a string, validated on construction.
// empty() treats begin == end as an empty range, so the range is presumably
// half-open, i.e. [begin, end) - confirm against the callers.
struct common_string_range {
    size_t begin;
    size_t end;

    // Builds a range from two positions.
    // Throws std::runtime_error("Invalid range") when begin > end.
    common_string_range(size_t begin, size_t end) : begin(begin), end(end) {
        // The parameters shadow the members here; both hold the same values.
        if (begin > end) {
            throw std::runtime_error("Invalid range");
        }
    }

    // prevent default ctor
    common_string_range() = delete;

    // True when the range covers no positions (begin == end).
    bool empty() const {
        return begin == end;
    }

    // Two ranges are equal when both endpoints are equal.
    bool operator==(const common_string_range & other) const {
        return begin == other.begin && end == other.end;
    }

    // Negation of operator==, provided for parity with common_regex_match.
    bool operator!=(const common_string_range & other) const {
        return !(*this == other);
    }
};
|
29
|
+
|
30
|
+
// Result record returned by common_regex::search(): a match classification plus
// a list of string ranges. A default-constructed value has type NONE and no groups.
struct common_regex_match {
|
31
|
+
common_regex_match_type type = COMMON_REGEX_MATCH_TYPE_NONE;
|
32
|
+
// NOTE(review): presumably groups[0] is the whole match and the rest are
// capture groups - confirm in regex-partial.cpp.
std::vector<common_string_range> groups;
|
33
|
+
|
34
|
+
// Equal when both the type and every group range are equal.
bool operator==(const common_regex_match & other) const {
|
35
|
+
return type == other.type && groups == other.groups;
|
36
|
+
}
|
37
|
+
// Negation of operator==.
bool operator!=(const common_regex_match & other) const {
|
38
|
+
return !(*this == other);
|
39
|
+
}
|
40
|
+
};
|
41
|
+
|
42
|
+
// Holds a regex pattern string together with two compiled std::regex objects.
// The constructor and search() are only declared here; their definitions are
// outside this header.
class common_regex {
|
43
|
+
std::string pattern; // returned unchanged by str()
|
44
|
+
std::regex rx;
|
45
|
+
// NOTE(review): presumably compiled from regex_to_reversed_partial_regex(pattern)
// and used to detect a partial match at the end of the input - confirm in regex-partial.cpp.
std::regex rx_reversed_partial;
|
46
|
+
|
47
|
+
public:
|
48
|
+
// explicit: a std::string does not convert to a common_regex implicitly.
explicit common_regex(const std::string & pattern);
|
49
|
+
|
50
|
+
// Searches `input` from position `pos` and reports the outcome as a common_regex_match.
// `as_match` defaults to false.
// NOTE(review): presumably as_match = true requires the match to start exactly at `pos` - confirm.
common_regex_match search(const std::string & input, size_t pos, bool as_match = false) const;
|
51
|
+
|
52
|
+
// Returns the stored pattern string by const reference.
const std::string & str() const { return pattern; }
|
53
|
+
};
|
54
|
+
|
55
|
+
// For testing only (pretty print of failures).
|
56
|
+
std::string regex_to_reversed_partial_regex(const std::string & pattern);
|
@@ -3,6 +3,7 @@
|
|
3
3
|
|
4
4
|
#include <sstream>
|
5
5
|
#include <iostream>
|
6
|
+
#include <thread>
|
6
7
|
#include "chat.h"
|
7
8
|
#include "common.h"
|
8
9
|
#include "ggml.h"
|
@@ -10,10 +11,13 @@
|
|
10
11
|
#include "llama.h"
|
11
12
|
#include "llama-impl.h"
|
12
13
|
#include "sampling.h"
|
14
|
+
#include "nlohmann/json.hpp"
|
13
15
|
#if defined(__ANDROID__)
|
14
16
|
#include <android/log.h>
|
15
17
|
#endif
|
16
18
|
|
19
|
+
using json = nlohmann::ordered_json;
|
20
|
+
|
17
21
|
namespace rnllama {
|
18
22
|
|
19
23
|
std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token);
|
@@ -43,6 +47,8 @@ struct completion_token_output
|
|
43
47
|
|
44
48
|
struct llama_rn_context_mtmd;
|
45
49
|
|
50
|
+
struct llama_rn_context_vocoder;
|
51
|
+
|
46
52
|
struct llama_rn_tokenize_result {
|
47
53
|
std::vector<llama_token> tokens;
|
48
54
|
bool has_media = false;
|
@@ -51,6 +57,12 @@ struct llama_rn_tokenize_result {
|
|
51
57
|
std::vector<size_t> chunk_pos_media; // media only
|
52
58
|
};
|
53
59
|
|
60
|
+
// Identifies a TTS model variant. Values are assigned explicitly: UNKNOWN is -1
// and the two named variants are 1 and 2 (0 is not assigned).
// NOTE(review): presumably OUTETTS_V0_2 / OUTETTS_V0_3 correspond to OuteTTS
// v0.2 / v0.3 models - confirm against the vocoder code.
enum tts_type {
|
61
|
+
UNKNOWN = -1,
|
62
|
+
OUTETTS_V0_2 = 1,
|
63
|
+
OUTETTS_V0_3 = 2,
|
64
|
+
};
|
65
|
+
|
54
66
|
// Main context class
|
55
67
|
struct llama_rn_context {
|
56
68
|
bool is_predicting = false;
|
@@ -58,6 +70,7 @@ struct llama_rn_context {
|
|
58
70
|
bool has_next_token = false;
|
59
71
|
std::string generated_text;
|
60
72
|
std::vector<completion_token_output> generated_token_probs;
|
73
|
+
std::vector<llama_token> audio_tokens;
|
61
74
|
|
62
75
|
size_t num_prompt_tokens = 0;
|
63
76
|
size_t num_tokens_predicted = 0;
|
@@ -69,6 +82,9 @@ struct llama_rn_context {
|
|
69
82
|
common_params params;
|
70
83
|
common_init_result llama_init;
|
71
84
|
|
85
|
+
bool next_token_uses_guide_token = true;
|
86
|
+
std::vector<llama_token> guide_tokens;
|
87
|
+
|
72
88
|
llama_model *model = nullptr;
|
73
89
|
float loading_progress = 0;
|
74
90
|
bool is_load_interrupted = false;
|
@@ -92,6 +108,9 @@ struct llama_rn_context {
|
|
92
108
|
llama_rn_context_mtmd *mtmd_wrapper = nullptr;
|
93
109
|
bool has_multimodal = false;
|
94
110
|
|
111
|
+
llama_rn_context_vocoder *vocoder_wrapper = nullptr;
|
112
|
+
bool has_vocoder = false;
|
113
|
+
|
95
114
|
~llama_rn_context();
|
96
115
|
|
97
116
|
void rewind();
|
@@ -104,7 +123,8 @@ struct llama_rn_context {
|
|
104
123
|
const std::string &json_schema,
|
105
124
|
const std::string &tools,
|
106
125
|
const bool &parallel_tool_calls,
|
107
|
-
const std::string &tool_choice
|
126
|
+
const std::string &tool_choice,
|
127
|
+
const bool &enable_thinking
|
108
128
|
) const;
|
109
129
|
std::string getFormattedChat(
|
110
130
|
const std::string &messages,
|
@@ -112,12 +132,14 @@ struct llama_rn_context {
|
|
112
132
|
) const;
|
113
133
|
void truncatePrompt(std::vector<llama_token> &prompt_tokens);
|
114
134
|
void loadPrompt(const std::vector<std::string> &media_paths);
|
135
|
+
void setGuideTokens(const std::vector<llama_token> &tokens);
|
115
136
|
void beginCompletion();
|
116
137
|
void endCompletion();
|
117
138
|
completion_token_output nextToken();
|
118
139
|
size_t findStoppingStrings(const std::string &text, const size_t last_token_size, const stop_type type);
|
119
140
|
completion_token_output doCompletion();
|
120
141
|
std::vector<float> getEmbedding(common_params &embd_params);
|
142
|
+
std::vector<float> rerank(const std::string &query, const std::vector<std::string> &documents);
|
121
143
|
std::string bench(int pp, int tg, int pl, int nr);
|
122
144
|
int applyLoraAdapters(std::vector<common_adapter_lora_info> lora);
|
123
145
|
void removeLoraAdapters();
|
@@ -137,6 +159,15 @@ struct llama_rn_context {
|
|
137
159
|
);
|
138
160
|
|
139
161
|
llama_rn_tokenize_result tokenize(const std::string &text, const std::vector<std::string> &media_paths);
|
162
|
+
|
163
|
+
// Vocoder methods
|
164
|
+
bool initVocoder(const std::string &vocoder_model_path);
|
165
|
+
tts_type getTTSType(json speaker = nullptr);
|
166
|
+
std::string getFormattedAudioCompletion(const std::string &speaker_json_str, const std::string &text_to_speak);
|
167
|
+
std::vector<llama_token> getAudioCompletionGuideTokens(const std::string &text_to_speak);
|
168
|
+
std::vector<float> decodeAudioTokens(const std::vector<llama_token> &tokens);
|
169
|
+
bool isVocoderEnabled() const;
|
170
|
+
void releaseVocoder();
|
140
171
|
};
|
141
172
|
|
142
173
|
// Logging macros
|
@@ -0,0 +1,39 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
namespace rnllama {
|
4
|
+
|
5
|
+
// the default speaker profile is from: https://github.com/edwko/OuteTTS/blob/main/outetts/version/v1/default_speakers/en_male_1.json
|
6
|
+
static const std::string default_audio_text = "<|text_start|>the<|text_sep|>overall<|text_sep|>package<|text_sep|>from<|text_sep|>just<|text_sep|>two<|text_sep|>people<|text_sep|>is<|text_sep|>pretty<|text_sep|>remarkable<|text_sep|>sure<|text_sep|>i<|text_sep|>have<|text_sep|>some<|text_sep|>critiques<|text_sep|>about<|text_sep|>some<|text_sep|>of<|text_sep|>the<|text_sep|>gameplay<|text_sep|>aspects<|text_sep|>but<|text_sep|>its<|text_sep|>still<|text_sep|>really<|text_sep|>enjoyable<|text_sep|>and<|text_sep|>it<|text_sep|>looks<|text_sep|>lovely<|text_sep|>";
|
7
|
+
static const std::string default_audio_data = R"(<|audio_start|>
|
8
|
+
the<|t_0.08|><|code_start|><|257|><|740|><|636|><|913|><|788|><|1703|><|code_end|>
|
9
|
+
overall<|t_0.36|><|code_start|><|127|><|201|><|191|><|774|><|700|><|532|><|1056|><|557|><|798|><|298|><|1741|><|747|><|1662|><|1617|><|1702|><|1527|><|368|><|1588|><|1049|><|1008|><|1625|><|747|><|1576|><|728|><|1019|><|1696|><|1765|><|code_end|>
|
10
|
+
package<|t_0.56|><|code_start|><|935|><|584|><|1319|><|627|><|1016|><|1491|><|1344|><|1117|><|1526|><|1040|><|239|><|1435|><|951|><|498|><|723|><|1180|><|535|><|789|><|1649|><|1637|><|78|><|465|><|1668|><|901|><|595|><|1675|><|117|><|1009|><|1667|><|320|><|840|><|79|><|507|><|1762|><|1508|><|1228|><|1768|><|802|><|1450|><|1457|><|232|><|639|><|code_end|>
|
11
|
+
from<|t_0.19|><|code_start|><|604|><|782|><|1682|><|872|><|1532|><|1600|><|1036|><|1761|><|647|><|1554|><|1371|><|653|><|1595|><|950|><|code_end|>
|
12
|
+
just<|t_0.25|><|code_start|><|1782|><|1670|><|317|><|786|><|1748|><|631|><|599|><|1155|><|1364|><|1524|><|36|><|1591|><|889|><|1535|><|541|><|440|><|1532|><|50|><|870|><|code_end|>
|
13
|
+
two<|t_0.24|><|code_start|><|1681|><|1510|><|673|><|799|><|805|><|1342|><|330|><|519|><|62|><|640|><|1138|><|565|><|1552|><|1497|><|1552|><|572|><|1715|><|1732|><|code_end|>
|
14
|
+
people<|t_0.39|><|code_start|><|593|><|274|><|136|><|740|><|691|><|633|><|1484|><|1061|><|1138|><|1485|><|344|><|428|><|397|><|1562|><|645|><|917|><|1035|><|1449|><|1669|><|487|><|442|><|1484|><|1329|><|1832|><|1704|><|600|><|761|><|653|><|269|><|code_end|>
|
15
|
+
is<|t_0.16|><|code_start|><|566|><|583|><|1755|><|646|><|1337|><|709|><|802|><|1008|><|485|><|1583|><|652|><|10|><|code_end|>
|
16
|
+
pretty<|t_0.32|><|code_start|><|1818|><|1747|><|692|><|733|><|1010|><|534|><|406|><|1697|><|1053|><|1521|><|1355|><|1274|><|816|><|1398|><|211|><|1218|><|817|><|1472|><|1703|><|686|><|13|><|822|><|445|><|1068|><|code_end|>
|
17
|
+
remarkable<|t_0.68|><|code_start|><|230|><|1048|><|1705|><|355|><|706|><|1149|><|1535|><|1787|><|1356|><|1396|><|835|><|1583|><|486|><|1249|><|286|><|937|><|1076|><|1150|><|614|><|42|><|1058|><|705|><|681|><|798|><|934|><|490|><|514|><|1399|><|572|><|1446|><|1703|><|1346|><|1040|><|1426|><|1304|><|664|><|171|><|1530|><|625|><|64|><|1708|><|1830|><|1030|><|443|><|1509|><|1063|><|1605|><|1785|><|721|><|1440|><|923|><|code_end|>
|
18
|
+
sure<|t_0.36|><|code_start|><|792|><|1780|><|923|><|1640|><|265|><|261|><|1525|><|567|><|1491|><|1250|><|1730|><|362|><|919|><|1766|><|543|><|1|><|333|><|113|><|970|><|252|><|1606|><|133|><|302|><|1810|><|1046|><|1190|><|1675|><|code_end|>
|
19
|
+
i<|t_0.08|><|code_start|><|123|><|439|><|1074|><|705|><|1799|><|637|><|code_end|>
|
20
|
+
have<|t_0.16|><|code_start|><|1509|><|599|><|518|><|1170|><|552|><|1029|><|1267|><|864|><|419|><|143|><|1061|><|0|><|code_end|>
|
21
|
+
some<|t_0.16|><|code_start|><|619|><|400|><|1270|><|62|><|1370|><|1832|><|917|><|1661|><|167|><|269|><|1366|><|1508|><|code_end|>
|
22
|
+
critiques<|t_0.60|><|code_start|><|559|><|584|><|1163|><|1129|><|1313|><|1728|><|721|><|1146|><|1093|><|577|><|928|><|27|><|630|><|1080|><|1346|><|1337|><|320|><|1382|><|1175|><|1682|><|1556|><|990|><|1683|><|860|><|1721|><|110|><|786|><|376|><|1085|><|756|><|1523|><|234|><|1334|><|1506|><|1578|><|659|><|612|><|1108|><|1466|><|1647|><|308|><|1470|><|746|><|556|><|1061|><|code_end|>
|
23
|
+
about<|t_0.29|><|code_start|><|26|><|1649|><|545|><|1367|><|1263|><|1728|><|450|><|859|><|1434|><|497|><|1220|><|1285|><|179|><|755|><|1154|><|779|><|179|><|1229|><|1213|><|922|><|1774|><|1408|><|code_end|>
|
24
|
+
some<|t_0.23|><|code_start|><|986|><|28|><|1649|><|778|><|858|><|1519|><|1|><|18|><|26|><|1042|><|1174|><|1309|><|1499|><|1712|><|1692|><|1516|><|1574|><|code_end|>
|
25
|
+
of<|t_0.07|><|code_start|><|197|><|716|><|1039|><|1662|><|64|><|code_end|>
|
26
|
+
the<|t_0.08|><|code_start|><|1811|><|1568|><|569|><|886|><|1025|><|1374|><|code_end|>
|
27
|
+
gameplay<|t_0.48|><|code_start|><|1269|><|1092|><|933|><|1362|><|1762|><|1700|><|1675|><|215|><|781|><|1086|><|461|><|838|><|1022|><|759|><|649|><|1416|><|1004|><|551|><|909|><|787|><|343|><|830|><|1391|><|1040|><|1622|><|1779|><|1360|><|1231|><|1187|><|1317|><|76|><|997|><|989|><|978|><|737|><|189|><|code_end|>
|
28
|
+
aspects<|t_0.56|><|code_start|><|1423|><|797|><|1316|><|1222|><|147|><|719|><|1347|><|386|><|1390|><|1558|><|154|><|440|><|634|><|592|><|1097|><|1718|><|712|><|763|><|1118|><|1721|><|1311|><|868|><|580|><|362|><|1435|><|868|><|247|><|221|><|886|><|1145|><|1274|><|1284|><|457|><|1043|><|1459|><|1818|><|62|><|599|><|1035|><|62|><|1649|><|778|><|code_end|>
|
29
|
+
but<|t_0.20|><|code_start|><|780|><|1825|><|1681|><|1007|><|861|><|710|><|702|><|939|><|1669|><|1491|><|613|><|1739|><|823|><|1469|><|648|><|code_end|>
|
30
|
+
its<|t_0.09|><|code_start|><|92|><|688|><|1623|><|962|><|1670|><|527|><|599|><|code_end|>
|
31
|
+
still<|t_0.27|><|code_start|><|636|><|10|><|1217|><|344|><|713|><|957|><|823|><|154|><|1649|><|1286|><|508|><|214|><|1760|><|1250|><|456|><|1352|><|1368|><|921|><|615|><|5|><|code_end|>
|
32
|
+
really<|t_0.36|><|code_start|><|55|><|420|><|1008|><|1659|><|27|><|644|><|1266|><|617|><|761|><|1712|><|109|><|1465|><|1587|><|503|><|1541|><|619|><|197|><|1019|><|817|><|269|><|377|><|362|><|1381|><|507|><|1488|><|4|><|1695|><|code_end|>
|
33
|
+
enjoyable<|t_0.49|><|code_start|><|678|><|501|><|864|><|319|><|288|><|1472|><|1341|><|686|><|562|><|1463|><|619|><|1563|><|471|><|911|><|730|><|1811|><|1006|><|520|><|861|><|1274|><|125|><|1431|><|638|><|621|><|153|><|876|><|1770|><|437|><|987|><|1653|><|1109|><|898|><|1285|><|80|><|593|><|1709|><|843|><|code_end|>
|
34
|
+
and<|t_0.15|><|code_start|><|1285|><|987|><|303|><|1037|><|730|><|1164|><|502|><|120|><|1737|><|1655|><|1318|><|code_end|>
|
35
|
+
it<|t_0.09|><|code_start|><|848|><|1366|><|395|><|1601|><|1513|><|593|><|1302|><|code_end|>
|
36
|
+
looks<|t_0.27|><|code_start|><|1281|><|1266|><|1755|><|572|><|248|><|1751|><|1257|><|695|><|1380|><|457|><|659|><|585|><|1315|><|1105|><|1776|><|736|><|24|><|736|><|654|><|1027|><|code_end|>
|
37
|
+
lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|1481|><|1721|><|1123|><|438|><|1246|><|1251|><|795|><|659|><|1381|><|1658|><|217|><|1772|><|562|><|952|><|107|><|1129|><|1112|><|467|><|550|><|1079|><|840|><|1615|><|1469|><|1380|><|168|><|917|><|836|><|1827|><|437|><|583|><|67|><|595|><|1087|><|1646|><|1493|><|1677|><|code_end|>)";
|
38
|
+
|
39
|
+
}
|
Binary file
|
Binary file
|
package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "chat.h"
|
4
|
+
#include "json-partial.h"
|
5
|
+
#include "regex-partial.h"
|
6
|
+
|
7
|
+
#include "nlohmann/json.hpp"
|
8
|
+
|
9
|
+
#include <optional>
|
10
|
+
#include <string>
|
11
|
+
#include <vector>
|
12
|
+
|
13
|
+
// Dedicated exception type derived from std::runtime_error so that callers can
// catch it separately from other runtime errors.
// NOTE(review): presumably raised when parsing runs into the end of incomplete
// (partial) input — confirm against the throw sites in chat-parser.cpp.
class common_chat_msg_partial_exception : public std::runtime_error {
  public:
    // `explicit` prevents a plain std::string from being silently converted
    // into an exception object; direct construction such as
    // `throw common_chat_msg_partial_exception("...")` is unaffected.
    //
    // @param message  Text later returned by what().
    explicit common_chat_msg_partial_exception(const std::string & message) : std::runtime_error(message) {}
};
|
17
|
+
|
18
|
+
class common_chat_msg_parser {
|
19
|
+
std::string input_;
|
20
|
+
bool is_partial_;
|
21
|
+
common_chat_syntax syntax_;
|
22
|
+
std::string healing_marker_;
|
23
|
+
|
24
|
+
size_t pos_ = 0;
|
25
|
+
common_chat_msg result_;
|
26
|
+
|
27
|
+
public:
|
28
|
+
common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
|
29
|
+
const std::string & input() const { return input_; } // Read-only view of the stored input_ string (no copy).
|
30
|
+
size_t pos() const { return pos_; } // Current value of the pos_ cursor; move_to() bounds it by input_.size().
|
31
|
+
const std::string & healing_marker() const { return healing_marker_; } // Read-only view of the healing_marker_ member (no copy).
|
32
|
+
const bool & is_partial() const { return is_partial_; } // Exposes the is_partial_ flag by const reference. NOTE(review): returning bool by value would be the usual form — confirm no caller relies on the reference.
|
33
|
+
const common_chat_msg & result() const { return result_; } // Read-only view of the result_ message held by the parser.
|
34
|
+
const common_chat_syntax & syntax() const { return syntax_; } // Read-only view of the syntax_ settings held by the parser.
|
35
|
+
|
36
|
+
void move_to(size_t pos) {
|
37
|
+
if (pos > input_.size()) {
|
38
|
+
throw std::runtime_error("Invalid position!");
|
39
|
+
}
|
40
|
+
pos_ = pos;
|
41
|
+
}
|
42
|
+
// Rewinds the cursor (pos_) by n.
// Throws std::runtime_error, leaving pos_ untouched, when n exceeds the
// current position, so the unsigned subtraction can never wrap around.
void move_back(size_t n) {
    const bool would_underflow = pos_ < n;
    if (would_underflow) {
        throw std::runtime_error("Can't move back that far!");
    }
    pos_ = pos_ - n;
}
|
48
|
+
|
49
|
+
// Get the substring of the input at the given range
|
50
|
+
std::string str(const common_string_range & rng) const;
|
51
|
+
|
52
|
+
// Appends to the result.content field
|
53
|
+
void add_content(const std::string & content);
|
54
|
+
|
55
|
+
// Appends to the result.reasoning_content field
|
56
|
+
void add_reasoning_content(const std::string & reasoning_content);
|
57
|
+
|
58
|
+
// Adds a tool call to the result. If the tool call is too incomplete (e.g. name empty), it won't add anything.
|
59
|
+
bool add_tool_call(const std::string & name, const std::string & id, const std::string & arguments);
|
60
|
+
|
61
|
+
// Adds a tool call using the "name", "id" and "arguments" fields of the json object
|
62
|
+
bool add_tool_call(const nlohmann::ordered_json & tool_call);
|
63
|
+
|
64
|
+
// Adds an array of tool calls using their "name", "id" and "arguments" fields.
|
65
|
+
bool add_tool_calls(const nlohmann::ordered_json & arr);
|
66
|
+
|
67
|
+
void finish();
|
68
|
+
|
69
|
+
bool consume_spaces();
|
70
|
+
|
71
|
+
void consume_literal(const std::string & literal);
|
72
|
+
|
73
|
+
bool try_parse_reasoning(const std::string & start_think, const std::string & end_think);
|
74
|
+
|
75
|
+
std::string consume_rest();
|
76
|
+
|
77
|
+
struct find_regex_result {
|
78
|
+
std::string prelude;
|
79
|
+
std::vector<common_string_range> groups;
|
80
|
+
};
|
81
|
+
|
82
|
+
std::optional<find_regex_result> try_find_regex(const common_regex & regex, size_t from = std::string::npos, bool add_prelude_to_content = true);
|
83
|
+
|
84
|
+
bool try_consume_literal(const std::string & literal);
|
85
|
+
|
86
|
+
std::optional<find_regex_result> try_find_literal(const std::string & literal);
|
87
|
+
|
88
|
+
find_regex_result consume_regex(const common_regex & regex);
|
89
|
+
|
90
|
+
std::optional<find_regex_result> try_consume_regex(const common_regex & regex);
|
91
|
+
|
92
|
+
std::optional<common_json> try_consume_json();
|
93
|
+
common_json consume_json();
|
94
|
+
|
95
|
+
struct consume_json_result {
|
96
|
+
nlohmann::ordered_json value;
|
97
|
+
bool is_partial;
|
98
|
+
};
|
99
|
+
|
100
|
+
/*
|
101
|
+
Consume (possibly partial) json and converts specific subtrees to (possibly truncated) JSON strings.
|
102
|
+
|
103
|
+
By default, object keys can't be truncated, nor can string values (their corresponding key is removed,
|
104
|
+
e.g. `{"foo": "bar", "baz": "b` -> `{"foo": "bar"}`
|
105
|
+
|
106
|
+
But one can allow subpaths to be kept truncated, and possibly json-dumped to truncated json strings
|
107
|
+
- with `content_paths={{"foo"}}` -> `{"foo": "b` -> {"foo": "b"}`
|
108
|
+
- with `args_paths={{"foo"}}` -> `{"foo": {"b` -> `{"foo": "{b"}`
|
109
|
+
*/
|
110
|
+
consume_json_result consume_json_with_dumped_args(
|
111
|
+
const std::vector<std::vector<std::string>> & args_paths = {},
|
112
|
+
const std::vector<std::vector<std::string>> & content_paths = {}
|
113
|
+
);
|
114
|
+
std::optional<consume_json_result> try_consume_json_with_dumped_args(
|
115
|
+
const std::vector<std::vector<std::string>> & args_paths = {},
|
116
|
+
const std::vector<std::vector<std::string>> & content_paths = {}
|
117
|
+
);
|
118
|
+
|
119
|
+
void clear_tools();
|
120
|
+
};
|
@@ -3,6 +3,7 @@
|
|
3
3
|
#pragma once
|
4
4
|
|
5
5
|
#include "common.h"
|
6
|
+
#include <functional>
|
6
7
|
#include <chrono>
|
7
8
|
#include <string>
|
8
9
|
#include <vector>
|
@@ -21,11 +22,19 @@ struct common_chat_tool_call {
|
|
21
22
|
std::string name;
|
22
23
|
std::string arguments;
|
23
24
|
std::string id;
|
25
|
+
|
26
|
+
// Two tool calls are equal when name, arguments and id all match.
bool operator==(const common_chat_tool_call & other) const {
    const bool same_name = (name == other.name);
    const bool same_args = (arguments == other.arguments);
    const bool same_id   = (id == other.id);
    return same_name && same_args && same_id;
}
|
24
29
|
};
|
25
30
|
|
26
31
|
// One content part of a chat message: a `type` string paired with a `text` string.
struct common_chat_msg_content_part {
    std::string type;
    std::string text;

    // Parts compare equal only when both `type` and `text` match.
    bool operator==(const common_chat_msg_content_part & other) const {
        if (!(type == other.type)) {
            return false;
        }
        return text == other.text;
    }
};
|
30
39
|
|
31
40
|
struct common_chat_msg {
|
@@ -36,6 +45,51 @@ struct common_chat_msg {
|
|
36
45
|
std::string reasoning_content;
|
37
46
|
std::string tool_name;
|
38
47
|
std::string tool_call_id;
|
48
|
+
|
49
|
+
template <class T> T to_json_oaicompat() const;
|
50
|
+
|
51
|
+
bool empty() const {
|
52
|
+
return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
|
53
|
+
}
|
54
|
+
// Gives every tool call an id taken from ids_cache, extending the cache on demand.
//
// For each tool call index without a cache entry yet, the entry becomes the
// call's current id, or a freshly generated one when that id is empty. The
// call's id is then overwritten with the cached value, so an entry already in
// the cache always wins over whatever id the call currently has.
//
// @param ids_cache         Per-index id cache; appended to, never shrunk.
// @param gen_tool_call_id  Invoked only when a new cache entry is needed and
//                          the call's own id is empty.
void ensure_tool_call_ids_set(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
    auto idx = 0u;
    for (auto & call : tool_calls) {
        if (!(idx < ids_cache.size())) {
            ids_cache.push_back(call.id.empty() ? gen_tool_call_id() : call.id);
        }
        call.id = ids_cache[idx];
        ++idx;
    }
}
|
66
|
+
// Field-by-field equality over role, content, content parts, tool calls,
// reasoning content, tool name and tool-call id, bailing out at the first
// mismatch.
bool operator==(const common_chat_msg & other) const {
    if (!(role == other.role)) {
        return false;
    }
    if (!(content == other.content)) {
        return false;
    }
    if (!(content_parts == other.content_parts)) {
        return false;
    }
    if (!(tool_calls == other.tool_calls)) {
        return false;
    }
    if (!(reasoning_content == other.reasoning_content)) {
        return false;
    }
    if (!(tool_name == other.tool_name)) {
        return false;
    }
    return tool_call_id == other.tool_call_id;
}
|
75
|
+
// Negation of operator== so both comparisons always agree.
bool operator!=(const common_chat_msg & other) const {
    const bool same = (*this == other);
    return !same;
}
|
78
|
+
};
|
79
|
+
|
80
|
+
struct common_chat_msg_diff {
|
81
|
+
std::string reasoning_content_delta;
|
82
|
+
std::string content_delta;
|
83
|
+
size_t tool_call_index = std::string::npos;
|
84
|
+
common_chat_tool_call tool_call_delta;
|
85
|
+
|
86
|
+
static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg);
|
87
|
+
|
88
|
+
bool operator==(const common_chat_msg_diff & other) const {
|
89
|
+
return content_delta == other.content_delta
|
90
|
+
&& tool_call_index == other.tool_call_index
|
91
|
+
&& tool_call_delta == other.tool_call_delta;
|
92
|
+
}
|
39
93
|
};
|
40
94
|
|
41
95
|
struct common_chat_tool {
|
@@ -57,14 +111,11 @@ enum common_chat_format {
|
|
57
111
|
COMMON_CHAT_FORMAT_LLAMA_3_X,
|
58
112
|
COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
|
59
113
|
COMMON_CHAT_FORMAT_DEEPSEEK_R1,
|
60
|
-
COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING,
|
61
114
|
COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
|
62
115
|
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
|
63
116
|
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
|
64
117
|
COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
65
|
-
COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING,
|
66
118
|
COMMON_CHAT_FORMAT_COMMAND_R7B,
|
67
|
-
COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
|
68
119
|
|
69
120
|
COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
|
70
121
|
};
|
@@ -79,7 +130,8 @@ struct common_chat_templates_inputs {
|
|
79
130
|
std::vector<common_chat_tool> tools;
|
80
131
|
common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
|
81
132
|
bool parallel_tool_calls = false;
|
82
|
-
|
133
|
+
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
|
134
|
+
bool enable_thinking = true;
|
83
135
|
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
|
84
136
|
};
|
85
137
|
|
@@ -88,11 +140,21 @@ struct common_chat_params {
|
|
88
140
|
std::string prompt;
|
89
141
|
std::string grammar;
|
90
142
|
bool grammar_lazy = false;
|
143
|
+
bool thinking_forced_open = false;
|
91
144
|
std::vector<common_grammar_trigger> grammar_triggers;
|
92
145
|
std::vector<std::string> preserved_tokens;
|
93
146
|
std::vector<std::string> additional_stops;
|
94
147
|
};
|
95
148
|
|
149
|
+
struct common_chat_syntax {
|
150
|
+
common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
151
|
+
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
|
152
|
+
// Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
|
153
|
+
bool reasoning_in_content = false;
|
154
|
+
bool thinking_forced_open = false;
|
155
|
+
bool parse_tool_calls = true;
|
156
|
+
};
|
157
|
+
|
96
158
|
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
|
97
159
|
bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
|
98
160
|
|
@@ -129,8 +191,9 @@ std::string common_chat_format_example(
|
|
129
191
|
const struct common_chat_templates * tmpls,
|
130
192
|
bool use_jinja);
|
131
193
|
|
132
|
-
|
133
|
-
|
194
|
+
const char* common_chat_format_name(common_chat_format format);
|
195
|
+
const char* common_reasoning_format_name(common_reasoning_format format);
|
196
|
+
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
|
134
197
|
|
135
198
|
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
|
136
199
|
|
@@ -143,3 +206,5 @@ template <class T> T common_chat_msgs_to_json_oaicompat(const std::vector<common
|
|
143
206
|
// T can be std::string containing JSON or nlohmann::ordered_json
|
144
207
|
template <class T> std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const T & tools);
|
145
208
|
template <class T> T common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools);
|
209
|
+
|
210
|
+
template <class T> T common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff);
|