cui-llama.rn 1.7.4 → 1.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +217 -17
- package/android/src/main/CMakeLists.txt +34 -15
- package/android/src/main/java/com/rnllama/LlamaContext.java +79 -5
- package/android/src/main/java/com/rnllama/RNLlama.java +237 -0
- package/android/src/main/jni.cpp +213 -14
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
- package/cpp/README.md +1 -1
- package/cpp/chat-parser.cpp +385 -0
- package/cpp/chat-parser.h +120 -0
- package/cpp/chat.cpp +726 -596
- package/cpp/chat.h +71 -6
- package/cpp/common.cpp +56 -38
- package/cpp/common.h +9 -3
- package/cpp/ggml-backend-reg.cpp +5 -0
- package/cpp/ggml-backend.cpp +10 -2
- package/cpp/ggml-common.h +4 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/common.h +4 -3
- package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
- package/cpp/ggml-cpu/ggml-cpu.c +123 -104
- package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
- package/cpp/ggml-cpu/ops.cpp +330 -148
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/quants.c +1158 -0
- package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/ggml-cpu/repack.cpp +1571 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +330 -38
- package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/ggml-cpu/vec.cpp +87 -18
- package/cpp/ggml-cpu/vec.h +249 -94
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +63 -183
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal.m +152 -45
- package/cpp/ggml-quants.c +0 -2
- package/cpp/ggml.c +61 -21
- package/cpp/ggml.h +22 -3
- package/cpp/gguf.cpp +24 -3
- package/cpp/json-partial.cpp +256 -0
- package/cpp/json-partial.h +38 -0
- package/cpp/json-schema-to-grammar.cpp +5 -47
- package/cpp/json-schema-to-grammar.h +4 -4
- package/cpp/llama-arch.cpp +153 -3
- package/cpp/llama-arch.h +27 -1
- package/cpp/llama-batch.cpp +741 -272
- package/cpp/llama-batch.h +112 -54
- package/cpp/llama-chat.cpp +30 -8
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-context.cpp +524 -339
- package/cpp/llama-context.h +38 -17
- package/cpp/llama-cparams.cpp +4 -0
- package/cpp/llama-cparams.h +2 -0
- package/cpp/llama-grammar.cpp +12 -2
- package/cpp/llama-graph.cpp +431 -356
- package/cpp/llama-graph.h +126 -58
- package/cpp/llama-hparams.cpp +10 -2
- package/cpp/llama-hparams.h +19 -2
- package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
- package/cpp/llama-kv-cache-unified-iswa.h +128 -0
- package/cpp/llama-kv-cache-unified.cpp +1841 -0
- package/cpp/llama-kv-cache-unified.h +303 -0
- package/cpp/llama-kv-cells.h +439 -0
- package/cpp/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama-memory-hybrid.h +138 -0
- package/cpp/llama-memory-recurrent.cpp +1112 -0
- package/cpp/llama-memory-recurrent.h +183 -0
- package/cpp/llama-memory.cpp +41 -0
- package/cpp/llama-memory.h +86 -5
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +42 -17
- package/cpp/llama-model-saver.cpp +1 -0
- package/cpp/llama-model.cpp +1639 -513
- package/cpp/llama-model.h +26 -0
- package/cpp/llama-sampling.cpp +2 -2
- package/cpp/llama-vocab.cpp +65 -28
- package/cpp/llama-vocab.h +1 -0
- package/cpp/llama.cpp +11 -7
- package/cpp/llama.h +150 -42
- package/cpp/minja/chat-template.hpp +1 -1
- package/cpp/minja/minja.hpp +1 -1
- package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/cpp/nlohmann/json_fwd.hpp +187 -0
- package/cpp/regex-partial.cpp +204 -0
- package/cpp/regex-partial.h +56 -0
- package/cpp/rn-llama.cpp +646 -35
- package/cpp/rn-llama.h +32 -1
- package/cpp/rn-tts.h +39 -0
- package/cpp/sampling.cpp +7 -8
- package/cpp/tools/mtmd/clip-impl.h +5 -0
- package/cpp/tools/mtmd/clip.cpp +572 -436
- package/cpp/tools/mtmd/clip.h +14 -4
- package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
- package/cpp/tools/mtmd/mtmd-audio.h +2 -17
- package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
- package/cpp/tools/mtmd/mtmd-helper.h +91 -0
- package/cpp/tools/mtmd/mtmd.cpp +368 -248
- package/cpp/tools/mtmd/mtmd.h +6 -70
- package/cpp/unicode.cpp +5 -0
- package/ios/CMakeLists.txt +26 -6
- package/ios/RNLlama.h +1 -1
- package/ios/RNLlama.mm +153 -3
- package/ios/RNLlamaContext.h +9 -1
- package/ios/RNLlamaContext.mm +112 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +24 -0
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +46 -2
- package/src/index.ts +105 -1
- package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/cpp/ggml-cpu/sgemm.cpp +0 -3544
- package/cpp/ggml-cpu/sgemm.h +0 -14
- package/cpp/llama-kv-cache.cpp +0 -2827
- package/cpp/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
- /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
- /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
package/cpp/tools/mtmd/mtmd.h
CHANGED
@@ -3,7 +3,6 @@
 
 #include "ggml.h"
 #include "llama.h"
-#include "clip.h"
 
 #include <stddef.h>
 #include <stdint.h>
@@ -109,6 +108,10 @@ MTMD_API bool mtmd_support_vision(mtmd_context * ctx);
 // whether the current model supports audio input
 MTMD_API bool mtmd_support_audio(mtmd_context * ctx);
 
+// get audio bitrate in Hz, for example 16000 for Whisper
+// return -1 if audio is not supported
+MTMD_API int mtmd_get_audio_bitrate(mtmd_context * ctx);
+
 // mtmd_bitmap
 //
 // if bitmap is image:
@@ -203,79 +206,12 @@ MTMD_API int32_t mtmd_encode_chunk(mtmd_context * ctx,
                                    const mtmd_input_chunk * chunk);
 
 // get output embeddings from the last encode pass
+// the reading size (in bytes) is equal to:
+// llama_model_n_embd(model) * mtmd_input_chunk_get_n_tokens(chunk) * sizeof(float)
 MTMD_API float * mtmd_get_output_embd(mtmd_context * ctx);
 
 /////////////////////////////////////////
 
-//
-// Helper functions (can be implemented based on other functions)
-//
-// Please note that these helpers are not guaranteed to be stable.
-// BREAKING CHANGES are expected.
-//
-
-// helper function to construct a mtmd_bitmap from a file
-// it calls mtmd_helper_bitmap_init_from_buf() internally
-// returns nullptr on failure
-// this function is thread-safe
-MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_file(const char * fname);
-
-// helper function to construct a mtmd_bitmap from a buffer containing a file
-// supported formats:
-// image: formats supported by stb_image: jpg, png, bmp, gif, etc.
-// audio: formats supported by miniaudio: wav, mp3, flac
-// note: audio files will be auto-detected based on magic bytes
-// returns nullptr on failure
-// this function is thread-safe
-MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_buf(const unsigned char * buf, size_t len);
-
-// helper to count the total number of tokens from a list of chunks, useful to keep track of KV cache
-MTMD_API size_t mtmd_helper_get_n_tokens(const mtmd_input_chunks * chunks);
-
-// helper to count the total position of tokens from a list of chunks, useful to keep track of n_past
-// normally, n_pos is equal to n_tokens, but for M-RoPE it is different
-MTMD_API llama_pos mtmd_helper_get_n_pos(const mtmd_input_chunks * chunks);
-
-// helper function that automatically:
-// 1. run llama_decode() on text chunks
-// 2. run mtmd_encode() on image chunks, then mtmd_get_output_embd() and then llama_decode()
-// if any of the mtmd_encode() or llama_decode() calls return non-zero, stop and forward the error
-// otherwise, returns 0 on success
-// this function is NOT thread-safe
-MTMD_API int32_t mtmd_helper_eval_chunks(mtmd_context * ctx,
-                                         struct llama_context * lctx,
-                                         const mtmd_input_chunks * chunks,
-                                         llama_pos n_past,
-                                         llama_seq_id seq_id,
-                                         int32_t n_batch,
-                                         bool logits_last,
-                                         llama_pos * new_n_past);
-
-// works like mtmd_helper_eval_chunks(), but only for a single chunk
-// this function is NOT thread-safe
-MTMD_API int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
-                                               struct llama_context * lctx,
-                                               const mtmd_input_chunk * chunk,
-                                               llama_pos n_past,
-                                               llama_seq_id seq_id,
-                                               int32_t n_batch,
-                                               bool logits_last,
-                                               llama_pos * new_n_past);
-
-// helper function to decode an image whose embeddings have already been calculated
-// this helper will handle batching and pre/post decoding setup (for ex. gemma 3 requires non-causal attention)
-// ret 0 on success, -1 on chunk not being a valid image chunk, 1 on decode failure
-MTMD_API int32_t mtmd_helper_decode_image_chunk(mtmd_context * ctx,
-                                                struct llama_context * lctx,
-                                                const mtmd_input_chunk * chunk,
-                                                float * encoded_embd,
-                                                llama_pos n_past,
-                                                llama_seq_id seq_id,
-                                                int32_t n_batch,
-                                                llama_pos * new_n_past);
-
-/////////////////////////////////////////
-
 // test function, to be used in test-mtmd-c-api.c
 MTMD_API mtmd_input_chunks * mtmd_test_create_input_chunks(void);
 
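The two additions above expose an audio bitrate query and document how large the buffer returned by mtmd_get_output_embd() is. A minimal C++ sketch of how a caller might use them (the helper name copy_chunk_embd is ours; it assumes mtmd_encode_chunk() has already run on the chunk):

    // Minimal sketch (not from the package) using the two header additions above.
    #include <cstring>
    #include <vector>
    #include "llama.h"
    #include "mtmd.h"

    static std::vector<float> copy_chunk_embd(mtmd_context * mctx,
                                              const llama_model * model,
                                              const mtmd_input_chunk * chunk) {
        int bitrate = mtmd_get_audio_bitrate(mctx);  // e.g. 16000 for Whisper-style audio, -1 if unsupported
        (void) bitrate;

        // read size documented in the new header comment:
        //   llama_model_n_embd(model) * mtmd_input_chunk_get_n_tokens(chunk) * sizeof(float)
        const size_t n_floats = (size_t) llama_model_n_embd(model) * mtmd_input_chunk_get_n_tokens(chunk);

        std::vector<float> embd(n_floats);
        std::memcpy(embd.data(), mtmd_get_output_embd(mctx), n_floats * sizeof(float));
        return embd;
    }
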
package/cpp/unicode.cpp
CHANGED
@@ -204,12 +204,17 @@ static inline std::wstring unicode_wstring_from_utf8(const std::string & s) {
     // disable C++17 deprecation warning for std::codecvt_utf8
 #    pragma clang diagnostic push
 #    pragma clang diagnostic ignored "-Wdeprecated-declarations"
+#elif defined(__GNUC__)
+#    pragma GCC diagnostic push
+#    pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 #endif
 
     std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
 
 #if defined(__clang__)
 #    pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#    pragma GCC diagnostic pop
 #endif
 
     return conv.from_bytes(s);
package/ios/CMakeLists.txt
CHANGED
@@ -24,8 +24,19 @@ add_definitions(
     -DLM_GGML_METAL_USE_BF16
 )
 
+if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64;x86_64")
+    add_definitions(-DLM_GGML_CPU_GENERIC)
+endif ()
+
 set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../cpp)
 
+if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
+    set(SOURCE_FILES_ARCH
+        ${SOURCE_DIR}/ggml-cpu/arch/arm/quants.c
+        ${SOURCE_DIR}/ggml-cpu/arch/arm/repack.cpp
+    )
+endif ()
+
 # Define public headers
 set(PUBLIC_HEADERS
     ${SOURCE_DIR}/rn-llama.h
@@ -44,12 +55,11 @@ add_library(rnllama SHARED
     ${SOURCE_DIR}/ggml-cpu/amx/mmq.cpp
     ${SOURCE_DIR}/ggml-cpu/ggml-cpu.c
     ${SOURCE_DIR}/ggml-cpu/ggml-cpu.cpp
-    ${SOURCE_DIR}/ggml-cpu/
-    ${SOURCE_DIR}/ggml-cpu/
-    ${SOURCE_DIR}/ggml-cpu/
+    ${SOURCE_DIR}/ggml-cpu/quants.c
+    ${SOURCE_DIR}/ggml-cpu/traits.cpp
+    ${SOURCE_DIR}/ggml-cpu/repack.cpp
     ${SOURCE_DIR}/ggml-cpu/unary-ops.cpp
     ${SOURCE_DIR}/ggml-cpu/binary-ops.cpp
-    ${SOURCE_DIR}/ggml-cpu/sgemm.cpp
     ${SOURCE_DIR}/ggml-cpu/vec.cpp
     ${SOURCE_DIR}/ggml-cpu/ops.cpp
     ${SOURCE_DIR}/ggml-metal.m
@@ -65,7 +75,6 @@ add_library(rnllama SHARED
     ${SOURCE_DIR}/llama-adapter.cpp
     ${SOURCE_DIR}/llama-chat.cpp
     ${SOURCE_DIR}/llama-context.cpp
-    ${SOURCE_DIR}/llama-kv-cache.cpp
     ${SOURCE_DIR}/llama-arch.cpp
     ${SOURCE_DIR}/llama-batch.cpp
     ${SOURCE_DIR}/llama-cparams.cpp
@@ -75,6 +84,10 @@ add_library(rnllama SHARED
     ${SOURCE_DIR}/llama-model-loader.cpp
     ${SOURCE_DIR}/llama-model-saver.cpp
     ${SOURCE_DIR}/llama-mmap.cpp
+    ${SOURCE_DIR}/llama-kv-cache-unified.cpp
+    ${SOURCE_DIR}/llama-kv-cache-unified-iswa.cpp
+    ${SOURCE_DIR}/llama-memory-hybrid.cpp
+    ${SOURCE_DIR}/llama-memory-recurrent.cpp
     ${SOURCE_DIR}/llama-vocab.cpp
     ${SOURCE_DIR}/llama-memory.cpp
     ${SOURCE_DIR}/llama-io.cpp
@@ -87,13 +100,18 @@ add_library(rnllama SHARED
     ${SOURCE_DIR}/json-schema-to-grammar.cpp
     ${SOURCE_DIR}/minja/minja.hpp
     ${SOURCE_DIR}/minja/chat-template.hpp
-    ${SOURCE_DIR}/json.hpp
+    ${SOURCE_DIR}/nlohmann/json.hpp
+    ${SOURCE_DIR}/nlohmann/json_fwd.hpp
+    ${SOURCE_DIR}/chat-parser.cpp
+    ${SOURCE_DIR}/json-partial.cpp
+    ${SOURCE_DIR}/regex-partial.cpp
     # Multimodal support
     ${SOURCE_DIR}/tools/mtmd/mtmd.cpp
     ${SOURCE_DIR}/tools/mtmd/mtmd-audio.cpp
     ${SOURCE_DIR}/tools/mtmd/clip.cpp
     ${SOURCE_DIR}/tools/mtmd/mtmd-helper.cpp
     ${SOURCE_DIR}/rn-llama.cpp
+    ${SOURCE_FILES_ARCH}
 )
 
 # Setup include directories
@@ -102,6 +120,8 @@ target_include_directories(rnllama
     $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp>
     $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp/ggml-cpu>
     $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp/tools/mtmd>
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp/minja>
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp/nlohmann>
     $<INSTALL_INTERFACE:include>
 )
 
package/ios/RNLlama.h
CHANGED
package/ios/RNLlama.mm
CHANGED
@@ -102,13 +102,21 @@ RCT_EXPORT_METHOD(getFormattedChat:(double)contextId
     if ([params[@"jinja"] boolValue]) {
       NSString *jsonSchema = params[@"json_schema"];
       NSString *tools = params[@"tools"];
-
+      BOOL parallelToolCalls = [params[@"parallel_tool_calls"] boolValue];
       NSString *toolChoice = params[@"tool_choice"];
-
+      BOOL enableThinking = [params[@"enable_thinking"] boolValue];
+      resolve([context getFormattedChatWithJinja:messages
+                                withChatTemplate:chatTemplate
+                                  withJsonSchema:jsonSchema
+                                       withTools:tools
+                           withParallelToolCalls:parallelToolCalls
+                                  withToolChoice:toolChoice
+                              withEnableThinking:enableThinking
+      ]);
     } else {
       resolve([context getFormattedChat:messages withChatTemplate:chatTemplate]);
     }
-  } catch (const nlohmann::
+  } catch (const nlohmann::json_abi_v3_12_0::detail::parse_error& e) {
     NSString *errorMessage = [NSString stringWithUTF8String:e.what()];
     reject(@"llama_error", [NSString stringWithFormat:@"JSON parse error in getFormattedChat: %@", errorMessage], nil);
   } catch (const std::exception& e) { // catch cpp exceptions
@@ -297,6 +305,25 @@ RCT_EXPORT_METHOD(embedding:(double)contextId
     }
 }
 
+RCT_EXPORT_METHOD(rerank:(double)contextId
+                  query:(NSString *)query
+                  documents:(NSArray<NSString *> *)documents
+                  params:(NSDictionary *)params
+                  resolver:(RCTPromiseResolveBlock)resolve
+                  rejecter:(RCTPromiseRejectBlock)reject) {
+    RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
+    if (context == nil) {
+        reject(@"context_not_found", @"Context not found", nil);
+        return;
+    }
+    @try {
+        NSArray *result = [context rerank:query documents:documents params:params];
+        resolve(result);
+    } @catch (NSException *exception) {
+        reject(@"rerank_error", exception.reason, nil);
+    }
+}
+
 RCT_EXPORT_METHOD(bench:(double)contextId
                   pp:(int)pp
                   tg:(int)tg
@@ -434,6 +461,129 @@ RCT_EXPORT_METHOD(releaseMultimodal:(double)contextId
   resolve(nil);
 }
 
+RCT_EXPORT_METHOD(initVocoder:(double)contextId
+                 withVocoderModelPath:(NSString *)vocoderModelPath
+                 withResolver:(RCTPromiseResolveBlock)resolve
+                 withRejecter:(RCTPromiseRejectBlock)reject)
+{
+    RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
+    if (context == nil) {
+        reject(@"llama_error", @"Context not found", nil);
+        return;
+    }
+    if ([context isPredicting]) {
+        reject(@"llama_error", @"Context is busy", nil);
+        return;
+    }
+
+    @try {
+        bool success = [context initVocoder:vocoderModelPath];
+        resolve(@(success));
+    } @catch (NSException *exception) {
+        reject(@"llama_cpp_error", exception.reason, nil);
+    }
+}
+
+RCT_EXPORT_METHOD(isVocoderEnabled:(double)contextId
+                 withResolver:(RCTPromiseResolveBlock)resolve
+                 withRejecter:(RCTPromiseRejectBlock)reject)
+{
+    RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
+    if (context == nil) {
+        reject(@"llama_error", @"Context not found", nil);
+        return;
+    }
+
+    resolve(@([context isVocoderEnabled]));
+}
+
+RCT_EXPORT_METHOD(getFormattedAudioCompletion:(double)contextId
+                 withSpeakerJsonStr:(NSString *)speakerJsonStr
+                 withTextToSpeak:(NSString *)textToSpeak
+                 withResolver:(RCTPromiseResolveBlock)resolve
+                 withRejecter:(RCTPromiseRejectBlock)reject)
+{
+    RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
+    if (context == nil) {
+        reject(@"llama_error", @"Context not found", nil);
+        return;
+    }
+
+    if (![context isVocoderEnabled]) {
+        reject(@"llama_error", @"Vocoder is not enabled", nil);
+        return;
+    }
+
+    @try {
+        NSString *result = [context getFormattedAudioCompletion:speakerJsonStr textToSpeak:textToSpeak];
+        resolve(result);
+    } @catch (NSException *exception) {
+        reject(@"llama_cpp_error", exception.reason, nil);
+    }
+}
+
+RCT_EXPORT_METHOD(getAudioCompletionGuideTokens:(double)contextId
+                 withTextToSpeak:(NSString *)textToSpeak
+                 withResolver:(RCTPromiseResolveBlock)resolve
+                 withRejecter:(RCTPromiseRejectBlock)reject)
+{
+    RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
+    if (context == nil) {
+        reject(@"llama_error", @"Context not found", nil);
+        return;
+    }
+
+    if (![context isVocoderEnabled]) {
+        reject(@"llama_error", @"Vocoder is not enabled", nil);
+        return;
+    }
+
+    @try {
+        NSArray *guideTokens = [context getAudioCompletionGuideTokens:textToSpeak];
+        resolve(guideTokens);
+    } @catch (NSException *exception) {
+        reject(@"llama_cpp_error", exception.reason, nil);
+    }
+}
+
+RCT_EXPORT_METHOD(decodeAudioTokens:(double)contextId
+                 withTokens:(NSArray *)tokens
+                 withResolver:(RCTPromiseResolveBlock)resolve
+                 withRejecter:(RCTPromiseRejectBlock)reject)
+{
+    RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
+    if (context == nil) {
+        reject(@"llama_error", @"Context not found", nil);
+        return;
+    }
+
+    if (![context isVocoderEnabled]) {
+        reject(@"llama_error", @"Vocoder is not enabled", nil);
+        return;
+    }
+
+    @try {
+        NSArray *audioData = [context decodeAudioTokens:tokens];
+        resolve(audioData);
+    } @catch (NSException *exception) {
+        reject(@"llama_cpp_error", exception.reason, nil);
+    }
+}
+
+RCT_EXPORT_METHOD(releaseVocoder:(double)contextId
+                 withResolver:(RCTPromiseResolveBlock)resolve
+                 withRejecter:(RCTPromiseRejectBlock)reject)
+{
+    RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
+    if (context == nil) {
+        reject(@"llama_error", @"Context not found", nil);
+        return;
+    }
+
+    [context releaseVocoder];
+    resolve(nil);
+}
+
 RCT_EXPORT_METHOD(releaseContext:(double)contextId
                  withResolver:(RCTPromiseResolveBlock)resolve
                  withRejecter:(RCTPromiseRejectBlock)reject)
package/ios/RNLlamaContext.h
CHANGED
@@ -43,12 +43,14 @@
 - (NSDictionary *)tokenize:(NSString *)text imagePaths:(NSArray *)imagePaths;
 - (NSString *)detokenize:(NSArray *)tokens;
 - (NSDictionary *)embedding:(NSString *)text params:(NSDictionary *)params;
+- (NSArray *)rerank:(NSString *)query documents:(NSArray<NSString *> *)documents params:(NSDictionary *)params;
 - (NSDictionary *)getFormattedChatWithJinja:(NSString *)messages
                            withChatTemplate:(NSString *)chatTemplate
                              withJsonSchema:(NSString *)jsonSchema
                                   withTools:(NSString *)tools
                       withParallelToolCalls:(BOOL)parallelToolCalls
-                             withToolChoice:(NSString *)toolChoice
+                             withToolChoice:(NSString *)toolChoice
+                         withEnableThinking:(BOOL)enableThinking;
 - (NSString *)getFormattedChat:(NSString *)messages withChatTemplate:(NSString *)chatTemplate;
 - (NSDictionary *)loadSession:(NSString *)path;
 - (int)saveSession:(NSString *)path size:(int)size;
@@ -56,6 +58,12 @@
 - (void)applyLoraAdapters:(NSArray *)loraAdapters;
 - (void)removeLoraAdapters;
 - (NSArray *)getLoadedLoraAdapters;
+- (bool)initVocoder:(NSString *)vocoderModelPath;
+- (bool)isVocoderEnabled;
+- (NSString *)getFormattedAudioCompletion:(NSString *)speakerJsonStr textToSpeak:(NSString *)textToSpeak;
+- (NSArray *)getAudioCompletionGuideTokens:(NSString *)textToSpeak;
+- (NSArray *)decodeAudioTokens:(NSArray *)tokens;
+- (void)releaseVocoder;
 - (void)invalidate;
 
 @end
package/ios/RNLlamaContext.mm
CHANGED
@@ -90,13 +90,6 @@
         NSLog(@"chatTemplate: %@", chatTemplate);
     }
 
-    NSString *reasoningFormat = params[@"reasoning_format"];
-    if (reasoningFormat && [reasoningFormat isEqualToString:@"deepseek"]) {
-        defaultParams.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    } else {
-        defaultParams.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-    }
-
     if (params[@"n_ctx"]) defaultParams.n_ctx = [params[@"n_ctx"] intValue];
     if (params[@"use_mlock"]) defaultParams.use_mlock = [params[@"use_mlock"]boolValue];
 
@@ -362,6 +355,7 @@
                                   withTools:(NSString *)tools
                       withParallelToolCalls:(BOOL)parallelToolCalls
                              withToolChoice:(NSString *)toolChoice
+                         withEnableThinking:(BOOL)enableThinking
 {
     auto tmpl_str = chatTemplate == nil ? "" : [chatTemplate UTF8String];
 
@@ -372,7 +366,8 @@
         jsonSchema == nil ? "" : [jsonSchema UTF8String],
         tools == nil ? "" : [tools UTF8String],
         parallelToolCalls,
-        toolChoice == nil ? "" : [toolChoice UTF8String]
+        toolChoice == nil ? "" : [toolChoice UTF8String],
+        enableThinking
     );
     result[@"prompt"] = [NSString stringWithUTF8String:chatParams.prompt.c_str()];
     result[@"chat_format"] = @(static_cast<int>(chatParams.format));
@@ -386,6 +381,7 @@
             @"token": @(trigger.token),
         }];
     }
+    result[@"thinking_forced_open"] = @(chatParams.thinking_forced_open);
     result[@"grammar_triggers"] = grammar_triggers;
     NSMutableArray *preserved_tokens = [[NSMutableArray alloc] init];
     for (const auto & token : chatParams.preserved_tokens) {
@@ -581,6 +577,16 @@
         }
     }
 
+    if (params[@"guide_tokens"] && [params[@"guide_tokens"] isKindOfClass:[NSArray class]]) {
+        NSArray *guide_tokens_array = params[@"guide_tokens"];
+        std::vector<llama_token> guide_tokens;
+        guide_tokens.reserve([guide_tokens_array count]);
+        for (NSNumber *token_num in guide_tokens_array) {
+            guide_tokens.push_back([token_num intValue]);
+        }
+        llama->setGuideTokens(guide_tokens);
+    }
+
     if (!llama->initSampling()) {
         @throw [NSException exceptionWithName:@"LlamaException" reason:@"Failed to initialize sampling" userInfo:nil];
     }
@@ -604,6 +610,9 @@
     } catch (const std::exception &e) {
         llama->endCompletion();
         @throw [NSException exceptionWithName:@"LlamaException" reason:[NSString stringWithUTF8String:e.what()] userInfo:nil];
+    } catch (const std::runtime_error& e) {
+        llama->endCompletion();
+        @throw [NSException exceptionWithName:@"LlamaException" reason:[NSString stringWithUTF8String:e.what()] userInfo:nil];
     }
 
     if (llama->context_full) {
@@ -680,7 +689,20 @@
     if (!llama->is_interrupted) {
         try {
             auto chat_format = params[@"chat_format"] ? [params[@"chat_format"] intValue] : COMMON_CHAT_FORMAT_CONTENT_ONLY;
-
+            common_chat_syntax chat_syntax;
+            chat_syntax.format = static_cast<common_chat_format>(chat_format);
+
+            NSString *reasoningFormat = params[@"reasoning_format"];
+            if (reasoningFormat && [reasoningFormat isEqualToString:@"deepseek"]) {
+                chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+            } else if (reasoningFormat && [reasoningFormat isEqualToString:@"deepseek-legacy"]) {
+                chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
+            } else {
+                chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_NONE;
+            }
+            chat_syntax.thinking_forced_open = [params[@"thinking_forced_open"] boolValue];
+
+            common_chat_msg message = common_chat_parse(llama->generated_text, false, chat_syntax);
             if (!message.reasoning_content.empty()) {
                 reasoningContent = [NSString stringWithUTF8String:message.reasoning_content.c_str()];
             }
@@ -716,6 +738,15 @@
     result[@"stopped_limit"] = @(llama->stopped_limit);
     result[@"stopping_word"] = [NSString stringWithUTF8String:llama->stopping_word.c_str()];
     result[@"tokens_cached"] = @(llama->n_past);
+
+    if (llama->isVocoderEnabled() && !llama->audio_tokens.empty()) {
+        NSMutableArray *audioTokens = [[NSMutableArray alloc] init];
+        for (llama_token token : llama->audio_tokens) {
+            [audioTokens addObject:@(token)];
+        }
+        result[@"audio_tokens"] = audioTokens;
+    }
+
     result[@"timings"] = @{
         @"prompt_n": @(timings.n_p_eval),
         @"prompt_ms": @(timings.t_p_eval_ms),
@@ -775,6 +806,8 @@
         return result;
     } catch (const std::exception &e) {
         @throw [NSException exceptionWithName:@"LlamaException" reason:[NSString stringWithUTF8String:e.what()] userInfo:nil];
+    } catch (const std::runtime_error& e) {
+        @throw [NSException exceptionWithName:@"LlamaException" reason:[NSString stringWithUTF8String:e.what()] userInfo:nil];
     }
 }
 
@@ -817,6 +850,9 @@
         } catch (const std::exception &e) {
             llama->endCompletion();
             @throw [NSException exceptionWithName:@"LlamaException" reason:[NSString stringWithUTF8String:e.what()] userInfo:nil];
+        } catch (const std::runtime_error& e) {
+            llama->endCompletion();
+            @throw [NSException exceptionWithName:@"LlamaException" reason:[NSString stringWithUTF8String:e.what()] userInfo:nil];
         }
         llama->doCompletion();
 
@@ -838,6 +874,34 @@
     return resultDict;
 }
 
+- (NSArray *)rerank:(NSString *)query documents:(NSArray<NSString *> *)documents params:(NSDictionary *)params {
+    // Convert NSArray to std::vector
+    std::vector<std::string> documentsVector;
+    for (NSString *doc in documents) {
+        documentsVector.push_back(std::string([doc UTF8String]));
+    }
+
+    NSMutableArray *resultArray = [[NSMutableArray alloc] init];
+
+    try {
+        std::vector<float> scores = llama->rerank(std::string([query UTF8String]), documentsVector);
+
+        // Create result array with score and index
+        for (size_t i = 0; i < scores.size(); i++) {
+            NSMutableDictionary *item = [[NSMutableDictionary alloc] init];
+            item[@"score"] = @(scores[i]);
+            item[@"index"] = @((int)i);
+            [resultArray addObject:item];
+        }
+    } catch (const std::exception &e) {
+        @throw [NSException exceptionWithName:@"LlamaException" reason:[NSString stringWithUTF8String:e.what()] userInfo:nil];
+    } catch (const std::runtime_error& e) {
+        @throw [NSException exceptionWithName:@"LlamaException" reason:[NSString stringWithUTF8String:e.what()] userInfo:nil];
+    }
+
+    return resultArray;
+}
+
 - (NSDictionary *)loadSession:(NSString *)path {
     if (!path || [path length] == 0) {
         @throw [NSException exceptionWithName:@"LlamaException" reason:@"Session path is empty" userInfo:nil];
@@ -920,6 +984,45 @@
     return result;
 }
 
+- (bool)initVocoder:(NSString *)vocoderModelPath {
+    return llama->initVocoder([vocoderModelPath UTF8String]);
+}
+
+- (bool)isVocoderEnabled {
+    return llama->isVocoderEnabled();
+}
+
+- (NSString *)getFormattedAudioCompletion:(NSString *)speakerJsonStr textToSpeak:(NSString *)textToSpeak {
+    std::string speakerStr = speakerJsonStr ? [speakerJsonStr UTF8String] : "";
+    return [NSString stringWithUTF8String:llama->getFormattedAudioCompletion(speakerStr, [textToSpeak UTF8String]).c_str()];
+}
+
+- (NSArray *)getAudioCompletionGuideTokens:(NSString *)textToSpeak {
+    std::vector<llama_token> guide_tokens = llama->getAudioCompletionGuideTokens([textToSpeak UTF8String]);
+    NSMutableArray *result = [[NSMutableArray alloc] init];
+    for (llama_token token : guide_tokens) {
+        [result addObject:@(token)];
+    }
+    return result;
+}
+
+- (NSArray *)decodeAudioTokens:(NSArray *)tokens {
+    std::vector<llama_token> token_vector;
+    for (NSNumber *token in tokens) {
+        token_vector.push_back([token intValue]);
+    }
+    std::vector<float> audio_data = llama->decodeAudioTokens(token_vector);
+    NSMutableArray *result = [[NSMutableArray alloc] init];
+    for (float sample : audio_data) {
+        [result addObject:@(sample)];
+    }
+    return result;
+}
+
+- (void)releaseVocoder {
+    llama->releaseVocoder();
+}
+
 - (void)invalidate {
     delete llama;
     // llama_backend_free();