cui-llama.rn 1.7.3 → 1.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +217 -17
- package/android/src/main/CMakeLists.txt +34 -15
- package/android/src/main/java/com/rnllama/LlamaContext.java +94 -8
- package/android/src/main/java/com/rnllama/RNLlama.java +247 -0
- package/android/src/main/jni.cpp +213 -14
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
- package/cpp/README.md +1 -1
- package/cpp/chat-parser.cpp +385 -0
- package/cpp/chat-parser.h +120 -0
- package/cpp/chat.cpp +726 -596
- package/cpp/chat.h +71 -6
- package/cpp/common.cpp +56 -38
- package/cpp/common.h +9 -3
- package/cpp/ggml-backend-reg.cpp +5 -0
- package/cpp/ggml-backend.cpp +10 -2
- package/cpp/ggml-common.h +4 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/common.h +4 -3
- package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
- package/cpp/ggml-cpu/ggml-cpu.c +123 -104
- package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
- package/cpp/ggml-cpu/ops.cpp +330 -148
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/quants.c +1158 -0
- package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/ggml-cpu/repack.cpp +1571 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +330 -38
- package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/ggml-cpu/vec.cpp +87 -18
- package/cpp/ggml-cpu/vec.h +249 -94
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +63 -183
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal.m +152 -45
- package/cpp/ggml-quants.c +0 -2
- package/cpp/ggml.c +61 -21
- package/cpp/ggml.h +22 -3
- package/cpp/gguf.cpp +24 -3
- package/cpp/json-partial.cpp +256 -0
- package/cpp/json-partial.h +38 -0
- package/cpp/json-schema-to-grammar.cpp +5 -47
- package/cpp/json-schema-to-grammar.h +4 -4
- package/cpp/llama-arch.cpp +153 -3
- package/cpp/llama-arch.h +27 -1
- package/cpp/llama-batch.cpp +741 -272
- package/cpp/llama-batch.h +112 -54
- package/cpp/llama-chat.cpp +30 -8
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-context.cpp +524 -339
- package/cpp/llama-context.h +38 -17
- package/cpp/llama-cparams.cpp +4 -0
- package/cpp/llama-cparams.h +2 -0
- package/cpp/llama-grammar.cpp +12 -2
- package/cpp/llama-graph.cpp +431 -356
- package/cpp/llama-graph.h +126 -58
- package/cpp/llama-hparams.cpp +10 -2
- package/cpp/llama-hparams.h +19 -2
- package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
- package/cpp/llama-kv-cache-unified-iswa.h +128 -0
- package/cpp/llama-kv-cache-unified.cpp +1841 -0
- package/cpp/llama-kv-cache-unified.h +303 -0
- package/cpp/llama-kv-cells.h +439 -0
- package/cpp/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama-memory-hybrid.h +138 -0
- package/cpp/llama-memory-recurrent.cpp +1112 -0
- package/cpp/llama-memory-recurrent.h +183 -0
- package/cpp/llama-memory.cpp +41 -0
- package/cpp/llama-memory.h +86 -5
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +42 -17
- package/cpp/llama-model-saver.cpp +1 -0
- package/cpp/llama-model.cpp +1639 -513
- package/cpp/llama-model.h +26 -0
- package/cpp/llama-sampling.cpp +2 -2
- package/cpp/llama-vocab.cpp +65 -28
- package/cpp/llama-vocab.h +1 -0
- package/cpp/llama.cpp +11 -7
- package/cpp/llama.h +150 -42
- package/cpp/minja/chat-template.hpp +1 -1
- package/cpp/minja/minja.hpp +1 -1
- package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/cpp/nlohmann/json_fwd.hpp +187 -0
- package/cpp/regex-partial.cpp +204 -0
- package/cpp/regex-partial.h +56 -0
- package/cpp/rn-llama.cpp +646 -35
- package/cpp/rn-llama.h +32 -1
- package/cpp/rn-tts.h +39 -0
- package/cpp/sampling.cpp +7 -8
- package/cpp/tools/mtmd/clip-impl.h +5 -0
- package/cpp/tools/mtmd/clip.cpp +572 -436
- package/cpp/tools/mtmd/clip.h +14 -4
- package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
- package/cpp/tools/mtmd/mtmd-audio.h +2 -17
- package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
- package/cpp/tools/mtmd/mtmd-helper.h +91 -0
- package/cpp/tools/mtmd/mtmd.cpp +368 -248
- package/cpp/tools/mtmd/mtmd.h +6 -70
- package/cpp/unicode.cpp +5 -0
- package/ios/CMakeLists.txt +26 -6
- package/ios/RNLlama.h +1 -1
- package/ios/RNLlama.mm +153 -3
- package/ios/RNLlamaContext.h +9 -1
- package/ios/RNLlamaContext.mm +112 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +24 -0
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +46 -2
- package/src/index.ts +105 -1
- package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/cpp/ggml-cpu/sgemm.cpp +0 -3544
- package/cpp/ggml-cpu/sgemm.h +0 -14
- package/cpp/llama-kv-cache.cpp +0 -2827
- package/cpp/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
- /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
- /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
package/README.md
CHANGED
@@ -55,6 +55,8 @@ For get a GGUF model or quantize manually, see [`Prepare and Quantize`](https://
 
 ## Usage
 
+> **💡 New!** `llama.rn` now supports **multimodal models** with vision and audio capabilities! See the [Multimodal section](#multimodal-vision--audio) for details.
+
 Load model info only:
 
 ```js
@@ -123,49 +125,162 @@
 console.log('Timings:', textResult.timings)
 ```
 
-The binding
+The binding's design is inspired by the [server.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) example in llama.cpp:
 
 - `/completion` and `/chat/completions`: `context.completion(params, partialCompletionCallback)`
 - `/tokenize`: `context.tokenize(content)`
 - `/detokenize`: `context.detokenize(tokens)`
 - `/embedding`: `context.embedding(content)`
+- `/rerank`: `context.rerank(query, documents, params)`
 - ... Other methods
 
 Please visit the [Documentation](docs/API) for more details.
 
 You can also visit the [example](example) to see how to use it.
 
-##
+## Multimodal (Vision & Audio)
 
-
+`llama.rn` supports multimodal capabilities including vision (images) and audio processing. This allows you to interact with models that can understand both text and media content.
+
+### Supported Media Formats
+
+**Images (Vision):**
+- JPEG, PNG, BMP, GIF, TGA, HDR, PIC, PNM
+- Base64 encoded images (data URLs)
+- Local file paths
+- \* HTTP URLs are not supported yet
+
+**Audio:**
+- WAV, MP3 formats
+- Base64 encoded audio (data URLs)
+- Local file paths
+- \* HTTP URLs are not supported yet
+
+### Setup
+
+First, you need a multimodal model and its corresponding multimodal projector (mmproj) file; see [how to obtain mmproj](https://github.com/ggml-org/llama.cpp/tree/master/tools/mtmd#how-to-obtain-mmproj) for more details.
+
+### Initialize Multimodal Support
 
 ```js
-
+import { initLlama } from 'llama.rn'
 
-//
+// First initialize the model context
+const context = await initLlama({
+  model: 'path/to/your/multimodal-model.gguf',
+  n_ctx: 4096,
+  n_gpu_layers: 99, // Recommended for multimodal models
+  // Important: Disable context shifting for multimodal
+  ctx_shift: false,
+})
 
-//
-await context.
+// Initialize multimodal support with mmproj file
+const success = await context.initMultimodal({
+  path: 'path/to/your/mmproj-model.gguf',
+  use_gpu: true, // Recommended for better performance
+})
 
-//
-
+// Check if multimodal is enabled
+console.log('Multimodal enabled:', await context.isMultimodalEnabled())
+
+if (success) {
+  console.log('Multimodal support initialized!')
+
+  // Check what modalities are supported
+  const support = await context.getMultimodalSupport()
+  console.log('Vision support:', support.vision)
+  console.log('Audio support:', support.audio)
+} else {
+  console.log('Failed to initialize multimodal support')
+}
+
+// Release multimodal context
+await context.releaseMultimodal()
 ```
 
-
+### Usage Examples
 
-
+#### Vision (Image Processing)
 
 ```js
-const
-
-
+const result = await context.completion({
+  messages: [
+    {
+      role: 'user',
+      content: [
+        {
+          type: 'text',
+          text: 'What do you see in this image?',
+        },
+        {
+          type: 'image_url',
+          image_url: {
+            url: 'file:///path/to/image.jpg',
+            // or base64: 'data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEAYABgAAD...'
+          },
+        },
+      ],
+    },
+  ],
+  n_predict: 100,
+  temperature: 0.1,
 })
 
-
+console.log('AI Response:', result.text)
 ```
 
-
-
+#### Audio Processing
+
+```js
+// Method 1: Using structured message content (Recommended)
+const result = await context.completion({
+  messages: [
+    {
+      role: 'user',
+      content: [
+        {
+          type: 'text',
+          text: 'Transcribe or describe this audio:',
+        },
+        {
+          type: 'input_audio',
+          input_audio: {
+            data: 'data:audio/wav;base64,UklGRiQAAABXQVZFZm10...',
+            // or url: 'file:///path/to/audio.wav',
+            format: 'wav', // or 'mp3'
+          },
+        },
+      ],
+    },
+  ],
+  n_predict: 200,
+})
+
+console.log('Transcription:', result.text)
+```
+
+### Tokenization with Media
+
+```js
+// Tokenize text with media
+const tokenizeResult = await context.tokenize(
+  'Describe this image: <__media__>',
+  {
+    media_paths: ['file:///path/to/image.jpg']
+  }
+)
+
+console.log('Tokens:', tokenizeResult.tokens)
+console.log('Has media:', tokenizeResult.has_media)
+console.log('Media positions:', tokenizeResult.chunk_pos_media)
+```
+
+### Notes
+
+- **Context Shifting**: Multimodal models require `ctx_shift: false` to maintain media token positioning
+- **Memory**: Multimodal models require more memory; use adequate `n_ctx` and consider GPU offloading
+- **Media Markers**: The system automatically handles `<__media__>` markers in prompts. When using structured message content, media items are automatically replaced with this marker
+- **Model Compatibility**: Ensure your model supports the media type you're trying to process
 
 ## Tool Calling
 
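The tokenize example in the hunk above pairs a raw prompt containing the `<__media__>` marker with a `media_paths` array. Assuming `completion` accepts the same `media_paths` option for raw prompts (the marker-handling note suggests it, but this diff does not show it verbatim), a minimal sketch would be:

```js
// Sketch only: assumes completion() accepts `media_paths` alongside a raw
// prompt, mirroring the tokenize() signature shown in the diff above.
// Each <__media__> marker is matched positionally with an entry in media_paths.
const result = await context.completion({
  prompt: 'USER: Describe this image: <__media__>\nASSISTANT:',
  media_paths: ['file:///path/to/image.jpg'],
  n_predict: 100,
})

console.log('AI Response:', result.text)
```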
@@ -289,6 +404,91 @@ console.log('Result:', text)
 
 Also, this is how `json_schema` works in `response_format` during completion, it converts the json_schema to gbnf grammar.
+
+## Session (State)
+
+The session file is a binary file that contains the state of the context; it can save time on prompt processing.
+
+```js
+const context = await initLlama({ ...params })
+
+// After prompt processing or completion ...
+
+// Save the session
+await context.saveSession('<path to save session>')
+
+// Load the session
+await context.loadSession('<path to load session>')
+```
+
+### Notes
+
+- \* Saving state from a multimodal context is not supported yet, so the session only stores the text chunk before the first media chunk.
+
+## Embedding
+
+The embedding API is used to get the embedding of a text.
+
+```js
+const context = await initLlama({
+  ...params,
+  embedding: true,
+})
+
+const { embedding } = await context.embedding('Hello, world!')
+```
+
+- You can use a model like [nomic-ai/nomic-embed-text-v1.5-GGUF](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF) for better embedding quality.
+- You can use a DB like [op-sqlite](https://github.com/OP-Engineering/op-sqlite) with sqlite-vec support to store and search embeddings.
+
+## Rerank
+
+The rerank API is used to rank documents based on their relevance to a query. This is particularly useful for improving search results and implementing retrieval-augmented generation (RAG) systems.
+
+```js
+const context = await initLlama({
+  ...params,
+  embedding: true, // Required for reranking
+  pooling_type: 'rank', // Use rank pooling for rerank models
+})
+
+// Rerank documents based on relevance to query
+const results = await context.rerank(
+  'What is artificial intelligence?', // query
+  [
+    'AI is a branch of computer science.',
+    'The weather is nice today.',
+    'Machine learning is a subset of AI.',
+    'I like pizza.',
+  ], // documents to rank
+  {
+    normalize: 1, // Optional: normalize scores (default: from model config)
+  }
+)
+
+// Results are automatically sorted by score (highest first)
+results.forEach((result, index) => {
+  console.log(`Rank ${index + 1}:`, {
+    score: result.score,
+    document: result.document,
+    originalIndex: result.index,
+  })
+})
+```
+
+### Notes
+
+- **Model Requirements**: Reranking requires models with `RANK` pooling type (e.g., reranker models)
+- **Embedding Enabled**: The context must have `embedding: true` to use rerank functionality
+- **Automatic Sorting**: Results are returned sorted by relevance score in descending order
+- **Document Access**: Each result includes the original document text and its index in the input array
+- **Score Interpretation**: Higher scores indicate higher relevance to the query
+
+### Recommended Models
+
+- [jinaai - jina-reranker-v2-base-multilingual-GGUF](https://huggingface.co/gpustack/jina-reranker-v2-base-multilingual-GGUF)
+- [BAAI - bge-reranker-v2-m3-GGUF](https://huggingface.co/gpustack/bge-reranker-v2-m3-GGUF)
+- Other models with "rerank" or "reranker" in their name and GGUF format
 
 ## Mock `llama.rn`
 
 We have provided a mock version of `llama.rn` for testing purpose you can use on Jest:
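Building on the rerank API documented in the hunk above, a small retrieval step for RAG pipelines can be layered directly on `context.rerank`. A minimal sketch (the helper name is illustrative, not part of the package):

```js
// Sketch: pick the top-k most relevant documents for a query using the
// rerank API documented above. `context` is a rerank-capable LlamaContext.
async function topKDocuments(context, query, documents, k = 3) {
  const ranked = await context.rerank(query, documents)
  // Results arrive sorted by score (highest first), so take the first k
  return ranked.slice(0, k).map((r) => ({ text: r.document, score: r.score }))
}

const top = await topKDocuments(context, 'What is artificial intelligence?', [
  'AI is a branch of computer science.',
  'The weather is nice today.',
  'Machine learning is a subset of AI.',
])
console.log(top)
```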
package/android/src/main/CMakeLists.txt
CHANGED
@@ -27,12 +27,11 @@ set(
     ${RNLLAMA_LIB_DIR}/ggml-cpu/amx/mmq.cpp
     ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu.c
     ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu.cpp
-    ${RNLLAMA_LIB_DIR}/ggml-cpu/
-    ${RNLLAMA_LIB_DIR}/ggml-cpu/
-    ${RNLLAMA_LIB_DIR}/ggml-cpu/
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/quants.c
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/traits.cpp
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/repack.cpp
     ${RNLLAMA_LIB_DIR}/ggml-cpu/unary-ops.cpp
     ${RNLLAMA_LIB_DIR}/ggml-cpu/binary-ops.cpp
-    ${RNLLAMA_LIB_DIR}/ggml-cpu/sgemm.cpp
     ${RNLLAMA_LIB_DIR}/ggml-cpu/vec.cpp
     ${RNLLAMA_LIB_DIR}/ggml-cpu/ops.cpp
     ${RNLLAMA_LIB_DIR}/ggml-opt.cpp
@@ -41,6 +40,9 @@ set(
     ${RNLLAMA_LIB_DIR}/gguf.cpp
     ${RNLLAMA_LIB_DIR}/log.cpp
     ${RNLLAMA_LIB_DIR}/llama-impl.cpp
+    ${RNLLAMA_LIB_DIR}/chat-parser.cpp
+    ${RNLLAMA_LIB_DIR}/json-partial.cpp
+    ${RNLLAMA_LIB_DIR}/regex-partial.cpp
     # Multimodal support
     ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd.cpp
     ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd-audio.cpp
@@ -52,7 +54,6 @@ set(
     ${RNLLAMA_LIB_DIR}/llama-adapter.cpp
     ${RNLLAMA_LIB_DIR}/llama-chat.cpp
     ${RNLLAMA_LIB_DIR}/llama-context.cpp
-    ${RNLLAMA_LIB_DIR}/llama-kv-cache.cpp
     ${RNLLAMA_LIB_DIR}/llama-arch.cpp
     ${RNLLAMA_LIB_DIR}/llama-batch.cpp
     ${RNLLAMA_LIB_DIR}/llama-cparams.cpp
@@ -60,6 +61,10 @@ set(
     ${RNLLAMA_LIB_DIR}/llama.cpp
     ${RNLLAMA_LIB_DIR}/llama-model.cpp
     ${RNLLAMA_LIB_DIR}/llama-model-loader.cpp
+    ${RNLLAMA_LIB_DIR}/llama-kv-cache-unified.cpp
+    ${RNLLAMA_LIB_DIR}/llama-kv-cache-unified-iswa.cpp
+    ${RNLLAMA_LIB_DIR}/llama-memory-hybrid.cpp
+    ${RNLLAMA_LIB_DIR}/llama-memory-recurrent.cpp
     ${RNLLAMA_LIB_DIR}/llama-mmap.cpp
     ${RNLLAMA_LIB_DIR}/llama-vocab.cpp
     ${RNLLAMA_LIB_DIR}/llama-memory.cpp
@@ -71,7 +76,8 @@ set(
     ${RNLLAMA_LIB_DIR}/common.cpp
     ${RNLLAMA_LIB_DIR}/chat.cpp
     ${RNLLAMA_LIB_DIR}/json-schema-to-grammar.cpp
-    ${RNLLAMA_LIB_DIR}/json.hpp
+    ${RNLLAMA_LIB_DIR}/nlohmann/json.hpp
+    ${RNLLAMA_LIB_DIR}/nlohmann/json_fwd.hpp
     ${RNLLAMA_LIB_DIR}/minja/minja.hpp
     ${RNLLAMA_LIB_DIR}/minja/chat-template.hpp
     ${RNLLAMA_LIB_DIR}/rn-llama.cpp
@@ -81,16 +87,28 @@ set(
 
 find_library(LOG_LIB log)
 
-function(build_library target_name cpu_flags)
+function(build_library target_name arch cpu_flags)
+    if (NOT ${arch} STREQUAL "generic")
+        set(SOURCE_FILES_ARCH
+            ${RNLLAMA_LIB_DIR}/ggml-cpu/arch/${arch}/quants.c
+            ${RNLLAMA_LIB_DIR}/ggml-cpu/arch/${arch}/repack.cpp
+        )
+    endif ()
+
     add_library(
         ${target_name}
         SHARED
         ${SOURCE_FILES}
+        ${SOURCE_FILES_ARCH}
     )
 
     target_link_libraries(${target_name} ${LOG_LIB} android)
 
-
+    if (${arch} STREQUAL "generic")
+        target_compile_options(${target_name} PRIVATE -DLM_GGML_CPU_GENERIC)
+    endif ()
+
+    target_compile_options(${target_name} PRIVATE -DLM_GGML_USE_CPU -DLM_GGML_USE_CPU_REPACK -DRNLLAMA_USE_FD_FILE -pthread ${cpu_flags})
 
     if (${CMAKE_BUILD_TYPE} STREQUAL "Debug")
         target_compile_options(${target_name} PRIVATE -DRNLLAMA_ANDROID_ENABLE_LOGGING)
@@ -111,17 +129,17 @@ endfunction()
 
 
 # Default target (no specific CPU features)
-build_library("rnllama" "")
+build_library("rnllama" "generic" "")
 
 if (${ANDROID_ABI} STREQUAL "arm64-v8a")
     # ARM64 targets
     # Removing fp16 for now as it leads to issues with some models like deepseek r1 distills
    # https://github.com/mybigday/llama.rn/pull/110#issuecomment-2609918310
-    build_library("rnllama_v8" "-march=armv8-a")
-    build_library("rnllama_v8_2" "-march=armv8.2-a")
-    build_library("rnllama_v8_2_dotprod" "-march=armv8.2-a+dotprod")
-    build_library("rnllama_v8_2_i8mm" "-march=armv8.2-a+i8mm")
-    build_library("rnllama_v8_2_dotprod_i8mm" "-march=armv8.2-a+dotprod+i8mm")
+    build_library("rnllama_v8" "arm" "-march=armv8-a")
+    build_library("rnllama_v8_2" "arm" "-march=armv8.2-a")
+    build_library("rnllama_v8_2_dotprod" "arm" "-march=armv8.2-a+dotprod")
+    build_library("rnllama_v8_2_i8mm" "arm" "-march=armv8.2-a+i8mm")
+    build_library("rnllama_v8_2_dotprod_i8mm" "arm" "-march=armv8.2-a+dotprod+i8mm")
 
     # https://github.com/ggerganov/llama.cpp/blob/master/docs/android.md#cross-compile-using-android-ndk
     # llama.cpp will deal with the cpu features
@@ -131,5 +149,6 @@ if (${ANDROID_ABI} STREQUAL "arm64-v8a")
 
 elseif (${ANDROID_ABI} STREQUAL "x86_64")
     # x86_64 target
-    build_library("rnllama_x86_64" "-march=x86-64" "-mtune=intel" "-msse4.2" "-mpopcnt")
+    build_library("rnllama_x86_64" "x86" "-march=x86-64" "-mtune=intel" "-msse4.2" "-mpopcnt")
+
 endif ()
package/android/src/main/java/com/rnllama/LlamaContext.java
CHANGED
@@ -69,7 +69,11 @@ public class LlamaContext {
     try {
       if (filepath.startsWith("content")) {
         Uri uri = Uri.parse(filepath);
-
+        try {
+          reactContext.getApplicationContext().getContentResolver().takePersistableUriPermission(uri, Intent.FLAG_GRANT_READ_URI_PERMISSION);
+        } catch (SecurityException e) {
+          Log.w(NAME, "Persistable permission not granted for URI: " + uri);
+        }
         fis = reactContext.getApplicationContext().getContentResolver().openInputStream(uri);
       } else {
         fis = new FileInputStream(filepath);
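This hunk hardens Android `content://` handling by taking a persistable read permission before opening the input stream (downgrading a `SecurityException` to a warning). On the JS side such a URI is simply passed as the model path; a minimal sketch (the document-picker URI below is hypothetical):

```js
// Sketch: on Android, a SAF document-picker URI can be used as the model
// path; the Java layer above now takes a persistable read permission for it.
const context = await initLlama({
  model: 'content://com.android.providers.downloads.documents/document/123', // hypothetical URI
  n_ctx: 2048,
})
```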
@@ -107,7 +111,11 @@ public class LlamaContext {
     }
 
     String modelName = params.getString("model");
-
+
+    if(!isGGUF(modelName, reactContext)) {
+      throw new IllegalArgumentException("File is not in GGUF format");
+    }
+
     if (modelName.startsWith("content://")) {
       Uri uri = Uri.parse(modelName);
       try {
@@ -117,7 +125,6 @@ public class LlamaContext {
         Log.e(NAME, "Failed to convert to FD!");
       }
     }
-
 
     // Check if file has GGUF magic numbers
     this.id = id;
@@ -127,8 +134,6 @@ public class LlamaContext {
       modelName,
       // String chat_template,
       params.hasKey("chat_template") ? params.getString("chat_template") : "",
-      // String reasoning_format,
-      params.hasKey("reasoning_format") ? params.getString("reasoning_format") : "none",
       // boolean embedding,
       params.hasKey("embedding") ? params.getBoolean("embedding") : false,
       // int embd_normalize,
@@ -200,6 +205,7 @@ public class LlamaContext {
     String tools = params.hasKey("tools") ? params.getString("tools") : "";
     Boolean parallelToolCalls = params.hasKey("parallel_tool_calls") ? params.getBoolean("parallel_tool_calls") : false;
     String toolChoice = params.hasKey("tool_choice") ? params.getString("tool_choice") : "";
+    Boolean enableThinking = params.hasKey("enable_thinking") ? params.getBoolean("enable_thinking") : false;
     return getFormattedChatWithJinja(
       this.context,
       messages,
@@ -207,7 +213,8 @@ public class LlamaContext {
       jsonSchema,
       tools,
       parallelToolCalls,
-      toolChoice
+      toolChoice,
+      enableThinking
     );
   }
 
@@ -296,12 +303,25 @@ public class LlamaContext {
       }
     }
 
+    int[] guide_tokens = null;
+    if (params.hasKey("guide_tokens")) {
+      ReadableArray guide_tokens_array = params.getArray("guide_tokens");
+      guide_tokens = new int[guide_tokens_array.size()];
+      for (int i = 0; i < guide_tokens_array.size(); i++) {
+        guide_tokens[i] = (int) guide_tokens_array.getDouble(i);
+      }
+    }
+
     WritableMap result = doCompletion(
       this.context,
       // String prompt,
       params.getString("prompt"),
+      // int[] guide_tokens,
+      guide_tokens,
       // int chat_format,
       params.hasKey("chat_format") ? params.getInt("chat_format") : 0,
+      // String reasoning_format,
+      params.hasKey("reasoning_format") ? params.getString("reasoning_format") : "none",
       // String grammar,
       params.hasKey("grammar") ? params.getString("grammar") : "",
       // String json_schema,
@@ -312,6 +332,8 @@ public class LlamaContext {
       params.hasKey("grammar_triggers") ? params.getArray("grammar_triggers") : null,
       // ReadableArray preserved_tokens,
       params.hasKey("preserved_tokens") ? params.getArray("preserved_tokens") : null,
+      // boolean thinking_forced_open,
+      params.hasKey("thinking_forced_open") ? params.getBoolean("thinking_forced_open") : false,
       // float temperature,
       params.hasKey("temperature") ? (float) params.getDouble("temperature") : 0.7f,
       // int n_threads,
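The hunks above thread three new completion inputs through the Java bridge: `guide_tokens`, `reasoning_format`, and `thinking_forced_open`, each read from the params map. A minimal sketch of how a JS caller might set them (the key names are taken from the `params.hasKey(...)` reads above; whether the JS API forwards all of them is an assumption, since the TypeScript side is not shown here):

```js
// Sketch only: keys mirror the params.hasKey(...) reads in the Java hunk above.
const result = await context.completion({
  prompt: 'Hello',
  reasoning_format: 'none',     // default used by the Java side
  thinking_forced_open: false,  // new boolean forwarded to doCompletion
  guide_tokens: [1, 2, 3],      // converted to int[] before the native call
  n_predict: 64,
})
```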
@@ -416,6 +438,27 @@ public class LlamaContext {
     return result;
   }
 
+  public WritableArray getRerank(String query, ReadableArray documents, ReadableMap params) {
+    if (isEmbeddingEnabled(this.context) == false) {
+      throw new IllegalStateException("Embedding is not enabled but required for reranking");
+    }
+
+    // Convert ReadableArray to Java string array
+    String[] documentsArray = new String[documents.size()];
+    for (int i = 0; i < documents.size(); i++) {
+      documentsArray[i] = documents.getString(i);
+    }
+
+    WritableArray result = rerank(
+      this.context,
+      query,
+      documentsArray,
+      // int normalize,
+      params.hasKey("normalize") ? params.getInt("normalize") : -1
+    );
+    return result;
+  }
+
   public String bench(int pp, int tg, int pl, int nr) {
     return bench(this.context, pp, tg, pl, nr);
   }
@@ -442,6 +485,11 @@ public class LlamaContext {
     if (mmprojPath == null || mmprojPath.isEmpty()) {
       throw new IllegalArgumentException("mmproj_path is empty");
     }
+
+    if(!isGGUF(mmprojPath, this.reactContext)) {
+      throw new IllegalArgumentException("File is not in GGUF format");
+    }
+
     File file = new File(mmprojPath);
     if (!mmprojPath.startsWith("content") && !file.exists()) {
       throw new IllegalArgumentException("mmproj file does not exist: " + mmprojPath);
@@ -475,6 +523,34 @@ public class LlamaContext {
     releaseMultimodal(this.context);
   }
 
+  public boolean initVocoder(String vocoderModelPath) {
+    return initVocoder(this.context, vocoderModelPath);
+  }
+
+  public boolean isVocoderEnabled() {
+    return isVocoderEnabled(this.context);
+  }
+
+  public String getFormattedAudioCompletion(String speakerJsonStr, String textToSpeak) {
+    return getFormattedAudioCompletion(this.context, speakerJsonStr, textToSpeak);
+  }
+
+  public WritableArray getAudioCompletionGuideTokens(String textToSpeak) {
+    return getAudioCompletionGuideTokens(this.context, textToSpeak);
+  }
+
+  public WritableArray decodeAudioTokens(ReadableArray tokens) {
+    int[] toks = new int[tokens.size()];
+    for (int i = 0; i < tokens.size(); i++) {
+      toks[i] = (int) tokens.getDouble(i);
+    }
+    return decodeAudioTokens(this.context, toks);
+  }
+
+  public void releaseVocoder() {
+    releaseVocoder(this.context);
+  }
+
   public void release() {
     freeContext(context);
   }
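The vocoder wrappers above outline a TTS flow: initialize a vocoder model, format the audio completion, generate with guide tokens, then decode audio tokens. A sketch of how the corresponding JS-side calls might chain together (the JS method names are assumed to mirror the Java wrappers, and the `audio_tokens` result field is an assumption; neither is shown in this diff):

```js
// Sketch of the TTS flow implied by the Java wrappers above (assumed JS names).
const ok = await context.initVocoder('path/to/vocoder-model.gguf')
if (ok && (await context.isVocoderEnabled())) {
  const speakerJson = '{}' // speaker config JSON; exact schema not shown here
  const prompt = await context.getFormattedAudioCompletion(speakerJson, 'Hello!')
  const guideTokens = await context.getAudioCompletionGuideTokens('Hello!')

  // Generate audio tokens guided toward the target text, then decode them
  const completion = await context.completion({ prompt, guide_tokens: guideTokens })
  const samples = await context.decodeAudioTokens(completion.audio_tokens) // field name assumed
  console.log('Decoded samples:', samples.length)

  await context.releaseVocoder()
}
```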
@@ -576,7 +652,6 @@ public class LlamaContext {
   protected static native long initContext(
     String model_path,
     String chat_template,
-    String reasoning_format,
     boolean embedding,
     int embd_normalize,
     int n_ctx,
@@ -613,7 +688,8 @@ public class LlamaContext {
     String jsonSchema,
     String tools,
     boolean parallelToolCalls,
-    String toolChoice
+    String toolChoice,
+    boolean enableThinking
   );
   protected static native String getFormattedChat(
     long contextPtr,
@@ -632,12 +708,15 @@ public class LlamaContext {
   protected static native WritableMap doCompletion(
     long context_ptr,
     String prompt,
+    int[] guide_tokens,
     int chat_format,
+    String reasoning_format,
     String grammar,
     String json_schema,
     boolean grammar_lazy,
     ReadableArray grammar_triggers,
     ReadableArray preserved_tokens,
+    boolean thinking_forced_open,
     float temperature,
     int n_threads,
     int n_predict,
@@ -678,6 +757,7 @@ public class LlamaContext {
     String text,
     int embd_normalize
   );
+  protected static native WritableArray rerank(long contextPtr, String query, String[] documents, int normalize);
   protected static native String bench(long contextPtr, int pp, int tg, int pl, int nr);
   protected static native int applyLoraAdapters(long contextPtr, ReadableArray loraAdapters);
   protected static native void removeLoraAdapters(long contextPtr);
@@ -686,4 +766,10 @@ public class LlamaContext {
   protected static native void setupLog(NativeLogCallback logCallback);
   protected static native void unsetLog();
   protected static native void releaseMultimodal(long contextPtr);
+  protected static native boolean isVocoderEnabled(long contextPtr);
+  protected static native String getFormattedAudioCompletion(long contextPtr, String speakerJsonStr, String textToSpeak);
+  protected static native WritableArray getAudioCompletionGuideTokens(long contextPtr, String textToSpeak);
+  protected static native WritableArray decodeAudioTokens(long contextPtr, int[] tokens);
+  protected static native boolean initVocoder(long contextPtr, String vocoderModelPath);
+  protected static native void releaseVocoder(long contextPtr);
 }