cui-llama.rn 1.7.4 → 1.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +217 -17
- package/android/src/main/CMakeLists.txt +34 -15
- package/android/src/main/java/com/rnllama/LlamaContext.java +79 -5
- package/android/src/main/java/com/rnllama/RNLlama.java +237 -0
- package/android/src/main/jni.cpp +213 -14
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
- package/cpp/README.md +1 -1
- package/cpp/chat-parser.cpp +385 -0
- package/cpp/chat-parser.h +120 -0
- package/cpp/chat.cpp +726 -596
- package/cpp/chat.h +71 -6
- package/cpp/common.cpp +56 -38
- package/cpp/common.h +9 -3
- package/cpp/ggml-backend-reg.cpp +5 -0
- package/cpp/ggml-backend.cpp +10 -2
- package/cpp/ggml-common.h +4 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/common.h +4 -3
- package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
- package/cpp/ggml-cpu/ggml-cpu.c +123 -104
- package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
- package/cpp/ggml-cpu/ops.cpp +330 -148
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/quants.c +1158 -0
- package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/ggml-cpu/repack.cpp +1571 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +330 -38
- package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/ggml-cpu/vec.cpp +87 -18
- package/cpp/ggml-cpu/vec.h +249 -94
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +63 -183
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal.m +152 -45
- package/cpp/ggml-quants.c +0 -2
- package/cpp/ggml.c +61 -21
- package/cpp/ggml.h +22 -3
- package/cpp/gguf.cpp +24 -3
- package/cpp/json-partial.cpp +256 -0
- package/cpp/json-partial.h +38 -0
- package/cpp/json-schema-to-grammar.cpp +5 -47
- package/cpp/json-schema-to-grammar.h +4 -4
- package/cpp/llama-arch.cpp +153 -3
- package/cpp/llama-arch.h +27 -1
- package/cpp/llama-batch.cpp +741 -272
- package/cpp/llama-batch.h +112 -54
- package/cpp/llama-chat.cpp +30 -8
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-context.cpp +524 -339
- package/cpp/llama-context.h +38 -17
- package/cpp/llama-cparams.cpp +4 -0
- package/cpp/llama-cparams.h +2 -0
- package/cpp/llama-grammar.cpp +12 -2
- package/cpp/llama-graph.cpp +431 -356
- package/cpp/llama-graph.h +126 -58
- package/cpp/llama-hparams.cpp +10 -2
- package/cpp/llama-hparams.h +19 -2
- package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
- package/cpp/llama-kv-cache-unified-iswa.h +128 -0
- package/cpp/llama-kv-cache-unified.cpp +1841 -0
- package/cpp/llama-kv-cache-unified.h +303 -0
- package/cpp/llama-kv-cells.h +439 -0
- package/cpp/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama-memory-hybrid.h +138 -0
- package/cpp/llama-memory-recurrent.cpp +1112 -0
- package/cpp/llama-memory-recurrent.h +183 -0
- package/cpp/llama-memory.cpp +41 -0
- package/cpp/llama-memory.h +86 -5
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +42 -17
- package/cpp/llama-model-saver.cpp +1 -0
- package/cpp/llama-model.cpp +1639 -513
- package/cpp/llama-model.h +26 -0
- package/cpp/llama-sampling.cpp +2 -2
- package/cpp/llama-vocab.cpp +65 -28
- package/cpp/llama-vocab.h +1 -0
- package/cpp/llama.cpp +11 -7
- package/cpp/llama.h +150 -42
- package/cpp/minja/chat-template.hpp +1 -1
- package/cpp/minja/minja.hpp +1 -1
- package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/cpp/nlohmann/json_fwd.hpp +187 -0
- package/cpp/regex-partial.cpp +204 -0
- package/cpp/regex-partial.h +56 -0
- package/cpp/rn-llama.cpp +646 -35
- package/cpp/rn-llama.h +32 -1
- package/cpp/rn-tts.h +39 -0
- package/cpp/sampling.cpp +7 -8
- package/cpp/tools/mtmd/clip-impl.h +5 -0
- package/cpp/tools/mtmd/clip.cpp +572 -436
- package/cpp/tools/mtmd/clip.h +14 -4
- package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
- package/cpp/tools/mtmd/mtmd-audio.h +2 -17
- package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
- package/cpp/tools/mtmd/mtmd-helper.h +91 -0
- package/cpp/tools/mtmd/mtmd.cpp +368 -248
- package/cpp/tools/mtmd/mtmd.h +6 -70
- package/cpp/unicode.cpp +5 -0
- package/ios/CMakeLists.txt +26 -6
- package/ios/RNLlama.h +1 -1
- package/ios/RNLlama.mm +153 -3
- package/ios/RNLlamaContext.h +9 -1
- package/ios/RNLlamaContext.mm +112 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +24 -0
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +46 -2
- package/src/index.ts +105 -1
- package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/cpp/ggml-cpu/sgemm.cpp +0 -3544
- package/cpp/ggml-cpu/sgemm.h +0 -14
- package/cpp/llama-kv-cache.cpp +0 -2827
- package/cpp/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
- /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
- /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
package/README.md
CHANGED
````diff
@@ -55,6 +55,8 @@ To get a GGUF model or quantize manually, see [`Prepare and Quantize`](https://
 
 ## Usage
 
+> **💡 New!** `llama.rn` now supports **multimodal models** with vision and audio capabilities! See the [Multimodal section](#multimodal-vision--audio) for details.
+
 Load model info only:
 
 ```js
@@ -123,49 +125,162 @@ console.log('Result:', textResult.text)
 console.log('Timings:', textResult.timings)
 ```
 
-The binding
+The binding's design is inspired by the [server.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) example in llama.cpp:
 
 - `/completion` and `/chat/completions`: `context.completion(params, partialCompletionCallback)`
 - `/tokenize`: `context.tokenize(content)`
 - `/detokenize`: `context.detokenize(tokens)`
 - `/embedding`: `context.embedding(content)`
+- `/rerank`: `context.rerank(query, documents, params)`
 - ... Other methods
 
 Please visit the [Documentation](docs/API) for more details.
 
 You can also visit the [example](example) to see how to use it.
 
-##
+## Multimodal (Vision & Audio)
 
-
+`llama.rn` supports multimodal capabilities including vision (images) and audio processing. This allows you to interact with models that can understand both text and media content.
+
+### Supported Media Formats
+
+**Images (Vision):**
+- JPEG, PNG, BMP, GIF, TGA, HDR, PIC, PNM
+- Base64 encoded images (data URLs)
+- Local file paths
+- \* HTTP URLs are not supported yet
+
+**Audio:**
+- WAV, MP3 formats
+- Base64 encoded audio (data URLs)
+- Local file paths
+- \* HTTP URLs are not supported yet
+
+### Setup
+
+First, you need a multimodal model and its corresponding multimodal projector (mmproj) file; see [how to obtain mmproj](https://github.com/ggml-org/llama.cpp/tree/master/tools/mtmd#how-to-obtain-mmproj) for more details.
+
+### Initialize Multimodal Support
 
 ```js
-
+import { initLlama } from 'llama.rn'
 
-//
+// First initialize the model context
+const context = await initLlama({
+  model: 'path/to/your/multimodal-model.gguf',
+  n_ctx: 4096,
+  n_gpu_layers: 99, // Recommended for multimodal models
+  // Important: Disable context shifting for multimodal
+  ctx_shift: false,
+})
 
-//
-await context.
+// Initialize multimodal support with mmproj file
+const success = await context.initMultimodal({
+  path: 'path/to/your/mmproj-model.gguf',
+  use_gpu: true, // Recommended for better performance
+})
 
-//
-
+// Check if multimodal is enabled
+console.log('Multimodal enabled:', await context.isMultimodalEnabled())
+
+if (success) {
+  console.log('Multimodal support initialized!')
+
+  // Check what modalities are supported
+  const support = await context.getMultimodalSupport()
+  console.log('Vision support:', support.vision)
+  console.log('Audio support:', support.audio)
+} else {
+  console.log('Failed to initialize multimodal support')
+}
+
+// Release multimodal context
+await context.releaseMultimodal()
 ```
 
-
+### Usage Examples
 
-
+#### Vision (Image Processing)
 
 ```js
-const
-
-
+const result = await context.completion({
+  messages: [
+    {
+      role: 'user',
+      content: [
+        {
+          type: 'text',
+          text: 'What do you see in this image?',
+        },
+        {
+          type: 'image_url',
+          image_url: {
+            url: 'file:///path/to/image.jpg',
+            // or base64: 'data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEAYABgAAD...'
+          },
+        },
+      ],
+    },
+  ],
+  n_predict: 100,
+  temperature: 0.1,
 })
 
-
+console.log('AI Response:', result.text)
 ```
 
-
-
+#### Audio Processing
+
+```js
+// Method 1: Using structured message content (Recommended)
+const result = await context.completion({
+  messages: [
+    {
+      role: 'user',
+      content: [
+        {
+          type: 'text',
+          text: 'Transcribe or describe this audio:',
+        },
+        {
+          type: 'input_audio',
+          input_audio: {
+            data: 'data:audio/wav;base64,UklGRiQAAABXQVZFZm10...',
+            // or url: 'file:///path/to/audio.wav',
+            format: 'wav', // or 'mp3'
+          },
+        },
+      ],
+    },
+  ],
+  n_predict: 200,
+})
+
+console.log('Transcription:', result.text)
+```
+
+### Tokenization with Media
+
+```js
+// Tokenize text with media
+const tokenizeResult = await context.tokenize(
+  'Describe this image: <__media__>',
+  {
+    media_paths: ['file:///path/to/image.jpg']
+  }
+)
+
+console.log('Tokens:', tokenizeResult.tokens)
+console.log('Has media:', tokenizeResult.has_media)
+console.log('Media positions:', tokenizeResult.chunk_pos_media)
+```
+
+### Notes
+
+- **Context Shifting**: Multimodal models require `ctx_shift: false` to maintain media token positioning
+- **Memory**: Multimodal models require more memory; use adequate `n_ctx` and consider GPU offloading
+- **Media Markers**: The system automatically handles `<__media__>` markers in prompts. When using structured message content, media items are automatically replaced with this marker
+- **Model Compatibility**: Ensure your model supports the media type you're trying to process
 
 ## Tool Calling
 
@@ -289,6 +404,91 @@ console.log('Result:', text)
 
 Also, this is how `json_schema` works in `response_format` during completion; it converts the json_schema to GBNF grammar.
 
+## Session (State)
+
+The session file is a binary file that contains the state of the context; it can save prompt-processing time.
+
+```js
+const context = await initLlama({ ...params })
+
+// After prompt processing or completion ...
+
+// Save the session
+await context.saveSession('<path to save session>')
+
+// Load the session
+await context.loadSession('<path to load session>')
+```
+
+### Notes
+
+- \* Saving state from a multimodal context is not supported yet, so a session only stores the text chunk before the first media chunk.
+
+## Embedding
+
+The embedding API is used to get the embedding of a text.
+
+```js
+const context = await initLlama({
+  ...params,
+  embedding: true,
+})
+
+const { embedding } = await context.embedding('Hello, world!')
+```
+
+- You can use a model like [nomic-ai/nomic-embed-text-v1.5-GGUF](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF) for better embedding quality.
+- You can use a database like [op-sqlite](https://github.com/OP-Engineering/op-sqlite) with sqlite-vec support to store and search embeddings.
+
+## Rerank
+
+The rerank API is used to rank documents based on their relevance to a query. This is particularly useful for improving search results and implementing retrieval-augmented generation (RAG) systems.
+
+```js
+const context = await initLlama({
+  ...params,
+  embedding: true, // Required for reranking
+  pooling_type: 'rank', // Use rank pooling for rerank models
+})
+
+// Rerank documents based on relevance to query
+const results = await context.rerank(
+  'What is artificial intelligence?', // query
+  [
+    'AI is a branch of computer science.',
+    'The weather is nice today.',
+    'Machine learning is a subset of AI.',
+    'I like pizza.',
+  ], // documents to rank
+  {
+    normalize: 1, // Optional: normalize scores (default: from model config)
+  }
+)
+
+// Results are automatically sorted by score (highest first)
+results.forEach((result, index) => {
+  console.log(`Rank ${index + 1}:`, {
+    score: result.score,
+    document: result.document,
+    originalIndex: result.index,
+  })
+})
+```
+
+### Notes
+
+- **Model Requirements**: Reranking requires models with `RANK` pooling type (e.g., reranker models)
+- **Embedding Enabled**: The context must have `embedding: true` to use rerank functionality
+- **Automatic Sorting**: Results are returned sorted by relevance score in descending order
+- **Document Access**: Each result includes the original document text and its index in the input array
+- **Score Interpretation**: Higher scores indicate higher relevance to the query
+
+### Recommended Models
+
+- [jinaai - jina-reranker-v2-base-multilingual-GGUF](https://huggingface.co/gpustack/jina-reranker-v2-base-multilingual-GGUF)
+- [BAAI - bge-reranker-v2-m3-GGUF](https://huggingface.co/gpustack/bge-reranker-v2-m3-GGUF)
+- Other models with "rerank" or "reranker" in their name and GGUF format
+
 ## Mock `llama.rn`
 
 We have provided a mock version of `llama.rn` for testing purposes that you can use with Jest:
````
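For reference, the Jest wiring the README refers to is a one-liner. A minimal sketch, assuming the mock is exposed at `llama.rn/jest/mock` as in upstream `llama.rn` (the exact module specifier for this fork is an assumption; `package/jest/mock.js` in the file list above is what it resolves to):

```js
// jest.setup.js — replace the native module with the bundled mock
// (module path 'llama.rn/jest/mock' is assumed; adjust for the fork's package name)
jest.mock('llama.rn', () => require('llama.rn/jest/mock'))
```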
package/android/src/main/CMakeLists.txt
CHANGED
````diff
@@ -27,12 +27,11 @@ set(
     ${RNLLAMA_LIB_DIR}/ggml-cpu/amx/mmq.cpp
     ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu.c
     ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu.cpp
-    ${RNLLAMA_LIB_DIR}/ggml-cpu/
-    ${RNLLAMA_LIB_DIR}/ggml-cpu/
-    ${RNLLAMA_LIB_DIR}/ggml-cpu/
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/quants.c
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/traits.cpp
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/repack.cpp
     ${RNLLAMA_LIB_DIR}/ggml-cpu/unary-ops.cpp
     ${RNLLAMA_LIB_DIR}/ggml-cpu/binary-ops.cpp
-    ${RNLLAMA_LIB_DIR}/ggml-cpu/sgemm.cpp
     ${RNLLAMA_LIB_DIR}/ggml-cpu/vec.cpp
     ${RNLLAMA_LIB_DIR}/ggml-cpu/ops.cpp
     ${RNLLAMA_LIB_DIR}/ggml-opt.cpp
@@ -41,6 +40,9 @@ set(
     ${RNLLAMA_LIB_DIR}/gguf.cpp
     ${RNLLAMA_LIB_DIR}/log.cpp
     ${RNLLAMA_LIB_DIR}/llama-impl.cpp
+    ${RNLLAMA_LIB_DIR}/chat-parser.cpp
+    ${RNLLAMA_LIB_DIR}/json-partial.cpp
+    ${RNLLAMA_LIB_DIR}/regex-partial.cpp
     # Multimodal support
     ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd.cpp
     ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd-audio.cpp
@@ -52,7 +54,6 @@ set(
     ${RNLLAMA_LIB_DIR}/llama-adapter.cpp
     ${RNLLAMA_LIB_DIR}/llama-chat.cpp
     ${RNLLAMA_LIB_DIR}/llama-context.cpp
-    ${RNLLAMA_LIB_DIR}/llama-kv-cache.cpp
     ${RNLLAMA_LIB_DIR}/llama-arch.cpp
     ${RNLLAMA_LIB_DIR}/llama-batch.cpp
     ${RNLLAMA_LIB_DIR}/llama-cparams.cpp
@@ -60,6 +61,10 @@ set(
     ${RNLLAMA_LIB_DIR}/llama.cpp
     ${RNLLAMA_LIB_DIR}/llama-model.cpp
     ${RNLLAMA_LIB_DIR}/llama-model-loader.cpp
+    ${RNLLAMA_LIB_DIR}/llama-kv-cache-unified.cpp
+    ${RNLLAMA_LIB_DIR}/llama-kv-cache-unified-iswa.cpp
+    ${RNLLAMA_LIB_DIR}/llama-memory-hybrid.cpp
+    ${RNLLAMA_LIB_DIR}/llama-memory-recurrent.cpp
     ${RNLLAMA_LIB_DIR}/llama-mmap.cpp
     ${RNLLAMA_LIB_DIR}/llama-vocab.cpp
     ${RNLLAMA_LIB_DIR}/llama-memory.cpp
@@ -71,7 +76,8 @@ set(
     ${RNLLAMA_LIB_DIR}/common.cpp
     ${RNLLAMA_LIB_DIR}/chat.cpp
     ${RNLLAMA_LIB_DIR}/json-schema-to-grammar.cpp
-    ${RNLLAMA_LIB_DIR}/json.hpp
+    ${RNLLAMA_LIB_DIR}/nlohmann/json.hpp
+    ${RNLLAMA_LIB_DIR}/nlohmann/json_fwd.hpp
     ${RNLLAMA_LIB_DIR}/minja/minja.hpp
     ${RNLLAMA_LIB_DIR}/minja/chat-template.hpp
     ${RNLLAMA_LIB_DIR}/rn-llama.cpp
@@ -81,16 +87,28 @@ set(
 
 find_library(LOG_LIB log)
 
-function(build_library target_name cpu_flags)
+function(build_library target_name arch cpu_flags)
+    if (NOT ${arch} STREQUAL "generic")
+        set(SOURCE_FILES_ARCH
+            ${RNLLAMA_LIB_DIR}/ggml-cpu/arch/${arch}/quants.c
+            ${RNLLAMA_LIB_DIR}/ggml-cpu/arch/${arch}/repack.cpp
+        )
+    endif ()
+
     add_library(
         ${target_name}
         SHARED
         ${SOURCE_FILES}
+        ${SOURCE_FILES_ARCH}
     )
 
     target_link_libraries(${target_name} ${LOG_LIB} android)
 
-
+    if (${arch} STREQUAL "generic")
+        target_compile_options(${target_name} PRIVATE -DLM_GGML_CPU_GENERIC)
+    endif ()
+
+    target_compile_options(${target_name} PRIVATE -DLM_GGML_USE_CPU -DLM_GGML_USE_CPU_REPACK -DRNLLAMA_USE_FD_FILE -pthread ${cpu_flags})
 
     if (${CMAKE_BUILD_TYPE} STREQUAL "Debug")
         target_compile_options(${target_name} PRIVATE -DRNLLAMA_ANDROID_ENABLE_LOGGING)
@@ -111,17 +129,17 @@ endfunction()
 
 
 # Default target (no specific CPU features)
-build_library("rnllama" "")
+build_library("rnllama" "generic" "")
 
 if (${ANDROID_ABI} STREQUAL "arm64-v8a")
     # ARM64 targets
     # Removing fp16 for now as it leads to issues with some models like deepseek r1 distills
     # https://github.com/mybigday/llama.rn/pull/110#issuecomment-2609918310
-    build_library("rnllama_v8" "-march=armv8-a")
-    build_library("rnllama_v8_2" "-march=armv8.2-a")
-    build_library("rnllama_v8_2_dotprod" "-march=armv8.2-a+dotprod")
-    build_library("rnllama_v8_2_i8mm" "-march=armv8.2-a+i8mm")
-    build_library("rnllama_v8_2_dotprod_i8mm" "-march=armv8.2-a+dotprod+i8mm")
+    build_library("rnllama_v8" "arm" "-march=armv8-a")
+    build_library("rnllama_v8_2" "arm" "-march=armv8.2-a")
+    build_library("rnllama_v8_2_dotprod" "arm" "-march=armv8.2-a+dotprod")
+    build_library("rnllama_v8_2_i8mm" "arm" "-march=armv8.2-a+i8mm")
+    build_library("rnllama_v8_2_dotprod_i8mm" "arm" "-march=armv8.2-a+dotprod+i8mm")
 
     # https://github.com/ggerganov/llama.cpp/blob/master/docs/android.md#cross-compile-using-android-ndk
     # llama.cpp will deal with the cpu features
@@ -131,5 +149,6 @@ if (${ANDROID_ABI} STREQUAL "arm64-v8a")
 
 elseif (${ANDROID_ABI} STREQUAL "x86_64")
     # x86_64 target
-    build_library("rnllama_x86_64" "-march=x86-64" "-mtune=intel" "-msse4.2" "-mpopcnt")
+    build_library("rnllama_x86_64" "x86" "-march=x86-64" "-mtune=intel" "-msse4.2" "-mpopcnt")
+
 endif ()
````
package/android/src/main/java/com/rnllama/LlamaContext.java
CHANGED
````diff
@@ -134,8 +134,6 @@ public class LlamaContext {
       modelName,
       // String chat_template,
       params.hasKey("chat_template") ? params.getString("chat_template") : "",
-      // String reasoning_format,
-      params.hasKey("reasoning_format") ? params.getString("reasoning_format") : "none",
       // boolean embedding,
       params.hasKey("embedding") ? params.getBoolean("embedding") : false,
       // int embd_normalize,
@@ -207,6 +205,7 @@ public class LlamaContext {
     String tools = params.hasKey("tools") ? params.getString("tools") : "";
     Boolean parallelToolCalls = params.hasKey("parallel_tool_calls") ? params.getBoolean("parallel_tool_calls") : false;
     String toolChoice = params.hasKey("tool_choice") ? params.getString("tool_choice") : "";
+    Boolean enableThinking = params.hasKey("enable_thinking") ? params.getBoolean("enable_thinking") : false;
     return getFormattedChatWithJinja(
       this.context,
       messages,
@@ -214,7 +213,8 @@ public class LlamaContext {
       jsonSchema,
       tools,
       parallelToolCalls,
-      toolChoice
+      toolChoice,
+      enableThinking
     );
   }
 
@@ -303,12 +303,25 @@ public class LlamaContext {
       }
     }
 
+    int[] guide_tokens = null;
+    if (params.hasKey("guide_tokens")) {
+      ReadableArray guide_tokens_array = params.getArray("guide_tokens");
+      guide_tokens = new int[guide_tokens_array.size()];
+      for (int i = 0; i < guide_tokens_array.size(); i++) {
+        guide_tokens[i] = (int) guide_tokens_array.getDouble(i);
+      }
+    }
+
     WritableMap result = doCompletion(
       this.context,
       // String prompt,
       params.getString("prompt"),
+      // int[] guide_tokens,
+      guide_tokens,
       // int chat_format,
       params.hasKey("chat_format") ? params.getInt("chat_format") : 0,
+      // String reasoning_format,
+      params.hasKey("reasoning_format") ? params.getString("reasoning_format") : "none",
       // String grammar,
       params.hasKey("grammar") ? params.getString("grammar") : "",
       // String json_schema,
@@ -319,6 +332,8 @@ public class LlamaContext {
       params.hasKey("grammar_triggers") ? params.getArray("grammar_triggers") : null,
       // ReadableArray preserved_tokens,
       params.hasKey("preserved_tokens") ? params.getArray("preserved_tokens") : null,
+      // boolean thinking_forced_open,
+      params.hasKey("thinking_forced_open") ? params.getBoolean("thinking_forced_open") : false,
       // float temperature,
       params.hasKey("temperature") ? (float) params.getDouble("temperature") : 0.7f,
       // int n_threads,
@@ -423,6 +438,27 @@ public class LlamaContext {
     return result;
   }
 
+  public WritableArray getRerank(String query, ReadableArray documents, ReadableMap params) {
+    if (isEmbeddingEnabled(this.context) == false) {
+      throw new IllegalStateException("Embedding is not enabled but required for reranking");
+    }
+
+    // Convert ReadableArray to Java string array
+    String[] documentsArray = new String[documents.size()];
+    for (int i = 0; i < documents.size(); i++) {
+      documentsArray[i] = documents.getString(i);
+    }
+
+    WritableArray result = rerank(
+      this.context,
+      query,
+      documentsArray,
+      // int normalize,
+      params.hasKey("normalize") ? params.getInt("normalize") : -1
+    );
+    return result;
+  }
+
   public String bench(int pp, int tg, int pl, int nr) {
     return bench(this.context, pp, tg, pl, nr);
   }
@@ -487,6 +523,34 @@ public class LlamaContext {
     releaseMultimodal(this.context);
   }
 
+  public boolean initVocoder(String vocoderModelPath) {
+    return initVocoder(this.context, vocoderModelPath);
+  }
+
+  public boolean isVocoderEnabled() {
+    return isVocoderEnabled(this.context);
+  }
+
+  public String getFormattedAudioCompletion(String speakerJsonStr, String textToSpeak) {
+    return getFormattedAudioCompletion(this.context, speakerJsonStr, textToSpeak);
+  }
+
+  public WritableArray getAudioCompletionGuideTokens(String textToSpeak) {
+    return getAudioCompletionGuideTokens(this.context, textToSpeak);
+  }
+
+  public WritableArray decodeAudioTokens(ReadableArray tokens) {
+    int[] toks = new int[tokens.size()];
+    for (int i = 0; i < tokens.size(); i++) {
+      toks[i] = (int) tokens.getDouble(i);
+    }
+    return decodeAudioTokens(this.context, toks);
+  }
+
+  public void releaseVocoder() {
+    releaseVocoder(this.context);
+  }
+
   public void release() {
     freeContext(context);
   }
@@ -588,7 +652,6 @@ public class LlamaContext {
   protected static native long initContext(
     String model_path,
    String chat_template,
-    String reasoning_format,
     boolean embedding,
     int embd_normalize,
     int n_ctx,
@@ -625,7 +688,8 @@ public class LlamaContext {
     String jsonSchema,
     String tools,
     boolean parallelToolCalls,
-    String toolChoice
+    String toolChoice,
+    boolean enableThinking
   );
   protected static native String getFormattedChat(
     long contextPtr,
@@ -644,12 +708,15 @@ public class LlamaContext {
   protected static native WritableMap doCompletion(
     long context_ptr,
     String prompt,
+    int[] guide_tokens,
     int chat_format,
+    String reasoning_format,
     String grammar,
     String json_schema,
     boolean grammar_lazy,
     ReadableArray grammar_triggers,
     ReadableArray preserved_tokens,
+    boolean thinking_forced_open,
     float temperature,
     int n_threads,
     int n_predict,
@@ -690,6 +757,7 @@ public class LlamaContext {
     String text,
     int embd_normalize
   );
+  protected static native WritableArray rerank(long contextPtr, String query, String[] documents, int normalize);
   protected static native String bench(long contextPtr, int pp, int tg, int pl, int nr);
   protected static native int applyLoraAdapters(long contextPtr, ReadableArray loraAdapters);
   protected static native void removeLoraAdapters(long contextPtr);
@@ -698,4 +766,10 @@ public class LlamaContext {
   protected static native void setupLog(NativeLogCallback logCallback);
   protected static native void unsetLog();
   protected static native void releaseMultimodal(long contextPtr);
+  protected static native boolean isVocoderEnabled(long contextPtr);
+  protected static native String getFormattedAudioCompletion(long contextPtr, String speakerJsonStr, String textToSpeak);
+  protected static native WritableArray getAudioCompletionGuideTokens(long contextPtr, String textToSpeak);
+  protected static native WritableArray decodeAudioTokens(long contextPtr, int[] tokens);
+  protected static native boolean initVocoder(long contextPtr, String vocoderModelPath);
+  protected static native void releaseVocoder(long contextPtr);
 }
````
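Taken together, the new `guide_tokens` completion argument and the vocoder entry points (see `package/cpp/rn-tts.h` in the file list) sketch a TTS flow. A minimal JS sketch, assuming `src/index.ts` mirrors the native methods above one-to-one on the context object; the JS-side signatures, the `speakerJson` shape, and the `audio_tokens` result field are assumptions, not confirmed API:

```js
// Hypothetical TTS flow over the new vocoder API (names mirrored from the native layer)
const speakerJson = '{}' // speaker configuration JSON; exact shape assumed
const text = 'Hello from llama.rn!'

const ok = await context.initVocoder('file:///models/vocoder.gguf') // JS wrapper assumed
if (ok && (await context.isVocoderEnabled())) {
  // Format a TTS prompt for the given speaker configuration
  const prompt = await context.getFormattedAudioCompletion(speakerJson, text)
  // Guide tokens bias generation toward the target text
  const guide_tokens = await context.getAudioCompletionGuideTokens(text)
  const result = await context.completion({ prompt, guide_tokens, n_predict: 500 })
  // Decode the generated audio tokens into samples (result field name assumed)
  const samples = await context.decodeAudioTokens(result.audio_tokens)
  console.log('Decoded samples:', samples.length)
  await context.releaseVocoder()
}
```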
|