cui-llama.rn 1.7.4 → 1.7.6
This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- package/README.md +217 -17
- package/android/src/main/CMakeLists.txt +34 -15
- package/android/src/main/java/com/rnllama/LlamaContext.java +79 -5
- package/android/src/main/java/com/rnllama/RNLlama.java +237 -0
- package/android/src/main/jni.cpp +213 -14
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
- package/cpp/README.md +1 -1
- package/cpp/chat-parser.cpp +385 -0
- package/cpp/chat-parser.h +120 -0
- package/cpp/chat.cpp +726 -596
- package/cpp/chat.h +71 -6
- package/cpp/common.cpp +56 -38
- package/cpp/common.h +9 -3
- package/cpp/ggml-backend-reg.cpp +5 -0
- package/cpp/ggml-backend.cpp +10 -2
- package/cpp/ggml-common.h +4 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/common.h +4 -3
- package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
- package/cpp/ggml-cpu/ggml-cpu.c +123 -104
- package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
- package/cpp/ggml-cpu/ops.cpp +330 -148
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/quants.c +1158 -0
- package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/ggml-cpu/repack.cpp +1571 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +330 -38
- package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/ggml-cpu/vec.cpp +87 -18
- package/cpp/ggml-cpu/vec.h +249 -94
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +63 -183
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal.m +152 -45
- package/cpp/ggml-quants.c +0 -2
- package/cpp/ggml.c +61 -21
- package/cpp/ggml.h +22 -3
- package/cpp/gguf.cpp +24 -3
- package/cpp/json-partial.cpp +256 -0
- package/cpp/json-partial.h +38 -0
- package/cpp/json-schema-to-grammar.cpp +5 -47
- package/cpp/json-schema-to-grammar.h +4 -4
- package/cpp/llama-arch.cpp +153 -3
- package/cpp/llama-arch.h +27 -1
- package/cpp/llama-batch.cpp +741 -272
- package/cpp/llama-batch.h +112 -54
- package/cpp/llama-chat.cpp +30 -8
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-context.cpp +524 -339
- package/cpp/llama-context.h +38 -17
- package/cpp/llama-cparams.cpp +4 -0
- package/cpp/llama-cparams.h +2 -0
- package/cpp/llama-grammar.cpp +12 -2
- package/cpp/llama-graph.cpp +431 -356
- package/cpp/llama-graph.h +126 -58
- package/cpp/llama-hparams.cpp +10 -2
- package/cpp/llama-hparams.h +19 -2
- package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
- package/cpp/llama-kv-cache-unified-iswa.h +128 -0
- package/cpp/llama-kv-cache-unified.cpp +1841 -0
- package/cpp/llama-kv-cache-unified.h +303 -0
- package/cpp/llama-kv-cells.h +439 -0
- package/cpp/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama-memory-hybrid.h +138 -0
- package/cpp/llama-memory-recurrent.cpp +1112 -0
- package/cpp/llama-memory-recurrent.h +183 -0
- package/cpp/llama-memory.cpp +41 -0
- package/cpp/llama-memory.h +86 -5
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +42 -17
- package/cpp/llama-model-saver.cpp +1 -0
- package/cpp/llama-model.cpp +1639 -513
- package/cpp/llama-model.h +26 -0
- package/cpp/llama-sampling.cpp +2 -2
- package/cpp/llama-vocab.cpp +65 -28
- package/cpp/llama-vocab.h +1 -0
- package/cpp/llama.cpp +11 -7
- package/cpp/llama.h +150 -42
- package/cpp/minja/chat-template.hpp +1 -1
- package/cpp/minja/minja.hpp +1 -1
- package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/cpp/nlohmann/json_fwd.hpp +187 -0
- package/cpp/regex-partial.cpp +204 -0
- package/cpp/regex-partial.h +56 -0
- package/cpp/rn-llama.cpp +646 -35
- package/cpp/rn-llama.h +32 -1
- package/cpp/rn-tts.h +39 -0
- package/cpp/sampling.cpp +7 -8
- package/cpp/tools/mtmd/clip-impl.h +5 -0
- package/cpp/tools/mtmd/clip.cpp +572 -436
- package/cpp/tools/mtmd/clip.h +14 -4
- package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
- package/cpp/tools/mtmd/mtmd-audio.h +2 -17
- package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
- package/cpp/tools/mtmd/mtmd-helper.h +91 -0
- package/cpp/tools/mtmd/mtmd.cpp +368 -248
- package/cpp/tools/mtmd/mtmd.h +6 -70
- package/cpp/unicode.cpp +5 -0
- package/ios/CMakeLists.txt +26 -6
- package/ios/RNLlama.h +1 -1
- package/ios/RNLlama.mm +153 -3
- package/ios/RNLlamaContext.h +9 -1
- package/ios/RNLlamaContext.mm +112 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +24 -0
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +46 -2
- package/src/index.ts +105 -1
- package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/cpp/ggml-cpu/sgemm.cpp +0 -3544
- package/cpp/ggml-cpu/sgemm.h +0 -14
- package/cpp/llama-kv-cache.cpp +0 -2827
- package/cpp/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
- /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
- /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
package/android/src/main/java/com/rnllama/RNLlama.java
CHANGED
@@ -16,6 +16,7 @@ import com.facebook.react.bridge.LifecycleEventListener;
 import com.facebook.react.bridge.ReadableMap;
 import com.facebook.react.bridge.ReadableArray;
 import com.facebook.react.bridge.WritableMap;
+import com.facebook.react.bridge.WritableArray;
 import com.facebook.react.bridge.Arguments;
 
 
@@ -494,6 +495,38 @@ public class RNLlama implements LifecycleEventListener {
     tasks.put(task, "embedding-" + contextId);
   }
 
+  public void rerank(double id, final String query, final ReadableArray documents, final ReadableMap params, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, WritableArray>() {
+      private Exception exception;
+
+      @Override
+      protected WritableArray doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          return context.getRerank(query, documents, params);
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(WritableArray result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "rerank-" + contextId);
+  }
+
   public void bench(double id, final double pp, final double tg, final double pl, final double nr, final Promise promise) {
     final int contextId = (int) id;
     AsyncTask task = new AsyncTask<Void, Void, String>() {
@@ -759,6 +792,210 @@ public class RNLlama implements LifecycleEventListener {
     tasks.put(task, "releaseMultimodal" + id);
   }
 
+  public void initVocoder(double id, final String vocoderModelPath, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Boolean>() {
+      private Exception exception;
+
+      @Override
+      protected Boolean doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          if (context.isPredicting()) {
+            throw new Exception("Context is busy");
+          }
+          return context.initVocoder(vocoderModelPath);
+        } catch (Exception e) {
+          exception = e;
+        }
+        return false;
+      }
+
+      @Override
+      protected void onPostExecute(Boolean result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "initVocoder-" + contextId);
+  }
+
+  public void releaseVocoder(double id, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Void>() {
+      private Exception exception;
+
+      @Override
+      protected Void doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          context.releaseVocoder();
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(Void result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(null);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "releaseVocoder-" + contextId);
+  }
+
+  public void isVocoderEnabled(double id, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Boolean>() {
+      private Exception exception;
+
+      @Override
+      protected Boolean doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          return context.isVocoderEnabled();
+        } catch (Exception e) {
+          exception = e;
+        }
+        return false;
+      }
+
+      @Override
+      protected void onPostExecute(Boolean result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "isVocoderEnabled-" + contextId);
+  }
+
+  public void getFormattedAudioCompletion(double id, final String speakerJsonStr, final String textToSpeak, Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, String>() {
+      private Exception exception;
+
+      @Override
+      protected String doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          if (!context.isVocoderEnabled()) {
+            throw new Exception("Vocoder is not enabled");
+          }
+          return context.getFormattedAudioCompletion(speakerJsonStr, textToSpeak);
+        } catch (Exception e) {
+          exception = e;
+          return null;
+        }
+      }
+
+      @Override
+      protected void onPostExecute(String result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "getFormattedAudioCompletion-" + contextId);
+  }
+
+  public void getAudioCompletionGuideTokens(double id, final String textToSpeak, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, WritableArray>() {
+      private Exception exception;
+
+      @Override
+      protected WritableArray doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          if (!context.isVocoderEnabled()) {
+            throw new Exception("Vocoder is not enabled");
+          }
+          return context.getAudioCompletionGuideTokens(textToSpeak);
+        } catch (Exception e) {
+          exception = e;
+          return null;
+        }
+      }
+
+      @Override
+      protected void onPostExecute(WritableArray result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "getAudioCompletionGuideTokens-" + contextId);
+  }
+
+  public void decodeAudioTokens(double id, final ReadableArray tokens, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, WritableArray>() {
+      private Exception exception;
+
+      @Override
+      protected WritableArray doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          if (!context.isVocoderEnabled()) {
+            throw new Exception("Vocoder is not enabled");
+          }
+          return context.decodeAudioTokens(tokens);
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(WritableArray result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "decodeAudioTokens-" + contextId);
+  }
+
   public void releaseContext(double id, Promise promise) {
     final int contextId = (int) id;
     AsyncTask task = new AsyncTask<Void, Void, Void>() {
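Note: the new `rerank` bridge resolves with one `{ score, index }` map per input document (assembled in the jni.cpp diff that follows). A minimal consumption sketch, assuming the JavaScript wrapper in package/src/index.ts exposes the bridge under the same name — the method name and options object here are inferred from the bridge signature, not copied from the package:

// Hypothetical usage sketch — `rerank` and its params object are assumptions
// inferred from the native bridge signature; the { score, index } result
// shape matches what jni.cpp pushes into the result array.
import { initLlama } from 'cui-llama.rn';

async function rankDocuments(modelPath: string, query: string, documents: string[]) {
  const context = await initLlama({ model: modelPath, embedding: true });
  const results: { score: number; index: number }[] =
    await (context as any).rerank(query, documents, {});
  // Higher score = better match; map indices back to the input documents.
  return results
    .sort((a, b) => b.score - a.score)
    .map((r) => ({ document: documents[r.index], score: r.score }));
}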
package/android/src/main/jni.cpp
CHANGED
@@ -9,7 +9,7 @@
 #include <string>
 #include <thread>
 #include <unordered_map>
-#include <json.hpp>
+#include <nlohmann/json.hpp>
 #include "json-schema-to-grammar.h"
 #include "llama.h"
 #include "chat.h"
@@ -233,7 +233,6 @@ Java_com_rnllama_LlamaContext_initContext(
     jobject thiz,
     jstring model_path_str,
     jstring chat_template,
-    jstring reasoning_format,
     jboolean embedding,
     jint embd_normalize,
     jint n_ctx,
@@ -271,13 +270,6 @@ Java_com_rnllama_LlamaContext_initContext(
     const char *chat_template_chars = env->GetStringUTFChars(chat_template, nullptr);
     defaultParams.chat_template = chat_template_chars;
 
-    const char *reasoning_format_chars = env->GetStringUTFChars(reasoning_format, nullptr);
-    if (strcmp(reasoning_format_chars, "deepseek") == 0) {
-        defaultParams.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    } else {
-        defaultParams.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-    }
-
     defaultParams.n_ctx = n_ctx;
     defaultParams.n_batch = n_batch;
     defaultParams.n_ubatch = n_ubatch;
@@ -346,7 +338,6 @@ Java_com_rnllama_LlamaContext_initContext(
 
     env->ReleaseStringUTFChars(model_path_str, model_path_chars);
     env->ReleaseStringUTFChars(chat_template, chat_template_chars);
-    env->ReleaseStringUTFChars(reasoning_format, reasoning_format_chars);
     env->ReleaseStringUTFChars(cache_type_k, cache_type_k_chars);
     env->ReleaseStringUTFChars(cache_type_v, cache_type_v_chars);
 
@@ -493,7 +484,8 @@ Java_com_rnllama_LlamaContext_getFormattedChatWithJinja(
     jstring json_schema,
     jstring tools,
     jboolean parallel_tool_calls,
-    jstring tool_choice
+    jstring tool_choice,
+    jboolean enable_thinking
 ) {
     UNUSED(thiz);
     auto llama = context_map[(long) context_ptr];
@@ -512,7 +504,8 @@ Java_com_rnllama_LlamaContext_getFormattedChatWithJinja(
             json_schema_chars,
             tools_chars,
             parallel_tool_calls,
-            tool_choice_chars
+            tool_choice_chars,
+            enable_thinking
         );
         putString(env, result, "prompt", formatted.prompt.c_str());
         putInt(env, result, "chat_format", static_cast<int>(formatted.format));
@@ -526,6 +519,7 @@ Java_com_rnllama_LlamaContext_getFormattedChatWithJinja(
             putInt(env, trigger_map, "token", trigger.token);
            pushMap(env, grammar_triggers, trigger_map);
         }
+        putBoolean(env, result, "thinking_forced_open", formatted.thinking_forced_open);
         putArray(env, result, "grammar_triggers", grammar_triggers);
         auto preserved_tokens = createWritableArray(env);
         for (const auto &token : formatted.preserved_tokens) {
@@ -537,7 +531,7 @@ Java_com_rnllama_LlamaContext_getFormattedChatWithJinja(
             pushString(env, additional_stops, stop.c_str());
         }
         putArray(env, result, "additional_stops", additional_stops);
-    } catch (const nlohmann::
+    } catch (const nlohmann::json_abi_v3_12_0::detail::parse_error& e) {
         std::string errorMessage = "JSON parse error in getFormattedChat: " + std::string(e.what());
         putString(env, result, "_error", errorMessage.c_str());
         LOGI("[RNLlama] %s", errorMessage.c_str());
@@ -668,18 +662,33 @@ static inline jobject tokenProbsToMap(
     return result;
 }
 
+static inline jobject tokensToArray(
+    JNIEnv *env,
+    rnllama::llama_rn_context *llama,
+    std::vector<llama_token> tokens
+) {
+    auto result = createWritableArray(env);
+    for (const auto &token : tokens) {
+        pushInt(env, result, token);
+    }
+    return result;
+}
+
 JNIEXPORT jobject JNICALL
 Java_com_rnllama_LlamaContext_doCompletion(
     JNIEnv *env,
     jobject thiz,
     jlong context_ptr,
     jstring prompt,
+    jintArray guide_tokens,
     jint chat_format,
+    jstring reasoning_format,
     jstring grammar,
     jstring json_schema,
     jboolean grammar_lazy,
     jobject grammar_triggers,
     jobject preserved_tokens,
+    jboolean thinking_forced_open,
    jfloat temperature,
     jint n_threads,
     jint n_predict,
@@ -722,6 +731,18 @@ Java_com_rnllama_LlamaContext_doCompletion(
     // Set the prompt parameter
     llama->params.prompt = prompt_chars;
 
+    // Set the guide tokens parameter
+    if (guide_tokens != nullptr) {
+        int guide_tokens_size = env->GetArrayLength(guide_tokens);
+        int *guide_tokens_array = env->GetIntArrayElements(guide_tokens, nullptr);
+        std::vector<llama_token> guide_tokens_vector(guide_tokens_size);
+        for (int i = 0; i < guide_tokens_size; i++) {
+            guide_tokens_vector[i] = guide_tokens_array[i];
+        }
+        env->ReleaseIntArrayElements(guide_tokens, guide_tokens_array, 0);
+        llama->setGuideTokens(guide_tokens_vector);
+    }
+
     // Process image paths if provided
     std::vector<std::string> media_paths_vector;
 
@@ -908,6 +929,11 @@ Java_com_rnllama_LlamaContext_doCompletion(
         auto result = createWriteableMap(env);
         putString(env, result, "error", e.what());
         return reinterpret_cast<jobject>(result);
+    } catch (const std::runtime_error& e) {
+        llama->endCompletion();
+        auto result = createWriteableMap(env);
+        putString(env, result, "error", e.what());
+        return reinterpret_cast<jobject>(result);
     }
 
     if (llama->context_full) {
@@ -993,7 +1019,24 @@ Java_com_rnllama_LlamaContext_doCompletion(
     auto toolCallsSize = 0;
     if (!llama->is_interrupted) {
         try {
-
+            common_chat_syntax chat_syntax;
+            chat_syntax.format = static_cast<common_chat_format>(chat_format);
+
+            const char *reasoning_format_chars = env->GetStringUTFChars(reasoning_format, nullptr);
+            if (strcmp(reasoning_format_chars, "deepseek") == 0) {
+                chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+            } else if (strcmp(reasoning_format_chars, "deepseek-legacy") == 0) {
+                chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
+            } else {
+                chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_NONE;
+            }
+            chat_syntax.thinking_forced_open = thinking_forced_open;
+            env->ReleaseStringUTFChars(reasoning_format, reasoning_format_chars);
+            common_chat_msg message = common_chat_parse(
+                llama->generated_text,
+                false,
+                chat_syntax
+            );
             if (!message.reasoning_content.empty()) {
                 reasoningContent = message.reasoning_content;
             }
@@ -1027,6 +1070,7 @@ Java_com_rnllama_LlamaContext_doCompletion(
     if (toolCallsSize > 0) {
         putArray(env, result, "tool_calls", toolCalls);
     }
+    putArray(env, result, "audio_tokens", tokensToArray(env, llama, llama->audio_tokens));
     putArray(env, result, "completion_probabilities", tokenProbsToMap(env, llama, llama->generated_token_probs));
     putInt(env, result, "tokens_predicted", llama->num_tokens_predicted);
     putInt(env, result, "tokens_evaluated", llama->num_prompt_tokens);
@@ -1189,6 +1233,9 @@ Java_com_rnllama_LlamaContext_embedding(
     } catch (const std::exception &e) {
         putString(env, result, "error", e.what());
         return reinterpret_cast<jobject>(result);
+    } catch (const std::runtime_error& e) {
+        putString(env, result, "error", e.what());
+        return reinterpret_cast<jobject>(result);
     }
     llama->doCompletion();
 
@@ -1210,6 +1257,54 @@ Java_com_rnllama_LlamaContext_embedding(
     return result;
 }
 
+JNIEXPORT jobject JNICALL
+Java_com_rnllama_LlamaContext_rerank(
+    JNIEnv *env, jobject thiz,
+    jlong context_ptr,
+    jstring query,
+    jobjectArray documents,
+    jint normalize
+) {
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+
+    const char *query_chars = env->GetStringUTFChars(query, nullptr);
+
+    // Convert Java string array to C++ vector
+    std::vector<std::string> documents_vector;
+    int documents_size = env->GetArrayLength(documents);
+    for (int i = 0; i < documents_size; i++) {
+        jstring document = (jstring) env->GetObjectArrayElement(documents, i);
+        const char *document_chars = env->GetStringUTFChars(document, nullptr);
+        documents_vector.push_back(document_chars);
+        env->ReleaseStringUTFChars(document, document_chars);
+    }
+
+    auto result = createWritableArray(env);
+
+    try {
+        std::vector<float> scores = llama->rerank(query_chars, documents_vector);
+
+        for (size_t i = 0; i < scores.size(); i++) {
+            auto item = createWriteableMap(env);
+            putDouble(env, item, "score", (double) scores[i]);
+            putInt(env, item, "index", (int) i);
+            pushMap(env, result, item);
+        }
+    } catch (const std::exception &e) {
+        auto error_item = createWriteableMap(env);
+        putString(env, error_item, "error", e.what());
+        pushMap(env, result, error_item);
+    } catch (const std::runtime_error& e) {
+        auto error_item = createWriteableMap(env);
+        putString(env, error_item, "error", e.what());
+        pushMap(env, result, error_item);
+    }
+
+    env->ReleaseStringUTFChars(query, query_chars);
+    return result;
+}
+
 JNIEXPORT jstring JNICALL
 Java_com_rnllama_LlamaContext_bench(
     JNIEnv *env,
@@ -1415,4 +1510,108 @@ Java_com_rnllama_LlamaContext_releaseMultimodal(
     llama->releaseMultimodal();
 }
 
+JNIEXPORT jboolean JNICALL
+Java_com_rnllama_LlamaContext_initVocoder(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr,
+    jstring vocoder_model_path
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    const char *vocoder_model_path_chars = env->GetStringUTFChars(vocoder_model_path, nullptr);
+    bool result = llama->initVocoder(vocoder_model_path_chars);
+    env->ReleaseStringUTFChars(vocoder_model_path, vocoder_model_path_chars);
+    return result;
+}
+
+JNIEXPORT void JNICALL
+Java_com_rnllama_LlamaContext_releaseVocoder(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    llama->releaseVocoder();
+}
+
+JNIEXPORT jboolean JNICALL
+Java_com_rnllama_LlamaContext_isVocoderEnabled(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    return llama->isVocoderEnabled();
+}
+
+JNIEXPORT jstring JNICALL
+Java_com_rnllama_LlamaContext_getFormattedAudioCompletion(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr,
+    jstring speaker_json_str,
+    jstring text_to_speak
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    const char *speaker_json_str_chars = env->GetStringUTFChars(speaker_json_str, nullptr);
+    const char *text_to_speak_chars = env->GetStringUTFChars(text_to_speak, nullptr);
+    std::string result = llama->getFormattedAudioCompletion(speaker_json_str_chars, text_to_speak_chars);
+    env->ReleaseStringUTFChars(speaker_json_str, speaker_json_str_chars);
+    env->ReleaseStringUTFChars(text_to_speak, text_to_speak_chars);
+    return env->NewStringUTF(result.c_str());
+}
+
+JNIEXPORT jobject JNICALL
+Java_com_rnllama_LlamaContext_getAudioCompletionGuideTokens(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr,
+    jstring text_to_speak
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    const char *text_to_speak_chars = env->GetStringUTFChars(text_to_speak, nullptr);
+    std::vector<llama_token> guide_tokens = llama->getAudioCompletionGuideTokens(text_to_speak_chars);
+    env->ReleaseStringUTFChars(text_to_speak, text_to_speak_chars);
+    auto result = createWritableArray(env);
+    for (const auto &val : guide_tokens) {
+        pushInt(env, result, (int) val);
+    }
+    return result;
+}
+
+JNIEXPORT jobject JNICALL
+Java_com_rnllama_LlamaContext_decodeAudioTokens(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr,
+    jintArray tokens
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    jsize tokens_size = env->GetArrayLength(tokens);
+    jint *tokens_ptr = env->GetIntArrayElements(tokens, nullptr);
+    std::vector<llama_token> tokens_vec(tokens_size);
+    for (int i = 0; i < tokens_size; i++) {
+        tokens_vec[i] = tokens_ptr[i];
+    }
+    env->ReleaseIntArrayElements(tokens, tokens_ptr, 0);
+    std::vector<float> audio = llama->decodeAudioTokens(tokens_vec);
+    auto result = createWritableArray(env);
+    for (const auto &val : audio) {
+        pushDouble(env, result, (double) val);
+    }
+    return result;
+}
+
 } // extern "C"
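Taken together, the new JNI entry points define a TTS round trip: load a vocoder, format the prompt for a speaker, generate with guide tokens, then decode the resulting audio tokens to PCM samples. A minimal sketch of that flow from JavaScript, assuming the index.ts wrappers mirror the bridge method names one-to-one — the "audio_tokens" result field is the one visible in the diff above; the `guide_tokens` completion parameter name and everything else here is inferred:

// Hypothetical end-to-end TTS flow — wrapper names assumed to mirror the
// native bridge methods shown above; exact signatures in index.ts may differ.
async function synthesize(context: any, speakerJson: string, text: string): Promise<number[]> {
  await context.initVocoder('/path/to/vocoder.gguf');       // load the TTS decoder model
  const prompt = await context.getFormattedAudioCompletion(speakerJson, text);
  const guideTokens: number[] = await context.getAudioCompletionGuideTokens(text);
  // Guide tokens keep generation aligned with the input text; the native
  // side returns the generated audio tokens under "audio_tokens".
  const { audio_tokens } = await context.completion({ prompt, guide_tokens: guideTokens });
  const samples = await context.decodeAudioTokens(audio_tokens); // PCM float samples
  await context.releaseVocoder();
  return samples;
}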
package/android/src/main/jniLibs (binary files)
8 binary files changed (the prebuilt librnllama*.so libraries listed above); no textual diff is shown for binary content.
package/android/src/newarch/java/com/rnllama/RNLlamaModule.java
CHANGED
@@ -123,6 +123,11 @@ public class RNLlamaModule extends NativeRNLlamaSpec {
     rnllama.embedding(id, text, params, promise);
   }
 
+  @ReactMethod
+  public void rerank(double id, final String query, final ReadableArray documents, final ReadableMap params, final Promise promise) {
+    rnllama.rerank(id, query, documents, params, promise);
+  }
+
   @ReactMethod
   public void bench(double id, final double pp, final double tg, final double pl, final double nr, final Promise promise) {
     rnllama.bench(id, pp, tg, pl, nr, promise);
@@ -143,6 +148,36 @@ public class RNLlamaModule extends NativeRNLlamaSpec {
     rnllama.getLoadedLoraAdapters(id, promise);
   }
 
+  @ReactMethod
+  public void initVocoder(double id, final String vocoderModelPath, final Promise promise) {
+    rnllama.initVocoder(id, vocoderModelPath, promise);
+  }
+
+  @ReactMethod
+  public void isVocoderEnabled(double id, final Promise promise) {
+    rnllama.isVocoderEnabled(id, promise);
+  }
+
+  @ReactMethod
+  public void getFormattedAudioCompletion(double id, final String speakerJsonStr, final String textToSpeak, final Promise promise) {
+    rnllama.getFormattedAudioCompletion(id, speakerJsonStr, textToSpeak, promise);
+  }
+
+  @ReactMethod
+  public void getAudioCompletionGuideTokens(double id, final String textToSpeak, final Promise promise) {
+    rnllama.getAudioCompletionGuideTokens(id, textToSpeak, promise);
+  }
+
+  @ReactMethod
+  public void decodeAudioTokens(double id, final ReadableArray tokens, final Promise promise) {
+    rnllama.decodeAudioTokens(id, tokens, promise);
+  }
+
+  @ReactMethod
+  public void releaseVocoder(double id, final Promise promise) {
+    rnllama.releaseVocoder(id, promise);
+  }
+
   @ReactMethod
   public void releaseContext(double id, Promise promise) {
     rnllama.releaseContext(id, promise);