cui-llama.rn 1.7.3 → 1.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +217 -17
- package/android/src/main/CMakeLists.txt +34 -15
- package/android/src/main/java/com/rnllama/LlamaContext.java +94 -8
- package/android/src/main/java/com/rnllama/RNLlama.java +247 -0
- package/android/src/main/jni.cpp +213 -14
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
- package/cpp/README.md +1 -1
- package/cpp/chat-parser.cpp +385 -0
- package/cpp/chat-parser.h +120 -0
- package/cpp/chat.cpp +726 -596
- package/cpp/chat.h +71 -6
- package/cpp/common.cpp +56 -38
- package/cpp/common.h +9 -3
- package/cpp/ggml-backend-reg.cpp +5 -0
- package/cpp/ggml-backend.cpp +10 -2
- package/cpp/ggml-common.h +4 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/common.h +4 -3
- package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
- package/cpp/ggml-cpu/ggml-cpu.c +123 -104
- package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
- package/cpp/ggml-cpu/ops.cpp +330 -148
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/quants.c +1158 -0
- package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/ggml-cpu/repack.cpp +1571 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +330 -38
- package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/ggml-cpu/vec.cpp +87 -18
- package/cpp/ggml-cpu/vec.h +249 -94
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +63 -183
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal.m +152 -45
- package/cpp/ggml-quants.c +0 -2
- package/cpp/ggml.c +61 -21
- package/cpp/ggml.h +22 -3
- package/cpp/gguf.cpp +24 -3
- package/cpp/json-partial.cpp +256 -0
- package/cpp/json-partial.h +38 -0
- package/cpp/json-schema-to-grammar.cpp +5 -47
- package/cpp/json-schema-to-grammar.h +4 -4
- package/cpp/llama-arch.cpp +153 -3
- package/cpp/llama-arch.h +27 -1
- package/cpp/llama-batch.cpp +741 -272
- package/cpp/llama-batch.h +112 -54
- package/cpp/llama-chat.cpp +30 -8
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-context.cpp +524 -339
- package/cpp/llama-context.h +38 -17
- package/cpp/llama-cparams.cpp +4 -0
- package/cpp/llama-cparams.h +2 -0
- package/cpp/llama-grammar.cpp +12 -2
- package/cpp/llama-graph.cpp +431 -356
- package/cpp/llama-graph.h +126 -58
- package/cpp/llama-hparams.cpp +10 -2
- package/cpp/llama-hparams.h +19 -2
- package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
- package/cpp/llama-kv-cache-unified-iswa.h +128 -0
- package/cpp/llama-kv-cache-unified.cpp +1841 -0
- package/cpp/llama-kv-cache-unified.h +303 -0
- package/cpp/llama-kv-cells.h +439 -0
- package/cpp/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama-memory-hybrid.h +138 -0
- package/cpp/llama-memory-recurrent.cpp +1112 -0
- package/cpp/llama-memory-recurrent.h +183 -0
- package/cpp/llama-memory.cpp +41 -0
- package/cpp/llama-memory.h +86 -5
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +42 -17
- package/cpp/llama-model-saver.cpp +1 -0
- package/cpp/llama-model.cpp +1639 -513
- package/cpp/llama-model.h +26 -0
- package/cpp/llama-sampling.cpp +2 -2
- package/cpp/llama-vocab.cpp +65 -28
- package/cpp/llama-vocab.h +1 -0
- package/cpp/llama.cpp +11 -7
- package/cpp/llama.h +150 -42
- package/cpp/minja/chat-template.hpp +1 -1
- package/cpp/minja/minja.hpp +1 -1
- package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/cpp/nlohmann/json_fwd.hpp +187 -0
- package/cpp/regex-partial.cpp +204 -0
- package/cpp/regex-partial.h +56 -0
- package/cpp/rn-llama.cpp +646 -35
- package/cpp/rn-llama.h +32 -1
- package/cpp/rn-tts.h +39 -0
- package/cpp/sampling.cpp +7 -8
- package/cpp/tools/mtmd/clip-impl.h +5 -0
- package/cpp/tools/mtmd/clip.cpp +572 -436
- package/cpp/tools/mtmd/clip.h +14 -4
- package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
- package/cpp/tools/mtmd/mtmd-audio.h +2 -17
- package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
- package/cpp/tools/mtmd/mtmd-helper.h +91 -0
- package/cpp/tools/mtmd/mtmd.cpp +368 -248
- package/cpp/tools/mtmd/mtmd.h +6 -70
- package/cpp/unicode.cpp +5 -0
- package/ios/CMakeLists.txt +26 -6
- package/ios/RNLlama.h +1 -1
- package/ios/RNLlama.mm +153 -3
- package/ios/RNLlamaContext.h +9 -1
- package/ios/RNLlamaContext.mm +112 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +24 -0
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +46 -2
- package/src/index.ts +105 -1
- package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/cpp/ggml-cpu/sgemm.cpp +0 -3544
- package/cpp/ggml-cpu/sgemm.h +0 -14
- package/cpp/llama-kv-cache.cpp +0 -2827
- package/cpp/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
- /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
- /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
@@ -0,0 +1,187 @@
|
|
1
|
+
// __ _____ _____ _____
|
2
|
+
// __| | __| | | | JSON for Modern C++
|
3
|
+
// | | |__ | | | | | | version 3.12.0
|
4
|
+
// |_____|_____|_____|_|___| https://github.com/nlohmann/json
|
5
|
+
//
|
6
|
+
// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann <https://nlohmann.me>
|
7
|
+
// SPDX-License-Identifier: MIT
|
8
|
+
|
9
|
+
#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_
|
10
|
+
#define INCLUDE_NLOHMANN_JSON_FWD_HPP_
|
11
|
+
|
12
|
+
#include <cstdint> // int64_t, uint64_t
|
13
|
+
#include <map> // map
|
14
|
+
#include <memory> // allocator
|
15
|
+
#include <string> // string
|
16
|
+
#include <vector> // vector
|
17
|
+
|
18
|
+
// #include <nlohmann/detail/abi_macros.hpp>
|
19
|
+
// __ _____ _____ _____
|
20
|
+
// __| | __| | | | JSON for Modern C++
|
21
|
+
// | | |__ | | | | | | version 3.12.0
|
22
|
+
// |_____|_____|_____|_|___| https://github.com/nlohmann/json
|
23
|
+
//
|
24
|
+
// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann <https://nlohmann.me>
|
25
|
+
// SPDX-License-Identifier: MIT
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
// This file contains all macro definitions affecting or depending on the ABI
|
30
|
+
|
31
|
+
#ifndef JSON_SKIP_LIBRARY_VERSION_CHECK
|
32
|
+
#if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH)
|
33
|
+
#if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 12 || NLOHMANN_JSON_VERSION_PATCH != 0
|
34
|
+
#warning "Already included a different version of the library!"
|
35
|
+
#endif
|
36
|
+
#endif
|
37
|
+
#endif
|
38
|
+
|
39
|
+
#define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum)
|
40
|
+
#define NLOHMANN_JSON_VERSION_MINOR 12 // NOLINT(modernize-macro-to-enum)
|
41
|
+
#define NLOHMANN_JSON_VERSION_PATCH 0 // NOLINT(modernize-macro-to-enum)
|
42
|
+
|
43
|
+
#ifndef JSON_DIAGNOSTICS
|
44
|
+
#define JSON_DIAGNOSTICS 0
|
45
|
+
#endif
|
46
|
+
|
47
|
+
#ifndef JSON_DIAGNOSTIC_POSITIONS
|
48
|
+
#define JSON_DIAGNOSTIC_POSITIONS 0
|
49
|
+
#endif
|
50
|
+
|
51
|
+
#ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON
|
52
|
+
#define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0
|
53
|
+
#endif
|
54
|
+
|
55
|
+
#if JSON_DIAGNOSTICS
|
56
|
+
#define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS _diag
|
57
|
+
#else
|
58
|
+
#define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS
|
59
|
+
#endif
|
60
|
+
|
61
|
+
#if JSON_DIAGNOSTIC_POSITIONS
|
62
|
+
#define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS _dp
|
63
|
+
#else
|
64
|
+
#define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS
|
65
|
+
#endif
|
66
|
+
|
67
|
+
#if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON
|
68
|
+
#define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp
|
69
|
+
#else
|
70
|
+
#define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON
|
71
|
+
#endif
|
72
|
+
|
73
|
+
#ifndef NLOHMANN_JSON_NAMESPACE_NO_VERSION
|
74
|
+
#define NLOHMANN_JSON_NAMESPACE_NO_VERSION 0
|
75
|
+
#endif
|
76
|
+
|
77
|
+
// Construct the namespace ABI tags component
|
78
|
+
#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) json_abi ## a ## b ## c
|
79
|
+
#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b, c) \
|
80
|
+
NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c)
|
81
|
+
|
82
|
+
#define NLOHMANN_JSON_ABI_TAGS \
|
83
|
+
NLOHMANN_JSON_ABI_TAGS_CONCAT( \
|
84
|
+
NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \
|
85
|
+
NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON, \
|
86
|
+
NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS)
|
87
|
+
|
88
|
+
// Construct the namespace version component
|
89
|
+
#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \
|
90
|
+
_v ## major ## _ ## minor ## _ ## patch
|
91
|
+
#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(major, minor, patch) \
|
92
|
+
NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch)
|
93
|
+
|
94
|
+
#if NLOHMANN_JSON_NAMESPACE_NO_VERSION
|
95
|
+
#define NLOHMANN_JSON_NAMESPACE_VERSION
|
96
|
+
#else
|
97
|
+
#define NLOHMANN_JSON_NAMESPACE_VERSION \
|
98
|
+
NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(NLOHMANN_JSON_VERSION_MAJOR, \
|
99
|
+
NLOHMANN_JSON_VERSION_MINOR, \
|
100
|
+
NLOHMANN_JSON_VERSION_PATCH)
|
101
|
+
#endif
|
102
|
+
|
103
|
+
// Combine namespace components
|
104
|
+
#define NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) a ## b
|
105
|
+
#define NLOHMANN_JSON_NAMESPACE_CONCAT(a, b) \
|
106
|
+
NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b)
|
107
|
+
|
108
|
+
#ifndef NLOHMANN_JSON_NAMESPACE
|
109
|
+
#define NLOHMANN_JSON_NAMESPACE \
|
110
|
+
nlohmann::NLOHMANN_JSON_NAMESPACE_CONCAT( \
|
111
|
+
NLOHMANN_JSON_ABI_TAGS, \
|
112
|
+
NLOHMANN_JSON_NAMESPACE_VERSION)
|
113
|
+
#endif
|
114
|
+
|
115
|
+
#ifndef NLOHMANN_JSON_NAMESPACE_BEGIN
|
116
|
+
#define NLOHMANN_JSON_NAMESPACE_BEGIN \
|
117
|
+
namespace nlohmann \
|
118
|
+
{ \
|
119
|
+
inline namespace NLOHMANN_JSON_NAMESPACE_CONCAT( \
|
120
|
+
NLOHMANN_JSON_ABI_TAGS, \
|
121
|
+
NLOHMANN_JSON_NAMESPACE_VERSION) \
|
122
|
+
{
|
123
|
+
#endif
|
124
|
+
|
125
|
+
#ifndef NLOHMANN_JSON_NAMESPACE_END
|
126
|
+
#define NLOHMANN_JSON_NAMESPACE_END \
|
127
|
+
} /* namespace (inline namespace) NOLINT(readability/namespace) */ \
|
128
|
+
} // namespace nlohmann
|
129
|
+
#endif
|
130
|
+
|
131
|
+
|
132
|
+
/*!
|
133
|
+
@brief namespace for Niels Lohmann
|
134
|
+
@see https://github.com/nlohmann
|
135
|
+
@since version 1.0.0
|
136
|
+
*/
|
137
|
+
NLOHMANN_JSON_NAMESPACE_BEGIN
|
138
|
+
|
139
|
+
/*!
|
140
|
+
@brief default JSONSerializer template argument
|
141
|
+
|
142
|
+
This serializer ignores the template arguments and uses ADL
|
143
|
+
([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl))
|
144
|
+
for serialization.
|
145
|
+
*/
|
146
|
+
template<typename T = void, typename SFINAE = void>
|
147
|
+
struct adl_serializer;
|
148
|
+
|
149
|
+
/// a class to store JSON values
|
150
|
+
/// @sa https://json.nlohmann.me/api/basic_json/
|
151
|
+
template<template<typename U, typename V, typename... Args> class ObjectType =
|
152
|
+
std::map,
|
153
|
+
template<typename U, typename... Args> class ArrayType = std::vector,
|
154
|
+
class StringType = std::string, class BooleanType = bool,
|
155
|
+
class NumberIntegerType = std::int64_t,
|
156
|
+
class NumberUnsignedType = std::uint64_t,
|
157
|
+
class NumberFloatType = double,
|
158
|
+
template<typename U> class AllocatorType = std::allocator,
|
159
|
+
template<typename T, typename SFINAE = void> class JSONSerializer =
|
160
|
+
adl_serializer,
|
161
|
+
class BinaryType = std::vector<std::uint8_t>, // cppcheck-suppress syntaxError
|
162
|
+
class CustomBaseClass = void>
|
163
|
+
class basic_json;
|
164
|
+
|
165
|
+
/// @brief JSON Pointer defines a string syntax for identifying a specific value within a JSON document
|
166
|
+
/// @sa https://json.nlohmann.me/api/json_pointer/
|
167
|
+
template<typename RefStringType>
|
168
|
+
class json_pointer;
|
169
|
+
|
170
|
+
/*!
|
171
|
+
@brief default specialization
|
172
|
+
@sa https://json.nlohmann.me/api/json/
|
173
|
+
*/
|
174
|
+
using json = basic_json<>;
|
175
|
+
|
176
|
+
/// @brief a minimal map-like container that preserves insertion order
|
177
|
+
/// @sa https://json.nlohmann.me/api/ordered_map/
|
178
|
+
template<class Key, class T, class IgnoredLess, class Allocator>
|
179
|
+
struct ordered_map;
|
180
|
+
|
181
|
+
/// @brief specialization that maintains the insertion order of object keys
|
182
|
+
/// @sa https://json.nlohmann.me/api/ordered_json/
|
183
|
+
using ordered_json = basic_json<nlohmann::ordered_map>;
|
184
|
+
|
185
|
+
NLOHMANN_JSON_NAMESPACE_END
|
186
|
+
|
187
|
+
#endif // INCLUDE_NLOHMANN_JSON_FWD_HPP_
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include <regex>
|
4
|
+
#include <string>
|
5
|
+
|
6
|
+
enum common_regex_match_type {
|
7
|
+
COMMON_REGEX_MATCH_TYPE_NONE,
|
8
|
+
COMMON_REGEX_MATCH_TYPE_PARTIAL,
|
9
|
+
COMMON_REGEX_MATCH_TYPE_FULL,
|
10
|
+
};
|
11
|
+
|
12
|
+
struct common_string_range {
|
13
|
+
size_t begin;
|
14
|
+
size_t end;
|
15
|
+
common_string_range(size_t begin, size_t end) : begin(begin), end(end) {
|
16
|
+
if (begin > end) {
|
17
|
+
throw std::runtime_error("Invalid range");
|
18
|
+
}
|
19
|
+
}
|
20
|
+
// prevent default ctor
|
21
|
+
common_string_range() = delete;
|
22
|
+
bool empty() const {
|
23
|
+
return begin == end;
|
24
|
+
}
|
25
|
+
bool operator==(const common_string_range & other) const {
|
26
|
+
return begin == other.begin && end == other.end;
|
27
|
+
}
|
28
|
+
};
|
29
|
+
|
30
|
+
struct common_regex_match {
|
31
|
+
common_regex_match_type type = COMMON_REGEX_MATCH_TYPE_NONE;
|
32
|
+
std::vector<common_string_range> groups;
|
33
|
+
|
34
|
+
bool operator==(const common_regex_match & other) const {
|
35
|
+
return type == other.type && groups == other.groups;
|
36
|
+
}
|
37
|
+
bool operator!=(const common_regex_match & other) const {
|
38
|
+
return !(*this == other);
|
39
|
+
}
|
40
|
+
};
|
41
|
+
|
42
|
+
class common_regex {
|
43
|
+
std::string pattern;
|
44
|
+
std::regex rx;
|
45
|
+
std::regex rx_reversed_partial;
|
46
|
+
|
47
|
+
public:
|
48
|
+
explicit common_regex(const std::string & pattern);
|
49
|
+
|
50
|
+
common_regex_match search(const std::string & input, size_t pos, bool as_match = false) const;
|
51
|
+
|
52
|
+
const std::string & str() const { return pattern; }
|
53
|
+
};
|
54
|
+
|
55
|
+
// For testing only (pretty print of failures).
|
56
|
+
std::string regex_to_reversed_partial_regex(const std::string & pattern);
|
package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
|
4
4
|
#include <sstream>
|
5
5
|
#include <iostream>
|
6
|
+
#include <thread>
|
6
7
|
#include "chat.h"
|
7
8
|
#include "common.h"
|
8
9
|
#include "ggml.h"
|
@@ -10,10 +11,13 @@
|
|
10
11
|
#include "llama.h"
|
11
12
|
#include "llama-impl.h"
|
12
13
|
#include "sampling.h"
|
14
|
+
#include "nlohmann/json.hpp"
|
13
15
|
#if defined(__ANDROID__)
|
14
16
|
#include <android/log.h>
|
15
17
|
#endif
|
16
18
|
|
19
|
+
using json = nlohmann::ordered_json;
|
20
|
+
|
17
21
|
namespace rnllama {
|
18
22
|
|
19
23
|
std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token);
|
@@ -43,6 +47,8 @@ struct completion_token_output
|
|
43
47
|
|
44
48
|
struct llama_rn_context_mtmd;
|
45
49
|
|
50
|
+
struct llama_rn_context_vocoder;
|
51
|
+
|
46
52
|
struct llama_rn_tokenize_result {
|
47
53
|
std::vector<llama_token> tokens;
|
48
54
|
bool has_media = false;
|
@@ -51,6 +57,12 @@ struct llama_rn_tokenize_result {
|
|
51
57
|
std::vector<size_t> chunk_pos_media; // media only
|
52
58
|
};
|
53
59
|
|
60
|
+
enum tts_type {
|
61
|
+
UNKNOWN = -1,
|
62
|
+
OUTETTS_V0_2 = 1,
|
63
|
+
OUTETTS_V0_3 = 2,
|
64
|
+
};
|
65
|
+
|
54
66
|
// Main context class
|
55
67
|
struct llama_rn_context {
|
56
68
|
bool is_predicting = false;
|
@@ -58,6 +70,7 @@ struct llama_rn_context {
|
|
58
70
|
bool has_next_token = false;
|
59
71
|
std::string generated_text;
|
60
72
|
std::vector<completion_token_output> generated_token_probs;
|
73
|
+
std::vector<llama_token> audio_tokens;
|
61
74
|
|
62
75
|
size_t num_prompt_tokens = 0;
|
63
76
|
size_t num_tokens_predicted = 0;
|
@@ -69,6 +82,9 @@ struct llama_rn_context {
|
|
69
82
|
common_params params;
|
70
83
|
common_init_result llama_init;
|
71
84
|
|
85
|
+
bool next_token_uses_guide_token = true;
|
86
|
+
std::vector<llama_token> guide_tokens;
|
87
|
+
|
72
88
|
llama_model *model = nullptr;
|
73
89
|
float loading_progress = 0;
|
74
90
|
bool is_load_interrupted = false;
|
@@ -92,6 +108,9 @@ struct llama_rn_context {
|
|
92
108
|
llama_rn_context_mtmd *mtmd_wrapper = nullptr;
|
93
109
|
bool has_multimodal = false;
|
94
110
|
|
111
|
+
llama_rn_context_vocoder *vocoder_wrapper = nullptr;
|
112
|
+
bool has_vocoder = false;
|
113
|
+
|
95
114
|
~llama_rn_context();
|
96
115
|
|
97
116
|
void rewind();
|
@@ -104,7 +123,8 @@ struct llama_rn_context {
|
|
104
123
|
const std::string &json_schema,
|
105
124
|
const std::string &tools,
|
106
125
|
const bool &parallel_tool_calls,
|
107
|
-
const std::string &tool_choice
|
126
|
+
const std::string &tool_choice,
|
127
|
+
const bool &enable_thinking
|
108
128
|
) const;
|
109
129
|
std::string getFormattedChat(
|
110
130
|
const std::string &messages,
|
@@ -112,12 +132,14 @@ struct llama_rn_context {
|
|
112
132
|
) const;
|
113
133
|
void truncatePrompt(std::vector<llama_token> &prompt_tokens);
|
114
134
|
void loadPrompt(const std::vector<std::string> &media_paths);
|
135
|
+
void setGuideTokens(const std::vector<llama_token> &tokens);
|
115
136
|
void beginCompletion();
|
116
137
|
void endCompletion();
|
117
138
|
completion_token_output nextToken();
|
118
139
|
size_t findStoppingStrings(const std::string &text, const size_t last_token_size, const stop_type type);
|
119
140
|
completion_token_output doCompletion();
|
120
141
|
std::vector<float> getEmbedding(common_params &embd_params);
|
142
|
+
std::vector<float> rerank(const std::string &query, const std::vector<std::string> &documents);
|
121
143
|
std::string bench(int pp, int tg, int pl, int nr);
|
122
144
|
int applyLoraAdapters(std::vector<common_adapter_lora_info> lora);
|
123
145
|
void removeLoraAdapters();
|
@@ -137,6 +159,15 @@ struct llama_rn_context {
|
|
137
159
|
);
|
138
160
|
|
139
161
|
llama_rn_tokenize_result tokenize(const std::string &text, const std::vector<std::string> &media_paths);
|
162
|
+
|
163
|
+
// Vocoder methods
|
164
|
+
bool initVocoder(const std::string &vocoder_model_path);
|
165
|
+
tts_type getTTSType(json speaker = nullptr);
|
166
|
+
std::string getFormattedAudioCompletion(const std::string &speaker_json_str, const std::string &text_to_speak);
|
167
|
+
std::vector<llama_token> getAudioCompletionGuideTokens(const std::string &text_to_speak);
|
168
|
+
std::vector<float> decodeAudioTokens(const std::vector<llama_token> &tokens);
|
169
|
+
bool isVocoderEnabled() const;
|
170
|
+
void releaseVocoder();
|
140
171
|
};
|
141
172
|
|
142
173
|
// Logging macros
|
package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
namespace rnllama {
|
4
|
+
|
5
|
+
// the default speaker profile is from: https://github.com/edwko/OuteTTS/blob/main/outetts/version/v1/default_speakers/en_male_1.json
|
6
|
+
static const std::string default_audio_text = "<|text_start|>the<|text_sep|>overall<|text_sep|>package<|text_sep|>from<|text_sep|>just<|text_sep|>two<|text_sep|>people<|text_sep|>is<|text_sep|>pretty<|text_sep|>remarkable<|text_sep|>sure<|text_sep|>i<|text_sep|>have<|text_sep|>some<|text_sep|>critiques<|text_sep|>about<|text_sep|>some<|text_sep|>of<|text_sep|>the<|text_sep|>gameplay<|text_sep|>aspects<|text_sep|>but<|text_sep|>its<|text_sep|>still<|text_sep|>really<|text_sep|>enjoyable<|text_sep|>and<|text_sep|>it<|text_sep|>looks<|text_sep|>lovely<|text_sep|>";
|
7
|
+
static const std::string default_audio_data = R"(<|audio_start|>
|
8
|
+
the<|t_0.08|><|code_start|><|257|><|740|><|636|><|913|><|788|><|1703|><|code_end|>
|
9
|
+
overall<|t_0.36|><|code_start|><|127|><|201|><|191|><|774|><|700|><|532|><|1056|><|557|><|798|><|298|><|1741|><|747|><|1662|><|1617|><|1702|><|1527|><|368|><|1588|><|1049|><|1008|><|1625|><|747|><|1576|><|728|><|1019|><|1696|><|1765|><|code_end|>
|
10
|
+
package<|t_0.56|><|code_start|><|935|><|584|><|1319|><|627|><|1016|><|1491|><|1344|><|1117|><|1526|><|1040|><|239|><|1435|><|951|><|498|><|723|><|1180|><|535|><|789|><|1649|><|1637|><|78|><|465|><|1668|><|901|><|595|><|1675|><|117|><|1009|><|1667|><|320|><|840|><|79|><|507|><|1762|><|1508|><|1228|><|1768|><|802|><|1450|><|1457|><|232|><|639|><|code_end|>
|
11
|
+
from<|t_0.19|><|code_start|><|604|><|782|><|1682|><|872|><|1532|><|1600|><|1036|><|1761|><|647|><|1554|><|1371|><|653|><|1595|><|950|><|code_end|>
|
12
|
+
just<|t_0.25|><|code_start|><|1782|><|1670|><|317|><|786|><|1748|><|631|><|599|><|1155|><|1364|><|1524|><|36|><|1591|><|889|><|1535|><|541|><|440|><|1532|><|50|><|870|><|code_end|>
|
13
|
+
two<|t_0.24|><|code_start|><|1681|><|1510|><|673|><|799|><|805|><|1342|><|330|><|519|><|62|><|640|><|1138|><|565|><|1552|><|1497|><|1552|><|572|><|1715|><|1732|><|code_end|>
|
14
|
+
people<|t_0.39|><|code_start|><|593|><|274|><|136|><|740|><|691|><|633|><|1484|><|1061|><|1138|><|1485|><|344|><|428|><|397|><|1562|><|645|><|917|><|1035|><|1449|><|1669|><|487|><|442|><|1484|><|1329|><|1832|><|1704|><|600|><|761|><|653|><|269|><|code_end|>
|
15
|
+
is<|t_0.16|><|code_start|><|566|><|583|><|1755|><|646|><|1337|><|709|><|802|><|1008|><|485|><|1583|><|652|><|10|><|code_end|>
|
16
|
+
pretty<|t_0.32|><|code_start|><|1818|><|1747|><|692|><|733|><|1010|><|534|><|406|><|1697|><|1053|><|1521|><|1355|><|1274|><|816|><|1398|><|211|><|1218|><|817|><|1472|><|1703|><|686|><|13|><|822|><|445|><|1068|><|code_end|>
|
17
|
+
remarkable<|t_0.68|><|code_start|><|230|><|1048|><|1705|><|355|><|706|><|1149|><|1535|><|1787|><|1356|><|1396|><|835|><|1583|><|486|><|1249|><|286|><|937|><|1076|><|1150|><|614|><|42|><|1058|><|705|><|681|><|798|><|934|><|490|><|514|><|1399|><|572|><|1446|><|1703|><|1346|><|1040|><|1426|><|1304|><|664|><|171|><|1530|><|625|><|64|><|1708|><|1830|><|1030|><|443|><|1509|><|1063|><|1605|><|1785|><|721|><|1440|><|923|><|code_end|>
|
18
|
+
sure<|t_0.36|><|code_start|><|792|><|1780|><|923|><|1640|><|265|><|261|><|1525|><|567|><|1491|><|1250|><|1730|><|362|><|919|><|1766|><|543|><|1|><|333|><|113|><|970|><|252|><|1606|><|133|><|302|><|1810|><|1046|><|1190|><|1675|><|code_end|>
|
19
|
+
i<|t_0.08|><|code_start|><|123|><|439|><|1074|><|705|><|1799|><|637|><|code_end|>
|
20
|
+
have<|t_0.16|><|code_start|><|1509|><|599|><|518|><|1170|><|552|><|1029|><|1267|><|864|><|419|><|143|><|1061|><|0|><|code_end|>
|
21
|
+
some<|t_0.16|><|code_start|><|619|><|400|><|1270|><|62|><|1370|><|1832|><|917|><|1661|><|167|><|269|><|1366|><|1508|><|code_end|>
|
22
|
+
critiques<|t_0.60|><|code_start|><|559|><|584|><|1163|><|1129|><|1313|><|1728|><|721|><|1146|><|1093|><|577|><|928|><|27|><|630|><|1080|><|1346|><|1337|><|320|><|1382|><|1175|><|1682|><|1556|><|990|><|1683|><|860|><|1721|><|110|><|786|><|376|><|1085|><|756|><|1523|><|234|><|1334|><|1506|><|1578|><|659|><|612|><|1108|><|1466|><|1647|><|308|><|1470|><|746|><|556|><|1061|><|code_end|>
|
23
|
+
about<|t_0.29|><|code_start|><|26|><|1649|><|545|><|1367|><|1263|><|1728|><|450|><|859|><|1434|><|497|><|1220|><|1285|><|179|><|755|><|1154|><|779|><|179|><|1229|><|1213|><|922|><|1774|><|1408|><|code_end|>
|
24
|
+
some<|t_0.23|><|code_start|><|986|><|28|><|1649|><|778|><|858|><|1519|><|1|><|18|><|26|><|1042|><|1174|><|1309|><|1499|><|1712|><|1692|><|1516|><|1574|><|code_end|>
|
25
|
+
of<|t_0.07|><|code_start|><|197|><|716|><|1039|><|1662|><|64|><|code_end|>
|
26
|
+
the<|t_0.08|><|code_start|><|1811|><|1568|><|569|><|886|><|1025|><|1374|><|code_end|>
|
27
|
+
gameplay<|t_0.48|><|code_start|><|1269|><|1092|><|933|><|1362|><|1762|><|1700|><|1675|><|215|><|781|><|1086|><|461|><|838|><|1022|><|759|><|649|><|1416|><|1004|><|551|><|909|><|787|><|343|><|830|><|1391|><|1040|><|1622|><|1779|><|1360|><|1231|><|1187|><|1317|><|76|><|997|><|989|><|978|><|737|><|189|><|code_end|>
|
28
|
+
aspects<|t_0.56|><|code_start|><|1423|><|797|><|1316|><|1222|><|147|><|719|><|1347|><|386|><|1390|><|1558|><|154|><|440|><|634|><|592|><|1097|><|1718|><|712|><|763|><|1118|><|1721|><|1311|><|868|><|580|><|362|><|1435|><|868|><|247|><|221|><|886|><|1145|><|1274|><|1284|><|457|><|1043|><|1459|><|1818|><|62|><|599|><|1035|><|62|><|1649|><|778|><|code_end|>
|
29
|
+
but<|t_0.20|><|code_start|><|780|><|1825|><|1681|><|1007|><|861|><|710|><|702|><|939|><|1669|><|1491|><|613|><|1739|><|823|><|1469|><|648|><|code_end|>
|
30
|
+
its<|t_0.09|><|code_start|><|92|><|688|><|1623|><|962|><|1670|><|527|><|599|><|code_end|>
|
31
|
+
still<|t_0.27|><|code_start|><|636|><|10|><|1217|><|344|><|713|><|957|><|823|><|154|><|1649|><|1286|><|508|><|214|><|1760|><|1250|><|456|><|1352|><|1368|><|921|><|615|><|5|><|code_end|>
|
32
|
+
really<|t_0.36|><|code_start|><|55|><|420|><|1008|><|1659|><|27|><|644|><|1266|><|617|><|761|><|1712|><|109|><|1465|><|1587|><|503|><|1541|><|619|><|197|><|1019|><|817|><|269|><|377|><|362|><|1381|><|507|><|1488|><|4|><|1695|><|code_end|>
|
33
|
+
enjoyable<|t_0.49|><|code_start|><|678|><|501|><|864|><|319|><|288|><|1472|><|1341|><|686|><|562|><|1463|><|619|><|1563|><|471|><|911|><|730|><|1811|><|1006|><|520|><|861|><|1274|><|125|><|1431|><|638|><|621|><|153|><|876|><|1770|><|437|><|987|><|1653|><|1109|><|898|><|1285|><|80|><|593|><|1709|><|843|><|code_end|>
|
34
|
+
and<|t_0.15|><|code_start|><|1285|><|987|><|303|><|1037|><|730|><|1164|><|502|><|120|><|1737|><|1655|><|1318|><|code_end|>
|
35
|
+
it<|t_0.09|><|code_start|><|848|><|1366|><|395|><|1601|><|1513|><|593|><|1302|><|code_end|>
|
36
|
+
looks<|t_0.27|><|code_start|><|1281|><|1266|><|1755|><|572|><|248|><|1751|><|1257|><|695|><|1380|><|457|><|659|><|585|><|1315|><|1105|><|1776|><|736|><|24|><|736|><|654|><|1027|><|code_end|>
|
37
|
+
lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|1481|><|1721|><|1123|><|438|><|1246|><|1251|><|795|><|659|><|1381|><|1658|><|217|><|1772|><|562|><|952|><|107|><|1129|><|1112|><|467|><|550|><|1079|><|840|><|1615|><|1469|><|1380|><|168|><|917|><|836|><|1827|><|437|><|583|><|67|><|595|><|1087|><|1646|><|1493|><|1677|><|code_end|>)";
|
38
|
+
|
39
|
+
}
|
Binary file
|
Binary file
|
package/jest/mock.js
CHANGED
@@ -4,6 +4,7 @@ if (!NativeModules.RNLlama) {
|
|
4
4
|
const demoEmbedding = new Array(768).fill(0.01)
|
5
5
|
|
6
6
|
const contextMap = {}
|
7
|
+
const vocoderMap = {}
|
7
8
|
NativeModules.RNLlama = {
|
8
9
|
setContextLimit: jest.fn(),
|
9
10
|
|
@@ -53,6 +54,9 @@ if (!NativeModules.RNLlama) {
|
|
53
54
|
|
54
55
|
completion: jest.fn(async (contextId, jobId) => {
|
55
56
|
const testResult = {
|
57
|
+
audio_tokens: [
|
58
|
+
1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010,
|
59
|
+
],
|
56
60
|
text: '*giggles*',
|
57
61
|
completion_probabilities: [
|
58
62
|
{
|
@@ -192,6 +196,7 @@ if (!NativeModules.RNLlama) {
|
|
192
196
|
})),
|
193
197
|
detokenize: jest.fn(async () => ''),
|
194
198
|
embedding: jest.fn(async () => ({ embedding: demoEmbedding })),
|
199
|
+
rerank: jest.fn(async () => []),
|
195
200
|
|
196
201
|
loadSession: jest.fn(async () => ({
|
197
202
|
tokens_loaded: 1,
|
@@ -223,6 +228,25 @@ if (!NativeModules.RNLlama) {
|
|
223
228
|
releaseMultimodal: jest.fn(async (id) => {
|
224
229
|
delete contextMap[id]
|
225
230
|
}),
|
231
|
+
|
232
|
+
initVocoder: jest.fn(async (id) => {
|
233
|
+
vocoderMap[id] = true
|
234
|
+
return true
|
235
|
+
}),
|
236
|
+
releaseVocoder: jest.fn(async (id) => {
|
237
|
+
delete vocoderMap[id]
|
238
|
+
}),
|
239
|
+
isVocoderEnabled: jest.fn(async (id) => vocoderMap[id] || false),
|
240
|
+
getFormattedAudioCompletion: jest.fn(
|
241
|
+
async (id, speakerJsonStr, textToSpeak) =>
|
242
|
+
`${speakerJsonStr || '<default speaker>'}<sep>${textToSpeak}`,
|
243
|
+
),
|
244
|
+
getAudioCompletionGuideTokens: jest.fn(async (id, textToSpeak) =>
|
245
|
+
textToSpeak.split('').map((char) => char.charCodeAt(0) + 1000),
|
246
|
+
),
|
247
|
+
decodeAudioTokens: jest.fn(async (id, tokens) =>
|
248
|
+
tokens.map((token) => token - 1000).map((token) => token / 1024),
|
249
|
+
),
|
226
250
|
}
|
227
251
|
}
|
228
252
|
|
package/package.json
CHANGED
package/src/NativeRNLlama.ts
CHANGED
@@ -12,8 +12,6 @@ export type NativeContextParams = {
|
|
12
12
|
*/
|
13
13
|
chat_template?: string
|
14
14
|
|
15
|
-
reasoning_format?: string
|
16
|
-
|
17
15
|
is_model_asset?: boolean
|
18
16
|
use_progress_callback?: boolean
|
19
17
|
|
@@ -81,6 +79,10 @@ export type NativeContextParams = {
|
|
81
79
|
export type NativeCompletionParams = {
|
82
80
|
prompt: string
|
83
81
|
n_threads?: number
|
82
|
+
/**
|
83
|
+
* Enable Jinja. Default: true if supported by the model
|
84
|
+
*/
|
85
|
+
jinja?: boolean
|
84
86
|
/**
|
85
87
|
* JSON schema for convert to grammar for structured JSON output.
|
86
88
|
* It will be override by grammar if both are set.
|
@@ -94,6 +96,14 @@ export type NativeCompletionParams = {
|
|
94
96
|
* Lazy grammar sampling, trigger by grammar_triggers. Default: false
|
95
97
|
*/
|
96
98
|
grammar_lazy?: boolean
|
99
|
+
/**
|
100
|
+
* Enable thinking if jinja is enabled. Default: true
|
101
|
+
*/
|
102
|
+
enable_thinking?: boolean
|
103
|
+
/**
|
104
|
+
* Force thinking to be open. Default: false
|
105
|
+
*/
|
106
|
+
thinking_forced_open?: boolean
|
97
107
|
/**
|
98
108
|
* Lazy grammar triggers. Default: []
|
99
109
|
*/
|
@@ -104,6 +114,7 @@ export type NativeCompletionParams = {
|
|
104
114
|
}>
|
105
115
|
preserved_tokens?: Array<string>
|
106
116
|
chat_format?: number
|
117
|
+
reasoning_format?: string
|
107
118
|
/**
|
108
119
|
* Path to an image file to process before generating text.
|
109
120
|
* When provided, the image will be processed and added to the context.
|
@@ -225,6 +236,13 @@ export type NativeCompletionParams = {
|
|
225
236
|
*/
|
226
237
|
seed?: number
|
227
238
|
|
239
|
+
/**
|
240
|
+
* Guide tokens for the completion.
|
241
|
+
* Help prevent hallucinations by forcing the TTS to use the correct words.
|
242
|
+
* Default: `[]`
|
243
|
+
*/
|
244
|
+
guide_tokens?: Array<number>
|
245
|
+
|
228
246
|
emit_partial_completion: boolean
|
229
247
|
}
|
230
248
|
|
@@ -285,6 +303,7 @@ export type NativeCompletionResult = {
|
|
285
303
|
timings: NativeCompletionResultTimings
|
286
304
|
|
287
305
|
completion_probabilities?: Array<NativeCompletionTokenProb>
|
306
|
+
audio_tokens?: Array<number>
|
288
307
|
}
|
289
308
|
|
290
309
|
export type NativeTokenizeResult = {
|
@@ -390,6 +409,7 @@ export type JinjaFormattedChatResult = FormattedChatResult & {
|
|
390
409
|
value: string
|
391
410
|
token: number
|
392
411
|
}>
|
412
|
+
thinking_forced_open?: boolean
|
393
413
|
preserved_tokens?: Array<string>
|
394
414
|
additional_stops?: Array<string>
|
395
415
|
}
|
@@ -400,6 +420,15 @@ export type NativeImageProcessingResult = {
|
|
400
420
|
error?: string
|
401
421
|
}
|
402
422
|
|
423
|
+
export type NativeRerankParams = {
|
424
|
+
normalize?: number
|
425
|
+
}
|
426
|
+
|
427
|
+
export type NativeRerankResult = {
|
428
|
+
score: number
|
429
|
+
index: number
|
430
|
+
}
|
431
|
+
|
403
432
|
export interface Spec extends TurboModule {
|
404
433
|
toggleNativeLog(enabled: boolean): Promise<void>
|
405
434
|
setContextLimit(limit: number): Promise<void>
|
@@ -420,6 +449,7 @@ export interface Spec extends TurboModule {
|
|
420
449
|
tools?: string
|
421
450
|
parallel_tool_calls?: string
|
422
451
|
tool_choice?: string
|
452
|
+
enable_thinking?: boolean
|
423
453
|
},
|
424
454
|
): Promise<JinjaFormattedChatResult | string>
|
425
455
|
loadSession(
|
@@ -445,6 +475,12 @@ export interface Spec extends TurboModule {
|
|
445
475
|
text: string,
|
446
476
|
params: NativeEmbeddingParams,
|
447
477
|
): Promise<NativeEmbeddingResult>
|
478
|
+
rerank(
|
479
|
+
contextId: number,
|
480
|
+
query: string,
|
481
|
+
documents: Array<string>,
|
482
|
+
params?: NativeRerankParams,
|
483
|
+
): Promise<Array<NativeRerankResult>>
|
448
484
|
bench(
|
449
485
|
contextId: number,
|
450
486
|
pp: number,
|
@@ -486,6 +522,14 @@ export interface Spec extends TurboModule {
|
|
486
522
|
contextId: number,
|
487
523
|
): Promise<void>
|
488
524
|
|
525
|
+
// TTS methods
|
526
|
+
initVocoder(contextId: number, vocoderModelPath: string): Promise<boolean>
|
527
|
+
isVocoderEnabled(contextId: number): Promise<boolean>
|
528
|
+
getFormattedAudioCompletion(contextId: number, speakerJsonStr: string, textToSpeak: string): Promise<string>
|
529
|
+
getAudioCompletionGuideTokens(contextId: number, textToSpeak: string): Promise<Array<number>>
|
530
|
+
decodeAudioTokens(contextId: number, tokens: number[]): Promise<Array<number>>
|
531
|
+
releaseVocoder(contextId: number): Promise<void>
|
532
|
+
|
489
533
|
releaseContext(contextId: number): Promise<void>
|
490
534
|
|
491
535
|
releaseAllContexts(): Promise<void>
|