cui-llama.rn 1.4.6 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -20
- package/README.md +317 -319
- package/android/build.gradle +116 -116
- package/android/gradle.properties +5 -5
- package/android/src/main/AndroidManifest.xml +4 -4
- package/android/src/main/CMakeLists.txt +124 -117
- package/android/src/main/java/com/rnllama/LlamaContext.java +645 -645
- package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
- package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
- package/android/src/main/jni-utils.h +100 -100
- package/android/src/main/jni.cpp +1263 -1245
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
- package/cpp/README.md +4 -4
- package/cpp/binary-ops.cpp +158 -0
- package/cpp/binary-ops.h +16 -0
- package/cpp/chat.cpp +1769 -1779
- package/cpp/chat.h +9 -1
- package/cpp/common.cpp +20 -522
- package/cpp/common.h +13 -36
- package/cpp/cpu-common.h +72 -0
- package/cpp/ggml-common.h +12 -6
- package/cpp/ggml-cpu-aarch64.cpp +1557 -80
- package/cpp/ggml-cpu-impl.h +2 -21
- package/cpp/ggml-cpu-quants.c +904 -405
- package/cpp/ggml-cpu.c +909 -13237
- package/cpp/ggml-impl.h +50 -23
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +597 -523
- package/cpp/ggml-metal.m +798 -580
- package/cpp/ggml.c +92 -3
- package/cpp/ggml.h +30 -6
- package/cpp/gguf.cpp +1 -0
- package/cpp/llama-adapter.cpp +55 -20
- package/cpp/llama-adapter.h +11 -9
- package/cpp/llama-arch.cpp +217 -16
- package/cpp/llama-arch.h +25 -0
- package/cpp/llama-batch.h +2 -2
- package/cpp/llama-chat.cpp +54 -2
- package/cpp/llama-chat.h +3 -0
- package/cpp/llama-context.cpp +2294 -1238
- package/cpp/llama-context.h +214 -77
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +1695 -0
- package/cpp/llama-graph.h +592 -0
- package/cpp/llama-hparams.cpp +8 -0
- package/cpp/llama-hparams.h +17 -0
- package/cpp/llama-io.cpp +15 -0
- package/cpp/llama-io.h +35 -0
- package/cpp/llama-kv-cache.cpp +965 -303
- package/cpp/llama-kv-cache.h +145 -151
- package/cpp/llama-memory.cpp +1 -0
- package/cpp/llama-memory.h +21 -0
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +10 -5
- package/cpp/llama-model-loader.h +5 -3
- package/cpp/llama-model.cpp +9194 -201
- package/cpp/llama-model.h +40 -1
- package/cpp/llama-sampling.cpp +5 -0
- package/cpp/llama-vocab.cpp +36 -5
- package/cpp/llama.cpp +51 -9984
- package/cpp/llama.h +102 -22
- package/cpp/log.cpp +34 -0
- package/cpp/minja/chat-template.hpp +15 -7
- package/cpp/minja/minja.hpp +120 -94
- package/cpp/ops.cpp +8723 -0
- package/cpp/ops.h +128 -0
- package/cpp/rn-llama.cpp +873 -882
- package/cpp/rn-llama.h +138 -148
- package/cpp/sampling.cpp +3 -0
- package/cpp/sampling.h +107 -107
- package/cpp/sgemm.cpp +533 -88
- package/cpp/simd-mappings.h +888 -0
- package/cpp/speculative.cpp +4 -4
- package/cpp/unary-ops.cpp +186 -0
- package/cpp/unary-ops.h +28 -0
- package/cpp/unicode-data.cpp +7034 -7034
- package/cpp/unicode-data.h +20 -20
- package/cpp/unicode.cpp +849 -849
- package/cpp/unicode.h +66 -66
- package/cpp/vec.cpp +258 -0
- package/cpp/vec.h +802 -0
- package/ios/CMakeLists.txt +116 -105
- package/ios/RNLlama.h +7 -7
- package/ios/RNLlama.mm +418 -405
- package/ios/RNLlamaContext.h +57 -57
- package/ios/RNLlamaContext.mm +835 -819
- package/ios/rnllama.xcframework/Info.plist +74 -74
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
- package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/chat-template.hpp +15 -7
- package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/minja.hpp +120 -94
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +203 -203
- package/lib/commonjs/NativeRNLlama.js +1 -2
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/chat.js.map +1 -1
- package/lib/commonjs/grammar.js +12 -31
- package/lib/commonjs/grammar.js.map +1 -1
- package/lib/commonjs/index.js +47 -47
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/package.json +1 -0
- package/lib/module/NativeRNLlama.js +2 -0
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/chat.js +2 -0
- package/lib/module/chat.js.map +1 -1
- package/lib/module/grammar.js +14 -31
- package/lib/module/grammar.js.map +1 -1
- package/lib/module/index.js +47 -45
- package/lib/module/index.js.map +1 -1
- package/lib/module/package.json +1 -0
- package/lib/typescript/NativeRNLlama.d.ts +6 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/llama-rn.podspec +48 -48
- package/package.json +233 -233
- package/src/NativeRNLlama.ts +426 -424
- package/src/chat.ts +44 -44
- package/src/grammar.ts +854 -854
- package/src/index.ts +495 -485
package/src/NativeRNLlama.ts
CHANGED
@@ -1,424 +1,426 @@
|
|
1
|
-
import type { TurboModule } from 'react-native'
|
2
|
-
import { TurboModuleRegistry } from 'react-native'
|
3
|
-
|
4
|
-
export type NativeEmbeddingParams = {
|
5
|
-
embd_normalize?: number
|
6
|
-
}
|
7
|
-
|
8
|
-
export type NativeContextParams = {
|
9
|
-
model: string
|
10
|
-
/**
|
11
|
-
* Chat template to override the default one from the model.
|
12
|
-
*/
|
13
|
-
chat_template?: string
|
14
|
-
|
15
|
-
reasoning_format?: string
|
16
|
-
|
17
|
-
is_model_asset?: boolean
|
18
|
-
use_progress_callback?: boolean
|
19
|
-
|
20
|
-
n_ctx?: number
|
21
|
-
n_batch?: number
|
22
|
-
n_ubatch?: number
|
23
|
-
|
24
|
-
n_threads?: number
|
25
|
-
|
26
|
-
/**
|
27
|
-
* Number of layers to store in VRAM (Currently only for iOS)
|
28
|
-
*/
|
29
|
-
n_gpu_layers?: number
|
30
|
-
/**
|
31
|
-
* Skip GPU devices (iOS only)
|
32
|
-
*/
|
33
|
-
no_gpu_devices?: boolean
|
34
|
-
|
35
|
-
/**
|
36
|
-
* Enable flash attention, only recommended in GPU device (Experimental in llama.cpp)
|
37
|
-
*/
|
38
|
-
flash_attn?: boolean
|
39
|
-
|
40
|
-
/**
|
41
|
-
* KV cache data type for the K (Experimental in llama.cpp)
|
42
|
-
*/
|
43
|
-
cache_type_k?: string
|
44
|
-
/**
|
45
|
-
* KV cache data type for the V (Experimental in llama.cpp)
|
46
|
-
*/
|
47
|
-
cache_type_v?: string
|
48
|
-
|
49
|
-
use_mlock?: boolean
|
50
|
-
use_mmap?: boolean
|
51
|
-
vocab_only?: boolean
|
52
|
-
|
53
|
-
/**
|
54
|
-
* Single LoRA adapter path
|
55
|
-
*/
|
56
|
-
lora?: string
|
57
|
-
/**
|
58
|
-
* Single LoRA adapter scale
|
59
|
-
*/
|
60
|
-
lora_scaled?: number
|
61
|
-
/**
|
62
|
-
* LoRA adapter list
|
63
|
-
*/
|
64
|
-
lora_list?: Array<{ path: string; scaled?: number }>
|
65
|
-
|
66
|
-
rope_freq_base?: number
|
67
|
-
rope_freq_scale?: number
|
68
|
-
|
69
|
-
pooling_type?: number
|
70
|
-
|
71
|
-
// Embedding params
|
72
|
-
embedding?: boolean
|
73
|
-
embd_normalize?: number
|
74
|
-
}
|
75
|
-
|
76
|
-
export type NativeCompletionParams = {
|
77
|
-
prompt: string
|
78
|
-
n_threads?: number
|
79
|
-
/**
|
80
|
-
* JSON schema to convert to a grammar for structured JSON output.
|
81
|
-
* It will be overridden by grammar if both are set.
|
82
|
-
*/
|
83
|
-
json_schema?: string
|
84
|
-
/**
|
85
|
-
* Set grammar for grammar-based sampling. Default: no grammar
|
86
|
-
*/
|
87
|
-
grammar?: string
|
88
|
-
/**
|
89
|
-
* Lazy grammar sampling, triggered by grammar_triggers. Default: false
|
90
|
-
*/
|
91
|
-
grammar_lazy?: boolean
|
92
|
-
/**
|
93
|
-
* Lazy grammar triggers. Default: []
|
94
|
-
*/
|
95
|
-
grammar_triggers?: Array<{
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
*
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
*
|
109
|
-
*
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
*
|
115
|
-
*
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
*
|
206
|
-
*
|
207
|
-
*
|
208
|
-
*
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
1
|
+
import type { TurboModule } from 'react-native'
|
2
|
+
import { TurboModuleRegistry } from 'react-native'
|
3
|
+
|
4
|
+
export type NativeEmbeddingParams = {
|
5
|
+
embd_normalize?: number
|
6
|
+
}
|
7
|
+
|
8
|
+
export type NativeContextParams = {
|
9
|
+
model: string
|
10
|
+
/**
|
11
|
+
* Chat template to override the default one from the model.
|
12
|
+
*/
|
13
|
+
chat_template?: string
|
14
|
+
|
15
|
+
reasoning_format?: string
|
16
|
+
|
17
|
+
is_model_asset?: boolean
|
18
|
+
use_progress_callback?: boolean
|
19
|
+
|
20
|
+
n_ctx?: number
|
21
|
+
n_batch?: number
|
22
|
+
n_ubatch?: number
|
23
|
+
|
24
|
+
n_threads?: number
|
25
|
+
|
26
|
+
/**
|
27
|
+
* Number of layers to store in VRAM (Currently only for iOS)
|
28
|
+
*/
|
29
|
+
n_gpu_layers?: number
|
30
|
+
/**
|
31
|
+
* Skip GPU devices (iOS only)
|
32
|
+
*/
|
33
|
+
no_gpu_devices?: boolean
|
34
|
+
|
35
|
+
/**
|
36
|
+
* Enable flash attention; only recommended on GPU devices (Experimental in llama.cpp)
|
37
|
+
*/
|
38
|
+
flash_attn?: boolean
|
39
|
+
|
40
|
+
/**
|
41
|
+
* KV cache data type for the K (Experimental in llama.cpp)
|
42
|
+
*/
|
43
|
+
cache_type_k?: string
|
44
|
+
/**
|
45
|
+
* KV cache data type for the V (Experimental in llama.cpp)
|
46
|
+
*/
|
47
|
+
cache_type_v?: string
|
48
|
+
|
49
|
+
use_mlock?: boolean
|
50
|
+
use_mmap?: boolean
|
51
|
+
vocab_only?: boolean
|
52
|
+
|
53
|
+
/**
|
54
|
+
* Single LoRA adapter path
|
55
|
+
*/
|
56
|
+
lora?: string
|
57
|
+
/**
|
58
|
+
* Single LoRA adapter scale
|
59
|
+
*/
|
60
|
+
lora_scaled?: number
|
61
|
+
/**
|
62
|
+
* LoRA adapter list
|
63
|
+
*/
|
64
|
+
lora_list?: Array<{ path: string; scaled?: number }>
|
65
|
+
|
66
|
+
rope_freq_base?: number
|
67
|
+
rope_freq_scale?: number
|
68
|
+
|
69
|
+
pooling_type?: number
|
70
|
+
|
71
|
+
// Embedding params
|
72
|
+
embedding?: boolean
|
73
|
+
embd_normalize?: number
|
74
|
+
}
|
75
|
+
|
76
|
+
export type NativeCompletionParams = {
|
77
|
+
prompt: string
|
78
|
+
n_threads?: number
|
79
|
+
/**
|
80
|
+
* JSON schema to convert to a grammar for structured JSON output.
|
81
|
+
* It will be overridden by grammar if both are set.
|
82
|
+
*/
|
83
|
+
json_schema?: string
|
84
|
+
/**
|
85
|
+
* Set grammar for grammar-based sampling. Default: no grammar
|
86
|
+
*/
|
87
|
+
grammar?: string
|
88
|
+
/**
|
89
|
+
* Lazy grammar sampling, triggered by grammar_triggers. Default: false
|
90
|
+
*/
|
91
|
+
grammar_lazy?: boolean
|
92
|
+
/**
|
93
|
+
* Lazy grammar triggers. Default: []
|
94
|
+
*/
|
95
|
+
grammar_triggers?: Array<{
|
96
|
+
type: number
|
97
|
+
value: string
|
98
|
+
token: number
|
99
|
+
}>
|
100
|
+
preserved_tokens?: Array<string>
|
101
|
+
chat_format?: number
|
102
|
+
/**
|
103
|
+
* Specify a JSON array of stopping strings.
|
104
|
+
* These words will not be included in the completion, so make sure to add them to the prompt for the next iteration. Default: `[]`
|
105
|
+
*/
|
106
|
+
stop?: Array<string>
|
107
|
+
/**
|
108
|
+
* Set the maximum number of tokens to predict when generating text.
|
109
|
+
* **Note:** May exceed the set limit slightly if the last token is a partial multibyte character.
|
110
|
+
* When 0, no tokens will be generated but the prompt is evaluated into the cache. Default: `-1`, where `-1` is infinity.
|
111
|
+
*/
|
112
|
+
n_predict?: number
|
113
|
+
/**
|
114
|
+
* If greater than 0, the response also contains the probabilities of top N tokens for each generated token given the sampling settings.
|
115
|
+
* Note that for temperature < 0 the tokens are sampled greedily but token probabilities are still being calculated via a simple softmax of the logits without considering any other sampler settings.
|
116
|
+
* Default: `0`
|
117
|
+
*/
|
118
|
+
n_probs?: number
|
119
|
+
/**
|
120
|
+
* Limit the next token selection to the K most probable tokens. Default: `40`
|
121
|
+
*/
|
122
|
+
top_k?: number
|
123
|
+
/**
|
124
|
+
* Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P. Default: `0.95`
|
125
|
+
*/
|
126
|
+
top_p?: number
|
127
|
+
/**
|
128
|
+
* The minimum probability for a token to be considered, relative to the probability of the most likely token. Default: `0.05`
|
129
|
+
*/
|
130
|
+
min_p?: number
|
131
|
+
/**
|
132
|
+
* Set the chance for token removal via XTC sampler. Default: `0.0`, which is disabled.
|
133
|
+
*/
|
134
|
+
xtc_probability?: number
|
135
|
+
/**
|
136
|
+
* Set a minimum probability threshold for tokens to be removed via XTC sampler. Default: `0.1` (> `0.5` disables XTC)
|
137
|
+
*/
|
138
|
+
xtc_threshold?: number
|
139
|
+
/**
|
140
|
+
* Enable locally typical sampling with parameter p. Default: `1.0`, which is disabled.
|
141
|
+
*/
|
142
|
+
typical_p?: number
|
143
|
+
/**
|
144
|
+
* Adjust the randomness of the generated text. Default: `0.8`
|
145
|
+
*/
|
146
|
+
temperature?: number
|
147
|
+
/**
|
148
|
+
* Last n tokens to consider for penalizing repetition. Default: `64`, where `0` is disabled and `-1` is ctx-size.
|
149
|
+
*/
|
150
|
+
penalty_last_n?: number
|
151
|
+
/**
|
152
|
+
* Control the repetition of token sequences in the generated text. Default: `1.0`
|
153
|
+
*/
|
154
|
+
penalty_repeat?: number
|
155
|
+
/**
|
156
|
+
* Repeat alpha frequency penalty. Default: `0.0`, which is disabled.
|
157
|
+
*/
|
158
|
+
penalty_freq?: number
|
159
|
+
/**
|
160
|
+
* Repeat alpha presence penalty. Default: `0.0`, which is disabled.
|
161
|
+
*/
|
162
|
+
penalty_present?: number
|
163
|
+
/**
|
164
|
+
* Enable Mirostat sampling, controlling perplexity during text generation. Default: `0`, where `0` is disabled, `1` is Mirostat, and `2` is Mirostat 2.0.
|
165
|
+
*/
|
166
|
+
mirostat?: number
|
167
|
+
/**
|
168
|
+
* Set the Mirostat target entropy, parameter tau. Default: `5.0`
|
169
|
+
*/
|
170
|
+
mirostat_tau?: number
|
171
|
+
/**
|
172
|
+
* Set the Mirostat learning rate, parameter eta. Default: `0.1`
|
173
|
+
*/
|
174
|
+
mirostat_eta?: number
|
175
|
+
/**
|
176
|
+
* Set the DRY (Don't Repeat Yourself) repetition penalty multiplier. Default: `0.0`, which is disabled.
|
177
|
+
*/
|
178
|
+
dry_multiplier?: number
|
179
|
+
/**
|
180
|
+
* Set the DRY repetition penalty base value. Default: `1.75`
|
181
|
+
*/
|
182
|
+
dry_base?: number
|
183
|
+
/**
|
184
|
+
* Tokens that extend repetition beyond this receive exponentially increasing penalty: multiplier * base ^ (length of repeating sequence before token - allowed length). Default: `2`
|
185
|
+
*/
|
186
|
+
dry_allowed_length?: number
|
187
|
+
/**
|
188
|
+
* How many tokens to scan for repetitions. Default: `-1`, where `0` is disabled and `-1` is context size.
|
189
|
+
*/
|
190
|
+
dry_penalty_last_n?: number
|
191
|
+
/**
|
192
|
+
* Specify an array of sequence breakers for DRY sampling. Only a JSON array of strings is accepted. Default: `['\n', ':', '"', '*']`
|
193
|
+
*/
|
194
|
+
dry_sequence_breakers?: Array<string>
|
195
|
+
/**
|
196
|
+
* Top n sigma sampling as described in academic paper "Top-nσ: Not All Logits Are You Need" https://arxiv.org/pdf/2411.07641. Default: `-1.0` (Disabled)
|
197
|
+
*/
|
198
|
+
top_n_sigma?: number
|
199
|
+
|
200
|
+
/**
|
201
|
+
* Ignore end of stream token and continue generating. Default: `false`
|
202
|
+
*/
|
203
|
+
ignore_eos?: boolean
|
204
|
+
/**
|
205
|
+
* Modify the likelihood of a token appearing in the generated text completion.
|
206
|
+
* For example, use `"logit_bias": [[15043,1.0]]` to increase the likelihood of the token 'Hello', or `"logit_bias": [[15043,-1.0]]` to decrease its likelihood.
|
207
|
+
* Setting the value to false, `"logit_bias": [[15043,false]]` ensures that the token `Hello` is never produced. The tokens can also be represented as strings,
|
208
|
+
* e.g. `[["Hello, World!",-0.5]]` will reduce the likelihood of all the individual tokens that represent the string `Hello, World!`, just like `presence_penalty` does.
|
209
|
+
* Default: `[]`
|
210
|
+
*/
|
211
|
+
logit_bias?: Array<Array<number>>
|
212
|
+
/**
|
213
|
+
* Set the random number generator (RNG) seed. Default: `-1`, which is a random seed.
|
214
|
+
*/
|
215
|
+
seed?: number
|
216
|
+
|
217
|
+
emit_partial_completion: boolean
|
218
|
+
}
|
219
|
+
|
220
|
+
export type NativeCompletionTokenProbItem = {
|
221
|
+
tok_str: string
|
222
|
+
prob: number
|
223
|
+
}
|
224
|
+
|
225
|
+
export type NativeCompletionTokenProb = {
|
226
|
+
content: string
|
227
|
+
probs: Array<NativeCompletionTokenProbItem>
|
228
|
+
}
|
229
|
+
|
230
|
+
export type NativeCompletionResultTimings = {
|
231
|
+
prompt_n: number
|
232
|
+
prompt_ms: number
|
233
|
+
prompt_per_token_ms: number
|
234
|
+
prompt_per_second: number
|
235
|
+
predicted_n: number
|
236
|
+
predicted_ms: number
|
237
|
+
predicted_per_token_ms: number
|
238
|
+
predicted_per_second: number
|
239
|
+
}
|
240
|
+
|
241
|
+
export type NativeCompletionResult = {
|
242
|
+
/**
|
243
|
+
* Original text (ignores reasoning_content / tool_calls)
|
244
|
+
*/
|
245
|
+
text: string
|
246
|
+
/**
|
247
|
+
* Reasoning content (parsed for reasoning model)
|
248
|
+
*/
|
249
|
+
reasoning_content: string
|
250
|
+
/**
|
251
|
+
* Tool calls
|
252
|
+
*/
|
253
|
+
tool_calls: Array<{
|
254
|
+
type: 'function'
|
255
|
+
function: {
|
256
|
+
name: string
|
257
|
+
arguments: string
|
258
|
+
}
|
259
|
+
id?: string
|
260
|
+
}>
|
261
|
+
/**
|
262
|
+
* Content text (text filtered by reasoning_content / tool_calls)
|
263
|
+
*/
|
264
|
+
content: string
|
265
|
+
|
266
|
+
tokens_predicted: number
|
267
|
+
tokens_evaluated: number
|
268
|
+
truncated: boolean
|
269
|
+
stopped_eos: boolean
|
270
|
+
stopped_word: string
|
271
|
+
stopped_limit: number
|
272
|
+
stopping_word: string
|
273
|
+
tokens_cached: number
|
274
|
+
timings: NativeCompletionResultTimings
|
275
|
+
|
276
|
+
completion_probabilities?: Array<NativeCompletionTokenProb>
|
277
|
+
}
|
278
|
+
|
279
|
+
export type NativeTokenizeResult = {
|
280
|
+
tokens: Array<number>
|
281
|
+
}
|
282
|
+
|
283
|
+
export type NativeEmbeddingResult = {
|
284
|
+
embedding: Array<number>
|
285
|
+
}
|
286
|
+
|
287
|
+
export type NativeLlamaContext = {
|
288
|
+
contextId: number
|
289
|
+
model: {
|
290
|
+
desc: string
|
291
|
+
size: number
|
292
|
+
nEmbd: number
|
293
|
+
nParams: number
|
294
|
+
chatTemplates: {
|
295
|
+
llamaChat: boolean // Chat template in llama-chat.cpp
|
296
|
+
minja: {
|
297
|
+
// Chat template supported by minja.hpp
|
298
|
+
default: boolean
|
299
|
+
defaultCaps: {
|
300
|
+
tools: boolean
|
301
|
+
toolCalls: boolean
|
302
|
+
toolResponses: boolean
|
303
|
+
systemRole: boolean
|
304
|
+
parallelToolCalls: boolean
|
305
|
+
toolCallId: boolean
|
306
|
+
}
|
307
|
+
toolUse: boolean
|
308
|
+
toolUseCaps: {
|
309
|
+
tools: boolean
|
310
|
+
toolCalls: boolean
|
311
|
+
toolResponses: boolean
|
312
|
+
systemRole: boolean
|
313
|
+
parallelToolCalls: boolean
|
314
|
+
toolCallId: boolean
|
315
|
+
}
|
316
|
+
}
|
317
|
+
}
|
318
|
+
metadata: Object
|
319
|
+
isChatTemplateSupported: boolean // Deprecated
|
320
|
+
}
|
321
|
+
/**
|
322
|
+
* Loaded library name for Android
|
323
|
+
*/
|
324
|
+
androidLib?: string
|
325
|
+
gpu: boolean
|
326
|
+
reasonNoGPU: string
|
327
|
+
}
|
328
|
+
|
329
|
+
export type NativeSessionLoadResult = {
|
330
|
+
tokens_loaded: number
|
331
|
+
prompt: string
|
332
|
+
}
|
333
|
+
|
334
|
+
export type NativeLlamaChatMessage = {
|
335
|
+
role: string
|
336
|
+
content: string
|
337
|
+
}
|
338
|
+
|
339
|
+
export type NativeCPUFeatures = {
|
340
|
+
armv8: boolean
|
341
|
+
i8mm: boolean
|
342
|
+
dotprod: boolean
|
343
|
+
}
|
344
|
+
|
345
|
+
export type JinjaFormattedChatResult = {
|
346
|
+
prompt: string
|
347
|
+
chat_format?: number
|
348
|
+
grammar?: string
|
349
|
+
grammar_lazy?: boolean
|
350
|
+
grammar_triggers?: Array<{
|
351
|
+
type: number
|
352
|
+
value: string
|
353
|
+
token: number
|
354
|
+
}>
|
355
|
+
preserved_tokens?: Array<string>
|
356
|
+
additional_stops?: Array<string>
|
357
|
+
}
|
358
|
+
|
359
|
+
export interface Spec extends TurboModule {
|
360
|
+
toggleNativeLog(enabled: boolean): Promise<void>
|
361
|
+
setContextLimit(limit: number): Promise<void>
|
362
|
+
|
363
|
+
modelInfo(path: string, skip?: string[]): Promise<Object>
|
364
|
+
initContext(
|
365
|
+
contextId: number,
|
366
|
+
params: NativeContextParams,
|
367
|
+
): Promise<NativeLlamaContext>
|
368
|
+
|
369
|
+
getFormattedChat(
|
370
|
+
contextId: number,
|
371
|
+
messages: string,
|
372
|
+
chatTemplate?: string,
|
373
|
+
params?: {
|
374
|
+
jinja?: boolean
|
375
|
+
json_schema?: string
|
376
|
+
tools?: string
|
377
|
+
parallel_tool_calls?: string
|
378
|
+
tool_choice?: string
|
379
|
+
},
|
380
|
+
): Promise<JinjaFormattedChatResult | string>
|
381
|
+
loadSession(
|
382
|
+
contextId: number,
|
383
|
+
filepath: string,
|
384
|
+
): Promise<NativeSessionLoadResult>
|
385
|
+
saveSession(
|
386
|
+
contextId: number,
|
387
|
+
filepath: string,
|
388
|
+
size: number,
|
389
|
+
): Promise<number>
|
390
|
+
completion(
|
391
|
+
contextId: number,
|
392
|
+
params: NativeCompletionParams,
|
393
|
+
): Promise<NativeCompletionResult>
|
394
|
+
stopCompletion(contextId: number): Promise<void>
|
395
|
+
tokenizeAsync(contextId: number, text: string): Promise<NativeTokenizeResult>
|
396
|
+
tokenizeSync(contextId: number, text: string): NativeTokenizeResult
|
397
|
+
getCpuFeatures() : Promise<NativeCPUFeatures>
|
398
|
+
detokenize(contextId: number, tokens: number[]): Promise<string>
|
399
|
+
embedding(
|
400
|
+
contextId: number,
|
401
|
+
text: string,
|
402
|
+
params: NativeEmbeddingParams,
|
403
|
+
): Promise<NativeEmbeddingResult>
|
404
|
+
bench(
|
405
|
+
contextId: number,
|
406
|
+
pp: number,
|
407
|
+
tg: number,
|
408
|
+
pl: number,
|
409
|
+
nr: number,
|
410
|
+
): Promise<string>
|
411
|
+
|
412
|
+
applyLoraAdapters(
|
413
|
+
contextId: number,
|
414
|
+
loraAdapters: Array<{ path: string; scaled?: number }>,
|
415
|
+
): Promise<void>
|
416
|
+
removeLoraAdapters(contextId: number): Promise<void>
|
417
|
+
getLoadedLoraAdapters(
|
418
|
+
contextId: number,
|
419
|
+
): Promise<Array<{ path: string; scaled?: number }>>
|
420
|
+
|
421
|
+
releaseContext(contextId: number): Promise<void>
|
422
|
+
|
423
|
+
releaseAllContexts(): Promise<void>
|
424
|
+
}
|
425
|
+
|
426
|
+
export default TurboModuleRegistry.get<Spec>('RNLlama') as Spec
|