cui-llama.rn 1.5.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -20
- package/README.md +345 -319
- package/android/build.gradle +116 -116
- package/android/gradle.properties +5 -5
- package/android/src/main/AndroidManifest.xml +4 -4
- package/android/src/main/CMakeLists.txt +129 -124
- package/android/src/main/java/com/rnllama/LlamaContext.java +648 -645
- package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
- package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
- package/android/src/main/jni-utils.h +100 -100
- package/android/src/main/jni.cpp +1279 -1263
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
- package/cpp/LICENSE +21 -0
- package/cpp/README.md +4 -4
- package/cpp/chat.cpp +1 -1
- package/cpp/common.cpp +17 -2
- package/cpp/common.h +7 -3
- package/cpp/ggml-alloc.c +4 -1
- package/cpp/ggml-cpp.h +1 -1
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/cpp/{binary-ops.h → ggml-cpu/binary-ops.h} +1 -1
- package/cpp/ggml-cpu/common.h +72 -0
- package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -101
- package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +109 -42
- package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +3 -0
- package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +246 -160
- package/cpp/{ops.h → ggml-cpu/ops.h} +2 -20
- package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
- package/cpp/{simd-mappings.h → ggml-cpu/simd-mappings.h} +7 -3
- package/cpp/{unary-ops.h → ggml-cpu/unary-ops.h} +1 -1
- package/cpp/ggml-cpu.h +5 -0
- package/cpp/ggml-impl.h +16 -9
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +597 -597
- package/cpp/ggml-metal.m +496 -47
- package/cpp/ggml.c +134 -244
- package/cpp/ggml.h +62 -95
- package/cpp/json-schema-to-grammar.cpp +3 -0
- package/cpp/llama-arch.cpp +46 -17
- package/cpp/llama-arch.h +9 -0
- package/cpp/llama-batch.cpp +5 -1
- package/cpp/llama-batch.h +2 -1
- package/cpp/llama-chat.cpp +31 -10
- package/cpp/llama-chat.h +3 -2
- package/cpp/llama-context.cpp +104 -489
- package/cpp/llama-context.h +14 -30
- package/cpp/llama-graph.cpp +69 -62
- package/cpp/llama-graph.h +21 -18
- package/cpp/llama-hparams.h +5 -0
- package/cpp/llama-kv-cache.cpp +1497 -391
- package/cpp/llama-kv-cache.h +272 -80
- package/cpp/llama-memory.h +11 -1
- package/cpp/llama-model.cpp +502 -176
- package/cpp/llama-model.h +13 -3
- package/cpp/llama-sampling.cpp +2 -1
- package/cpp/llama-vocab.cpp +8 -1
- package/cpp/llama.h +14 -11
- package/cpp/rn-llama.cpp +721 -873
- package/cpp/rn-llama.h +134 -138
- package/cpp/sampling.h +107 -107
- package/cpp/unicode-data.cpp +7034 -7034
- package/cpp/unicode-data.h +20 -20
- package/cpp/unicode.cpp +849 -849
- package/cpp/unicode.h +66 -66
- package/ios/CMakeLists.txt +119 -108
- package/ios/RNLlama.h +13 -7
- package/ios/RNLlama.mm +423 -405
- package/ios/RNLlamaContext.h +57 -57
- package/ios/RNLlamaContext.mm +833 -835
- package/ios/rnllama.xcframework/Info.plist +74 -74
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +681 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +601 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2189 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +437 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +89 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +57 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +249 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +595 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +161 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +405 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +31 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +419 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1437 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +134 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +681 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +601 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2189 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +437 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +89 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +57 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +249 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +595 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +161 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +405 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +31 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +419 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1437 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +134 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +681 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +601 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2189 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +437 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +89 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +57 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +249 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +595 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +161 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +405 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +31 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +419 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1437 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +134 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +681 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +601 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2189 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +437 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +89 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +57 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +249 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +595 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +161 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +405 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +31 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +419 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1437 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +134 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +203 -203
- package/lib/commonjs/NativeRNLlama.js +1 -2
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/chat.js.map +1 -1
- package/lib/commonjs/grammar.js +12 -31
- package/lib/commonjs/grammar.js.map +1 -1
- package/lib/commonjs/index.js +47 -47
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/package.json +1 -0
- package/lib/module/NativeRNLlama.js +2 -0
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/chat.js +2 -0
- package/lib/module/chat.js.map +1 -1
- package/lib/module/grammar.js +14 -31
- package/lib/module/grammar.js.map +1 -1
- package/lib/module/index.js +47 -45
- package/lib/module/index.js.map +1 -1
- package/lib/module/package.json +1 -0
- package/lib/typescript/NativeRNLlama.d.ts +10 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/llama-rn.podspec +48 -48
- package/package.json +233 -233
- package/src/NativeRNLlama.ts +431 -426
- package/src/chat.ts +44 -44
- package/src/grammar.ts +854 -854
- package/src/index.ts +495 -487
- /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
- /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
- /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
- /package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +0 -0
- /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
- /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
- /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
- /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
- /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
- /package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -0
- /package/cpp/{vec.h → ggml-cpu/vec.h} +0 -0
@@ -1,695 +1,695 @@
|
|
1
|
-
package com.rnllama;
|
2
|
-
|
3
|
-
import androidx.annotation.NonNull;
|
4
|
-
import android.util.Log;
|
5
|
-
import android.os.Build;
|
6
|
-
import android.os.Handler;
|
7
|
-
import android.os.AsyncTask;
|
8
|
-
|
9
|
-
import com.facebook.react.bridge.Promise;
|
10
|
-
import com.facebook.react.bridge.ReactApplicationContext;
|
11
|
-
import com.facebook.react.bridge.ReactMethod;
|
12
|
-
import com.facebook.react.bridge.LifecycleEventListener;
|
13
|
-
import com.facebook.react.bridge.ReadableMap;
|
14
|
-
import com.facebook.react.bridge.ReadableArray;
|
15
|
-
import com.facebook.react.bridge.WritableMap;
|
16
|
-
import com.facebook.react.bridge.Arguments;
|
17
|
-
|
18
|
-
import java.util.HashMap;
|
19
|
-
import java.util.Random;
|
20
|
-
import java.io.File;
|
21
|
-
import java.io.FileInputStream;
|
22
|
-
import java.io.PushbackInputStream;
|
23
|
-
|
24
|
-
public class RNLlama implements LifecycleEventListener {
|
25
|
-
public static final String NAME = "RNLlama";
|
26
|
-
|
27
|
-
private ReactApplicationContext reactContext;
|
28
|
-
|
29
|
-
public RNLlama(ReactApplicationContext reactContext) {
|
30
|
-
reactContext.addLifecycleEventListener(this);
|
31
|
-
this.reactContext = reactContext;
|
32
|
-
}
|
33
|
-
|
34
|
-
private HashMap<AsyncTask, String> tasks = new HashMap<>();
|
35
|
-
|
36
|
-
private HashMap<Integer, LlamaContext> contexts = new HashMap<>();
|
37
|
-
|
38
|
-
public void toggleNativeLog(boolean enabled, Promise promise) {
|
39
|
-
new AsyncTask<Void, Void, Boolean>() {
|
40
|
-
private Exception exception;
|
41
|
-
|
42
|
-
@Override
|
43
|
-
protected Boolean doInBackground(Void... voids) {
|
44
|
-
try {
|
45
|
-
LlamaContext.toggleNativeLog(reactContext, enabled);
|
46
|
-
return true;
|
47
|
-
} catch (Exception e) {
|
48
|
-
exception = e;
|
49
|
-
}
|
50
|
-
return null;
|
51
|
-
}
|
52
|
-
|
53
|
-
@Override
|
54
|
-
protected void onPostExecute(Boolean result) {
|
55
|
-
if (exception != null) {
|
56
|
-
promise.reject(exception);
|
57
|
-
return;
|
58
|
-
}
|
59
|
-
promise.resolve(result);
|
60
|
-
}
|
61
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
62
|
-
}
|
63
|
-
|
64
|
-
private int llamaContextLimit = -1;
|
65
|
-
|
66
|
-
public void setContextLimit(double limit, Promise promise) {
|
67
|
-
llamaContextLimit = (int) limit;
|
68
|
-
promise.resolve(null);
|
69
|
-
}
|
70
|
-
|
71
|
-
public void modelInfo(final String model, final ReadableArray skip, final Promise promise) {
|
72
|
-
new AsyncTask<Void, Void, WritableMap>() {
|
73
|
-
private Exception exception;
|
74
|
-
|
75
|
-
@Override
|
76
|
-
protected WritableMap doInBackground(Void... voids) {
|
77
|
-
try {
|
78
|
-
String[] skipArray = new String[skip.size()];
|
79
|
-
for (int i = 0; i < skip.size(); i++) {
|
80
|
-
skipArray[i] = skip.getString(i);
|
81
|
-
}
|
82
|
-
return LlamaContext.modelInfo(model, skipArray);
|
83
|
-
} catch (Exception e) {
|
84
|
-
exception = e;
|
85
|
-
}
|
86
|
-
return null;
|
87
|
-
}
|
88
|
-
|
89
|
-
@Override
|
90
|
-
protected void onPostExecute(WritableMap result) {
|
91
|
-
if (exception != null) {
|
92
|
-
promise.reject(exception);
|
93
|
-
return;
|
94
|
-
}
|
95
|
-
promise.resolve(result);
|
96
|
-
}
|
97
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
98
|
-
}
|
99
|
-
|
100
|
-
public void initContext(double id, final ReadableMap params, final Promise promise) {
|
101
|
-
final int contextId = (int) id;
|
102
|
-
AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
|
103
|
-
private Exception exception;
|
104
|
-
|
105
|
-
@Override
|
106
|
-
protected WritableMap doInBackground(Void... voids) {
|
107
|
-
try {
|
108
|
-
LlamaContext context = contexts.get(contextId);
|
109
|
-
if (context != null) {
|
110
|
-
throw new Exception("Context already exists");
|
111
|
-
}
|
112
|
-
if (llamaContextLimit > -1 && contexts.size() >= llamaContextLimit) {
|
113
|
-
throw new Exception("Context limit reached");
|
114
|
-
}
|
115
|
-
LlamaContext llamaContext = new LlamaContext(contextId, reactContext, params);
|
116
|
-
if (llamaContext.getContext() == 0) {
|
117
|
-
throw new Exception("Failed to initialize context");
|
118
|
-
}
|
119
|
-
contexts.put(contextId, llamaContext);
|
120
|
-
WritableMap result = Arguments.createMap();
|
121
|
-
result.putBoolean("gpu", false);
|
122
|
-
result.putString("reasonNoGPU", "Currently not supported");
|
123
|
-
result.putMap("model", llamaContext.getModelDetails());
|
124
|
-
result.putString("androidLib", llamaContext.getLoadedLibrary());
|
125
|
-
return result;
|
126
|
-
} catch (Exception e) {
|
127
|
-
exception = e;
|
128
|
-
return null;
|
129
|
-
}
|
130
|
-
}
|
131
|
-
|
132
|
-
@Override
|
133
|
-
protected void onPostExecute(WritableMap result) {
|
134
|
-
if (exception != null) {
|
135
|
-
promise.reject(exception);
|
136
|
-
return;
|
137
|
-
}
|
138
|
-
promise.resolve(result);
|
139
|
-
tasks.remove(this);
|
140
|
-
}
|
141
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
142
|
-
tasks.put(task, "initContext");
|
143
|
-
}
|
144
|
-
|
145
|
-
public void getFormattedChat(double id, final String messages, final String chatTemplate, final ReadableMap params, Promise promise) {
|
146
|
-
final int contextId = (int) id;
|
147
|
-
AsyncTask task = new AsyncTask<Void, Void, Object>() {
|
148
|
-
private Exception exception;
|
149
|
-
|
150
|
-
@Override
|
151
|
-
protected Object doInBackground(Void... voids) {
|
152
|
-
try {
|
153
|
-
LlamaContext context = contexts.get(contextId);
|
154
|
-
if (context == null) {
|
155
|
-
throw new Exception("Context not found");
|
156
|
-
}
|
157
|
-
if (params.hasKey("jinja") && params.getBoolean("jinja")) {
|
158
|
-
ReadableMap result = context.getFormattedChatWithJinja(messages, chatTemplate, params);
|
159
|
-
if (result.hasKey("_error")) {
|
160
|
-
throw new Exception(result.getString("_error"));
|
161
|
-
}
|
162
|
-
return result;
|
163
|
-
}
|
164
|
-
return context.getFormattedChat(messages, chatTemplate);
|
165
|
-
} catch (Exception e) {
|
166
|
-
exception = e;
|
167
|
-
return null;
|
168
|
-
}
|
169
|
-
}
|
170
|
-
|
171
|
-
@Override
|
172
|
-
protected void onPostExecute(Object result) {
|
173
|
-
if (exception != null) {
|
174
|
-
promise.reject(exception);
|
175
|
-
return;
|
176
|
-
}
|
177
|
-
promise.resolve(result);
|
178
|
-
tasks.remove(this);
|
179
|
-
}
|
180
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
181
|
-
tasks.put(task, "getFormattedChat-" + contextId);
|
182
|
-
}
|
183
|
-
|
184
|
-
public void loadSession(double id, final String path, Promise promise) {
|
185
|
-
final int contextId = (int) id;
|
186
|
-
AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
|
187
|
-
private Exception exception;
|
188
|
-
|
189
|
-
@Override
|
190
|
-
protected WritableMap doInBackground(Void... voids) {
|
191
|
-
try {
|
192
|
-
LlamaContext context = contexts.get(contextId);
|
193
|
-
if (context == null) {
|
194
|
-
throw new Exception("Context not found");
|
195
|
-
}
|
196
|
-
WritableMap result = context.loadSession(path);
|
197
|
-
return result;
|
198
|
-
} catch (Exception e) {
|
199
|
-
exception = e;
|
200
|
-
}
|
201
|
-
return null;
|
202
|
-
}
|
203
|
-
|
204
|
-
@Override
|
205
|
-
protected void onPostExecute(WritableMap result) {
|
206
|
-
if (exception != null) {
|
207
|
-
promise.reject(exception);
|
208
|
-
return;
|
209
|
-
}
|
210
|
-
promise.resolve(result);
|
211
|
-
tasks.remove(this);
|
212
|
-
}
|
213
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
214
|
-
tasks.put(task, "loadSession-" + contextId);
|
215
|
-
}
|
216
|
-
|
217
|
-
public void saveSession(double id, final String path, double size, Promise promise) {
|
218
|
-
final int contextId = (int) id;
|
219
|
-
AsyncTask task = new AsyncTask<Void, Void, Integer>() {
|
220
|
-
private Exception exception;
|
221
|
-
|
222
|
-
@Override
|
223
|
-
protected Integer doInBackground(Void... voids) {
|
224
|
-
try {
|
225
|
-
LlamaContext context = contexts.get(contextId);
|
226
|
-
if (context == null) {
|
227
|
-
throw new Exception("Context not found");
|
228
|
-
}
|
229
|
-
Integer count = context.saveSession(path, (int) size);
|
230
|
-
return count;
|
231
|
-
} catch (Exception e) {
|
232
|
-
exception = e;
|
233
|
-
}
|
234
|
-
return -1;
|
235
|
-
}
|
236
|
-
|
237
|
-
@Override
|
238
|
-
protected void onPostExecute(Integer result) {
|
239
|
-
if (exception != null) {
|
240
|
-
promise.reject(exception);
|
241
|
-
return;
|
242
|
-
}
|
243
|
-
promise.resolve(result);
|
244
|
-
tasks.remove(this);
|
245
|
-
}
|
246
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
247
|
-
tasks.put(task, "saveSession-" + contextId);
|
248
|
-
}
|
249
|
-
|
250
|
-
public void completion(double id, final ReadableMap params, final Promise promise) {
|
251
|
-
final int contextId = (int) id;
|
252
|
-
AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
|
253
|
-
private Exception exception;
|
254
|
-
|
255
|
-
@Override
|
256
|
-
protected WritableMap doInBackground(Void... voids) {
|
257
|
-
try {
|
258
|
-
LlamaContext context = contexts.get(contextId);
|
259
|
-
if (context == null) {
|
260
|
-
throw new Exception("Context not found");
|
261
|
-
}
|
262
|
-
if (context.isPredicting()) {
|
263
|
-
throw new Exception("Context is busy");
|
264
|
-
}
|
265
|
-
WritableMap result = context.completion(params);
|
266
|
-
return result;
|
267
|
-
} catch (Exception e) {
|
268
|
-
exception = e;
|
269
|
-
}
|
270
|
-
return null;
|
271
|
-
}
|
272
|
-
|
273
|
-
@Override
|
274
|
-
protected void onPostExecute(WritableMap result) {
|
275
|
-
if (exception != null) {
|
276
|
-
promise.reject(exception);
|
277
|
-
return;
|
278
|
-
}
|
279
|
-
promise.resolve(result);
|
280
|
-
tasks.remove(this);
|
281
|
-
}
|
282
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
283
|
-
tasks.put(task, "completion-" + contextId);
|
284
|
-
}
|
285
|
-
|
286
|
-
public void stopCompletion(double id, final Promise promise) {
|
287
|
-
final int contextId = (int) id;
|
288
|
-
AsyncTask task = new AsyncTask<Void, Void, Void>() {
|
289
|
-
private Exception exception;
|
290
|
-
|
291
|
-
@Override
|
292
|
-
protected Void doInBackground(Void... voids) {
|
293
|
-
try {
|
294
|
-
LlamaContext context = contexts.get(contextId);
|
295
|
-
if (context == null) {
|
296
|
-
throw new Exception("Context not found");
|
297
|
-
}
|
298
|
-
context.stopCompletion();
|
299
|
-
AsyncTask completionTask = null;
|
300
|
-
for (AsyncTask task : tasks.keySet()) {
|
301
|
-
if (tasks.get(task).equals("completion-" + contextId)) {
|
302
|
-
task.get();
|
303
|
-
break;
|
304
|
-
}
|
305
|
-
}
|
306
|
-
} catch (Exception e) {
|
307
|
-
exception = e;
|
308
|
-
}
|
309
|
-
return null;
|
310
|
-
}
|
311
|
-
|
312
|
-
@Override
|
313
|
-
protected void onPostExecute(Void result) {
|
314
|
-
if (exception != null) {
|
315
|
-
promise.reject(exception);
|
316
|
-
return;
|
317
|
-
}
|
318
|
-
promise.resolve(result);
|
319
|
-
tasks.remove(this);
|
320
|
-
}
|
321
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
322
|
-
tasks.put(task, "stopCompletion-" + contextId);
|
323
|
-
}
|
324
|
-
|
325
|
-
public void tokenizeAsync(double id, final String text, final Promise promise) {
|
326
|
-
final int contextId = (int) id;
|
327
|
-
AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
|
328
|
-
private Exception exception;
|
329
|
-
|
330
|
-
@Override
|
331
|
-
protected WritableMap doInBackground(Void... voids) {
|
332
|
-
try {
|
333
|
-
LlamaContext context = contexts.get(contextId);
|
334
|
-
if (context == null) {
|
335
|
-
throw new Exception("Context not found");
|
336
|
-
}
|
337
|
-
return context.tokenize(text);
|
338
|
-
} catch (Exception e) {
|
339
|
-
exception = e;
|
340
|
-
}
|
341
|
-
return null;
|
342
|
-
}
|
343
|
-
|
344
|
-
@Override
|
345
|
-
protected void onPostExecute(WritableMap result) {
|
346
|
-
if (exception != null) {
|
347
|
-
promise.reject(exception);
|
348
|
-
return;
|
349
|
-
}
|
350
|
-
promise.resolve(result);
|
351
|
-
tasks.remove(this);
|
352
|
-
}
|
353
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
354
|
-
tasks.put(task, "tokenize-" + contextId);
|
355
|
-
}
|
356
|
-
|
357
|
-
public WritableMap tokenizeSync(double id, final String text) {
|
358
|
-
int contextId = (int) id;
|
359
|
-
LlamaContext context = contexts.get(contextId);
|
360
|
-
if (context == null) {
|
361
|
-
return Arguments.createMap();
|
362
|
-
}
|
363
|
-
return context.tokenize(text);
|
364
|
-
}
|
365
|
-
|
366
|
-
public void getCpuFeatures(Promise promise) {
|
367
|
-
AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
|
368
|
-
private Exception exception;
|
369
|
-
@Override
|
370
|
-
protected WritableMap doInBackground(Void... voids) {
|
371
|
-
try {
|
372
|
-
WritableMap result = Arguments.createMap();
|
373
|
-
boolean isV8 = LlamaContext.isArm64V8a();
|
374
|
-
result.putBoolean("armv8", isV8);
|
375
|
-
|
376
|
-
if(isV8) {
|
377
|
-
String cpuFeatures = LlamaContext.getCpuFeatures();
|
378
|
-
boolean hasDotProd = cpuFeatures.contains("dotprod") || cpuFeatures.contains("asimddp");
|
379
|
-
boolean hasInt8Matmul = cpuFeatures.contains("i8mm");
|
380
|
-
result.putBoolean("i8mm", hasInt8Matmul);
|
381
|
-
result.putBoolean("dotprod", hasDotProd);
|
382
|
-
} else {
|
383
|
-
result.putBoolean("i8mm", false);
|
384
|
-
result.putBoolean("dotprod", false);
|
385
|
-
}
|
386
|
-
return result;
|
387
|
-
} catch (Exception e) {
|
388
|
-
exception = e;
|
389
|
-
return null;
|
390
|
-
}
|
391
|
-
}
|
392
|
-
|
393
|
-
@Override
|
394
|
-
protected void onPostExecute(WritableMap result) {
|
395
|
-
if (exception != null) {
|
396
|
-
promise.reject(exception);
|
397
|
-
return;
|
398
|
-
}
|
399
|
-
promise.resolve(result);
|
400
|
-
tasks.remove(this);
|
401
|
-
}
|
402
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
403
|
-
tasks.put(task, "getCPUFeatures");
|
404
|
-
}
|
405
|
-
|
406
|
-
public void detokenize(double id, final ReadableArray tokens, final Promise promise) {
|
407
|
-
final int contextId = (int) id;
|
408
|
-
AsyncTask task = new AsyncTask<Void, Void, String>() {
|
409
|
-
private Exception exception;
|
410
|
-
|
411
|
-
@Override
|
412
|
-
protected String doInBackground(Void... voids) {
|
413
|
-
try {
|
414
|
-
LlamaContext context = contexts.get(contextId);
|
415
|
-
if (context == null) {
|
416
|
-
throw new Exception("Context not found");
|
417
|
-
}
|
418
|
-
return context.detokenize(tokens);
|
419
|
-
} catch (Exception e) {
|
420
|
-
exception = e;
|
421
|
-
}
|
422
|
-
return null;
|
423
|
-
}
|
424
|
-
|
425
|
-
@Override
|
426
|
-
protected void onPostExecute(String result) {
|
427
|
-
if (exception != null) {
|
428
|
-
promise.reject(exception);
|
429
|
-
return;
|
430
|
-
}
|
431
|
-
promise.resolve(result);
|
432
|
-
tasks.remove(this);
|
433
|
-
}
|
434
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
435
|
-
tasks.put(task, "detokenize-" + contextId);
|
436
|
-
}
|
437
|
-
|
438
|
-
public void embedding(double id, final String text, final ReadableMap params, final Promise promise) {
|
439
|
-
final int contextId = (int) id;
|
440
|
-
AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
|
441
|
-
private Exception exception;
|
442
|
-
|
443
|
-
@Override
|
444
|
-
protected WritableMap doInBackground(Void... voids) {
|
445
|
-
try {
|
446
|
-
LlamaContext context = contexts.get(contextId);
|
447
|
-
if (context == null) {
|
448
|
-
throw new Exception("Context not found");
|
449
|
-
}
|
450
|
-
return context.getEmbedding(text, params);
|
451
|
-
} catch (Exception e) {
|
452
|
-
exception = e;
|
453
|
-
}
|
454
|
-
return null;
|
455
|
-
}
|
456
|
-
|
457
|
-
@Override
|
458
|
-
protected void onPostExecute(WritableMap result) {
|
459
|
-
if (exception != null) {
|
460
|
-
promise.reject(exception);
|
461
|
-
return;
|
462
|
-
}
|
463
|
-
promise.resolve(result);
|
464
|
-
tasks.remove(this);
|
465
|
-
}
|
466
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
467
|
-
tasks.put(task, "embedding-" + contextId);
|
468
|
-
}
|
469
|
-
|
470
|
-
public void bench(double id, final double pp, final double tg, final double pl, final double nr, final Promise promise) {
|
471
|
-
final int contextId = (int) id;
|
472
|
-
AsyncTask task = new AsyncTask<Void, Void, String>() {
|
473
|
-
private Exception exception;
|
474
|
-
|
475
|
-
@Override
|
476
|
-
protected String doInBackground(Void... voids) {
|
477
|
-
try {
|
478
|
-
LlamaContext context = contexts.get(contextId);
|
479
|
-
if (context == null) {
|
480
|
-
throw new Exception("Context not found");
|
481
|
-
}
|
482
|
-
return context.bench((int) pp, (int) tg, (int) pl, (int) nr);
|
483
|
-
} catch (Exception e) {
|
484
|
-
exception = e;
|
485
|
-
}
|
486
|
-
return null;
|
487
|
-
}
|
488
|
-
|
489
|
-
@Override
|
490
|
-
protected void onPostExecute(String result) {
|
491
|
-
if (exception != null) {
|
492
|
-
promise.reject(exception);
|
493
|
-
return;
|
494
|
-
}
|
495
|
-
promise.resolve(result);
|
496
|
-
tasks.remove(this);
|
497
|
-
}
|
498
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
499
|
-
tasks.put(task, "bench-" + contextId);
|
500
|
-
}
|
501
|
-
|
502
|
-
public void applyLoraAdapters(double id, final ReadableArray loraAdapters, final Promise promise) {
|
503
|
-
final int contextId = (int) id;
|
504
|
-
AsyncTask task = new AsyncTask<Void, Void, Void>() {
|
505
|
-
private Exception exception;
|
506
|
-
|
507
|
-
@Override
|
508
|
-
protected Void doInBackground(Void... voids) {
|
509
|
-
try {
|
510
|
-
LlamaContext context = contexts.get(contextId);
|
511
|
-
if (context == null) {
|
512
|
-
throw new Exception("Context not found");
|
513
|
-
}
|
514
|
-
if (context.isPredicting()) {
|
515
|
-
throw new Exception("Context is busy");
|
516
|
-
}
|
517
|
-
context.applyLoraAdapters(loraAdapters);
|
518
|
-
} catch (Exception e) {
|
519
|
-
exception = e;
|
520
|
-
}
|
521
|
-
return null;
|
522
|
-
}
|
523
|
-
|
524
|
-
@Override
|
525
|
-
protected void onPostExecute(Void result) {
|
526
|
-
if (exception != null) {
|
527
|
-
promise.reject(exception);
|
528
|
-
return;
|
529
|
-
}
|
530
|
-
}
|
531
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
532
|
-
tasks.put(task, "applyLoraAdapters-" + contextId);
|
533
|
-
}
|
534
|
-
|
535
|
-
public void removeLoraAdapters(double id, final Promise promise) {
|
536
|
-
final int contextId = (int) id;
|
537
|
-
AsyncTask task = new AsyncTask<Void, Void, Void>() {
|
538
|
-
private Exception exception;
|
539
|
-
|
540
|
-
@Override
|
541
|
-
protected Void doInBackground(Void... voids) {
|
542
|
-
try {
|
543
|
-
LlamaContext context = contexts.get(contextId);
|
544
|
-
if (context == null) {
|
545
|
-
throw new Exception("Context not found");
|
546
|
-
}
|
547
|
-
if (context.isPredicting()) {
|
548
|
-
throw new Exception("Context is busy");
|
549
|
-
}
|
550
|
-
context.removeLoraAdapters();
|
551
|
-
} catch (Exception e) {
|
552
|
-
exception = e;
|
553
|
-
}
|
554
|
-
return null;
|
555
|
-
}
|
556
|
-
|
557
|
-
@Override
|
558
|
-
protected void onPostExecute(Void result) {
|
559
|
-
if (exception != null) {
|
560
|
-
promise.reject(exception);
|
561
|
-
return;
|
562
|
-
}
|
563
|
-
promise.resolve(null);
|
564
|
-
}
|
565
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
566
|
-
tasks.put(task, "removeLoraAdapters-" + contextId);
|
567
|
-
}
|
568
|
-
|
569
|
-
public void getLoadedLoraAdapters(double id, final Promise promise) {
|
570
|
-
final int contextId = (int) id;
|
571
|
-
AsyncTask task = new AsyncTask<Void, Void, ReadableArray>() {
|
572
|
-
private Exception exception;
|
573
|
-
|
574
|
-
@Override
|
575
|
-
protected ReadableArray doInBackground(Void... voids) {
|
576
|
-
try {
|
577
|
-
LlamaContext context = contexts.get(contextId);
|
578
|
-
if (context == null) {
|
579
|
-
throw new Exception("Context not found");
|
580
|
-
}
|
581
|
-
return context.getLoadedLoraAdapters();
|
582
|
-
} catch (Exception e) {
|
583
|
-
exception = e;
|
584
|
-
}
|
585
|
-
return null;
|
586
|
-
}
|
587
|
-
|
588
|
-
@Override
|
589
|
-
protected void onPostExecute(ReadableArray result) {
|
590
|
-
if (exception != null) {
|
591
|
-
promise.reject(exception);
|
592
|
-
return;
|
593
|
-
}
|
594
|
-
promise.resolve(result);
|
595
|
-
}
|
596
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
597
|
-
tasks.put(task, "getLoadedLoraAdapters-" + contextId);
|
598
|
-
}
|
599
|
-
|
600
|
-
public void releaseContext(double id, Promise promise) {
|
601
|
-
final int contextId = (int) id;
|
602
|
-
AsyncTask task = new AsyncTask<Void, Void, Void>() {
|
603
|
-
private Exception exception;
|
604
|
-
|
605
|
-
@Override
|
606
|
-
protected Void doInBackground(Void... voids) {
|
607
|
-
try {
|
608
|
-
LlamaContext context = contexts.get(contextId);
|
609
|
-
if (context == null) {
|
610
|
-
throw new Exception("Context " + id + " not found");
|
611
|
-
}
|
612
|
-
context.interruptLoad();
|
613
|
-
context.stopCompletion();
|
614
|
-
AsyncTask completionTask = null;
|
615
|
-
for (AsyncTask task : tasks.keySet()) {
|
616
|
-
if (tasks.get(task).equals("completion-" + contextId)) {
|
617
|
-
task.get();
|
618
|
-
break;
|
619
|
-
}
|
620
|
-
}
|
621
|
-
context.release();
|
622
|
-
contexts.remove(contextId);
|
623
|
-
} catch (Exception e) {
|
624
|
-
exception = e;
|
625
|
-
}
|
626
|
-
return null;
|
627
|
-
}
|
628
|
-
|
629
|
-
@Override
|
630
|
-
protected void onPostExecute(Void result) {
|
631
|
-
if (exception != null) {
|
632
|
-
promise.reject(exception);
|
633
|
-
return;
|
634
|
-
}
|
635
|
-
promise.resolve(null);
|
636
|
-
tasks.remove(this);
|
637
|
-
}
|
638
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
639
|
-
tasks.put(task, "releaseContext-" + contextId);
|
640
|
-
}
|
641
|
-
|
642
|
-
public void releaseAllContexts(Promise promise) {
|
643
|
-
AsyncTask task = new AsyncTask<Void, Void, Void>() {
|
644
|
-
private Exception exception;
|
645
|
-
|
646
|
-
@Override
|
647
|
-
protected Void doInBackground(Void... voids) {
|
648
|
-
try {
|
649
|
-
onHostDestroy();
|
650
|
-
} catch (Exception e) {
|
651
|
-
exception = e;
|
652
|
-
}
|
653
|
-
return null;
|
654
|
-
}
|
655
|
-
|
656
|
-
@Override
|
657
|
-
protected void onPostExecute(Void result) {
|
658
|
-
if (exception != null) {
|
659
|
-
promise.reject(exception);
|
660
|
-
return;
|
661
|
-
}
|
662
|
-
promise.resolve(null);
|
663
|
-
tasks.remove(this);
|
664
|
-
}
|
665
|
-
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
666
|
-
tasks.put(task, "releaseAllContexts");
|
667
|
-
}
|
668
|
-
|
669
|
-
@Override
public void onHostResume() {
  // No-op: nothing to restore when the host activity resumes.
}
|
672
|
-
|
673
|
-
@Override
public void onHostPause() {
  // No-op: contexts stay alive across pauses; teardown only on destroy.
}
|
676
|
-
|
677
|
-
// Best-effort teardown when the host activity is destroyed. Order matters:
// stop generation first, then wait for background tasks, then release
// native resources — so no task touches a released context.
@Override
public void onHostDestroy() {
  // 1) Signal every context to stop an in-flight completion.
  for (LlamaContext context : contexts.values()) {
    context.stopCompletion();
  }
  // 2) Block until all registered background tasks finish.
  // NOTE(review): releaseAllContexts() invokes this from inside one of the
  // registered tasks; task.get() on that entry would wait on the current
  // task — confirm this cannot self-deadlock.
  for (AsyncTask task : tasks.keySet()) {
    try {
      task.get();
    } catch (Exception e) {
      Log.e(NAME, "Failed to wait for task", e);
    }
  }
  tasks.clear();
  // 3) Free native resources and forget all contexts.
  for (LlamaContext context : contexts.values()) {
    context.release();
  }
  contexts.clear();
}
|
695
|
-
}
|
1
|
+
package com.rnllama;
|
2
|
+
|
3
|
+
import androidx.annotation.NonNull;
|
4
|
+
import android.util.Log;
|
5
|
+
import android.os.Build;
|
6
|
+
import android.os.Handler;
|
7
|
+
import android.os.AsyncTask;
|
8
|
+
|
9
|
+
import com.facebook.react.bridge.Promise;
|
10
|
+
import com.facebook.react.bridge.ReactApplicationContext;
|
11
|
+
import com.facebook.react.bridge.ReactMethod;
|
12
|
+
import com.facebook.react.bridge.LifecycleEventListener;
|
13
|
+
import com.facebook.react.bridge.ReadableMap;
|
14
|
+
import com.facebook.react.bridge.ReadableArray;
|
15
|
+
import com.facebook.react.bridge.WritableMap;
|
16
|
+
import com.facebook.react.bridge.Arguments;
|
17
|
+
|
18
|
+
import java.util.HashMap;
|
19
|
+
import java.util.Random;
|
20
|
+
import java.io.File;
|
21
|
+
import java.io.FileInputStream;
|
22
|
+
import java.io.PushbackInputStream;
|
23
|
+
|
24
|
+
public class RNLlama implements LifecycleEventListener {
|
25
|
+
public static final String NAME = "RNLlama";
|
26
|
+
|
27
|
+
private ReactApplicationContext reactContext;
|
28
|
+
|
29
|
+
// Registers this module for host lifecycle callbacks (resume/pause/destroy)
// and keeps the React context for later native calls.
public RNLlama(ReactApplicationContext reactContext) {
  reactContext.addLifecycleEventListener(this);
  this.reactContext = reactContext;
}
|
33
|
+
|
34
|
+
private HashMap<AsyncTask, String> tasks = new HashMap<>();
|
35
|
+
|
36
|
+
private HashMap<Integer, LlamaContext> contexts = new HashMap<>();
|
37
|
+
|
38
|
+
// Toggles native-side logging via LlamaContext.toggleNativeLog on a
// background thread; resolves true on success, rejects on failure.
// NOTE(review): unlike most methods in this class, this task is not
// registered in `tasks` — confirm that is intentional.
public void toggleNativeLog(boolean enabled, Promise promise) {
  new AsyncTask<Void, Void, Boolean>() {
    private Exception exception;

    @Override
    protected Boolean doInBackground(Void... voids) {
      try {
        LlamaContext.toggleNativeLog(reactContext, enabled);
        return true;
      } catch (Exception e) {
        exception = e;
      }
      return null;
    }

    @Override
    protected void onPostExecute(Boolean result) {
      if (exception != null) {
        promise.reject(exception);
        return;
      }
      promise.resolve(result);
    }
  }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
}
|
63
|
+
|
64
|
+
private int llamaContextLimit = -1;
|
65
|
+
|
66
|
+
// Sets the maximum number of simultaneously loaded contexts; a negative
// value disables the check (initContext only enforces limits > -1).
// Resolves immediately with null.
public void setContextLimit(double limit, Promise promise) {
  llamaContextLimit = (int) limit;
  promise.resolve(null);
}
|
70
|
+
|
71
|
+
// Reads metadata for the model file at `model` on a background thread and
// resolves with the native result map. `skip` is forwarded as a String[];
// presumably metadata keys to omit — confirm against the native
// LlamaContext.modelInfo implementation.
public void modelInfo(final String model, final ReadableArray skip, final Promise promise) {
  new AsyncTask<Void, Void, WritableMap>() {
    private Exception exception;

    @Override
    protected WritableMap doInBackground(Void... voids) {
      try {
        // Copy the JS array into a plain String[] for the native call.
        String[] skipArray = new String[skip.size()];
        for (int i = 0; i < skip.size(); i++) {
          skipArray[i] = skip.getString(i);
        }
        return LlamaContext.modelInfo(model, skipArray);
      } catch (Exception e) {
        exception = e;
      }
      return null;
    }

    @Override
    protected void onPostExecute(WritableMap result) {
      if (exception != null) {
        promise.reject(exception);
        return;
      }
      promise.resolve(result);
    }
  }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
}
|
99
|
+
|
100
|
+
public void initContext(double id, final ReadableMap params, final Promise promise) {
|
101
|
+
final int contextId = (int) id;
|
102
|
+
AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
|
103
|
+
private Exception exception;
|
104
|
+
|
105
|
+
@Override
|
106
|
+
protected WritableMap doInBackground(Void... voids) {
|
107
|
+
try {
|
108
|
+
LlamaContext context = contexts.get(contextId);
|
109
|
+
if (context != null) {
|
110
|
+
throw new Exception("Context already exists");
|
111
|
+
}
|
112
|
+
if (llamaContextLimit > -1 && contexts.size() >= llamaContextLimit) {
|
113
|
+
throw new Exception("Context limit reached");
|
114
|
+
}
|
115
|
+
LlamaContext llamaContext = new LlamaContext(contextId, reactContext, params);
|
116
|
+
if (llamaContext.getContext() == 0) {
|
117
|
+
throw new Exception("Failed to initialize context");
|
118
|
+
}
|
119
|
+
contexts.put(contextId, llamaContext);
|
120
|
+
WritableMap result = Arguments.createMap();
|
121
|
+
result.putBoolean("gpu", false);
|
122
|
+
result.putString("reasonNoGPU", "Currently not supported");
|
123
|
+
result.putMap("model", llamaContext.getModelDetails());
|
124
|
+
result.putString("androidLib", llamaContext.getLoadedLibrary());
|
125
|
+
return result;
|
126
|
+
} catch (Exception e) {
|
127
|
+
exception = e;
|
128
|
+
return null;
|
129
|
+
}
|
130
|
+
}
|
131
|
+
|
132
|
+
@Override
|
133
|
+
protected void onPostExecute(WritableMap result) {
|
134
|
+
if (exception != null) {
|
135
|
+
promise.reject(exception);
|
136
|
+
return;
|
137
|
+
}
|
138
|
+
promise.resolve(result);
|
139
|
+
tasks.remove(this);
|
140
|
+
}
|
141
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
142
|
+
tasks.put(task, "initContext");
|
143
|
+
}
|
144
|
+
|
145
|
+
public void getFormattedChat(double id, final String messages, final String chatTemplate, final ReadableMap params, Promise promise) {
|
146
|
+
final int contextId = (int) id;
|
147
|
+
AsyncTask task = new AsyncTask<Void, Void, Object>() {
|
148
|
+
private Exception exception;
|
149
|
+
|
150
|
+
@Override
|
151
|
+
protected Object doInBackground(Void... voids) {
|
152
|
+
try {
|
153
|
+
LlamaContext context = contexts.get(contextId);
|
154
|
+
if (context == null) {
|
155
|
+
throw new Exception("Context not found");
|
156
|
+
}
|
157
|
+
if (params.hasKey("jinja") && params.getBoolean("jinja")) {
|
158
|
+
ReadableMap result = context.getFormattedChatWithJinja(messages, chatTemplate, params);
|
159
|
+
if (result.hasKey("_error")) {
|
160
|
+
throw new Exception(result.getString("_error"));
|
161
|
+
}
|
162
|
+
return result;
|
163
|
+
}
|
164
|
+
return context.getFormattedChat(messages, chatTemplate);
|
165
|
+
} catch (Exception e) {
|
166
|
+
exception = e;
|
167
|
+
return null;
|
168
|
+
}
|
169
|
+
}
|
170
|
+
|
171
|
+
@Override
|
172
|
+
protected void onPostExecute(Object result) {
|
173
|
+
if (exception != null) {
|
174
|
+
promise.reject(exception);
|
175
|
+
return;
|
176
|
+
}
|
177
|
+
promise.resolve(result);
|
178
|
+
tasks.remove(this);
|
179
|
+
}
|
180
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
181
|
+
tasks.put(task, "getFormattedChat-" + contextId);
|
182
|
+
}
|
183
|
+
|
184
|
+
public void loadSession(double id, final String path, Promise promise) {
|
185
|
+
final int contextId = (int) id;
|
186
|
+
AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
|
187
|
+
private Exception exception;
|
188
|
+
|
189
|
+
@Override
|
190
|
+
protected WritableMap doInBackground(Void... voids) {
|
191
|
+
try {
|
192
|
+
LlamaContext context = contexts.get(contextId);
|
193
|
+
if (context == null) {
|
194
|
+
throw new Exception("Context not found");
|
195
|
+
}
|
196
|
+
WritableMap result = context.loadSession(path);
|
197
|
+
return result;
|
198
|
+
} catch (Exception e) {
|
199
|
+
exception = e;
|
200
|
+
}
|
201
|
+
return null;
|
202
|
+
}
|
203
|
+
|
204
|
+
@Override
|
205
|
+
protected void onPostExecute(WritableMap result) {
|
206
|
+
if (exception != null) {
|
207
|
+
promise.reject(exception);
|
208
|
+
return;
|
209
|
+
}
|
210
|
+
promise.resolve(result);
|
211
|
+
tasks.remove(this);
|
212
|
+
}
|
213
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
214
|
+
tasks.put(task, "loadSession-" + contextId);
|
215
|
+
}
|
216
|
+
|
217
|
+
public void saveSession(double id, final String path, double size, Promise promise) {
|
218
|
+
final int contextId = (int) id;
|
219
|
+
AsyncTask task = new AsyncTask<Void, Void, Integer>() {
|
220
|
+
private Exception exception;
|
221
|
+
|
222
|
+
@Override
|
223
|
+
protected Integer doInBackground(Void... voids) {
|
224
|
+
try {
|
225
|
+
LlamaContext context = contexts.get(contextId);
|
226
|
+
if (context == null) {
|
227
|
+
throw new Exception("Context not found");
|
228
|
+
}
|
229
|
+
Integer count = context.saveSession(path, (int) size);
|
230
|
+
return count;
|
231
|
+
} catch (Exception e) {
|
232
|
+
exception = e;
|
233
|
+
}
|
234
|
+
return -1;
|
235
|
+
}
|
236
|
+
|
237
|
+
@Override
|
238
|
+
protected void onPostExecute(Integer result) {
|
239
|
+
if (exception != null) {
|
240
|
+
promise.reject(exception);
|
241
|
+
return;
|
242
|
+
}
|
243
|
+
promise.resolve(result);
|
244
|
+
tasks.remove(this);
|
245
|
+
}
|
246
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
247
|
+
tasks.put(task, "saveSession-" + contextId);
|
248
|
+
}
|
249
|
+
|
250
|
+
public void completion(double id, final ReadableMap params, final Promise promise) {
|
251
|
+
final int contextId = (int) id;
|
252
|
+
AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
|
253
|
+
private Exception exception;
|
254
|
+
|
255
|
+
@Override
|
256
|
+
protected WritableMap doInBackground(Void... voids) {
|
257
|
+
try {
|
258
|
+
LlamaContext context = contexts.get(contextId);
|
259
|
+
if (context == null) {
|
260
|
+
throw new Exception("Context not found");
|
261
|
+
}
|
262
|
+
if (context.isPredicting()) {
|
263
|
+
throw new Exception("Context is busy");
|
264
|
+
}
|
265
|
+
WritableMap result = context.completion(params);
|
266
|
+
return result;
|
267
|
+
} catch (Exception e) {
|
268
|
+
exception = e;
|
269
|
+
}
|
270
|
+
return null;
|
271
|
+
}
|
272
|
+
|
273
|
+
@Override
|
274
|
+
protected void onPostExecute(WritableMap result) {
|
275
|
+
if (exception != null) {
|
276
|
+
promise.reject(exception);
|
277
|
+
return;
|
278
|
+
}
|
279
|
+
promise.resolve(result);
|
280
|
+
tasks.remove(this);
|
281
|
+
}
|
282
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
283
|
+
tasks.put(task, "completion-" + contextId);
|
284
|
+
}
|
285
|
+
|
286
|
+
public void stopCompletion(double id, final Promise promise) {
|
287
|
+
final int contextId = (int) id;
|
288
|
+
AsyncTask task = new AsyncTask<Void, Void, Void>() {
|
289
|
+
private Exception exception;
|
290
|
+
|
291
|
+
@Override
|
292
|
+
protected Void doInBackground(Void... voids) {
|
293
|
+
try {
|
294
|
+
LlamaContext context = contexts.get(contextId);
|
295
|
+
if (context == null) {
|
296
|
+
throw new Exception("Context not found");
|
297
|
+
}
|
298
|
+
context.stopCompletion();
|
299
|
+
AsyncTask completionTask = null;
|
300
|
+
for (AsyncTask task : tasks.keySet()) {
|
301
|
+
if (tasks.get(task).equals("completion-" + contextId)) {
|
302
|
+
task.get();
|
303
|
+
break;
|
304
|
+
}
|
305
|
+
}
|
306
|
+
} catch (Exception e) {
|
307
|
+
exception = e;
|
308
|
+
}
|
309
|
+
return null;
|
310
|
+
}
|
311
|
+
|
312
|
+
@Override
|
313
|
+
protected void onPostExecute(Void result) {
|
314
|
+
if (exception != null) {
|
315
|
+
promise.reject(exception);
|
316
|
+
return;
|
317
|
+
}
|
318
|
+
promise.resolve(result);
|
319
|
+
tasks.remove(this);
|
320
|
+
}
|
321
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
322
|
+
tasks.put(task, "stopCompletion-" + contextId);
|
323
|
+
}
|
324
|
+
|
325
|
+
public void tokenizeAsync(double id, final String text, final Promise promise) {
|
326
|
+
final int contextId = (int) id;
|
327
|
+
AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
|
328
|
+
private Exception exception;
|
329
|
+
|
330
|
+
@Override
|
331
|
+
protected WritableMap doInBackground(Void... voids) {
|
332
|
+
try {
|
333
|
+
LlamaContext context = contexts.get(contextId);
|
334
|
+
if (context == null) {
|
335
|
+
throw new Exception("Context not found");
|
336
|
+
}
|
337
|
+
return context.tokenize(text);
|
338
|
+
} catch (Exception e) {
|
339
|
+
exception = e;
|
340
|
+
}
|
341
|
+
return null;
|
342
|
+
}
|
343
|
+
|
344
|
+
@Override
|
345
|
+
protected void onPostExecute(WritableMap result) {
|
346
|
+
if (exception != null) {
|
347
|
+
promise.reject(exception);
|
348
|
+
return;
|
349
|
+
}
|
350
|
+
promise.resolve(result);
|
351
|
+
tasks.remove(this);
|
352
|
+
}
|
353
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
354
|
+
tasks.put(task, "tokenize-" + contextId);
|
355
|
+
}
|
356
|
+
|
357
|
+
public WritableMap tokenizeSync(double id, final String text) {
|
358
|
+
int contextId = (int) id;
|
359
|
+
LlamaContext context = contexts.get(contextId);
|
360
|
+
if (context == null) {
|
361
|
+
return Arguments.createMap();
|
362
|
+
}
|
363
|
+
return context.tokenize(text);
|
364
|
+
}
|
365
|
+
|
366
|
+
public void getCpuFeatures(Promise promise) {
|
367
|
+
AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
|
368
|
+
private Exception exception;
|
369
|
+
@Override
|
370
|
+
protected WritableMap doInBackground(Void... voids) {
|
371
|
+
try {
|
372
|
+
WritableMap result = Arguments.createMap();
|
373
|
+
boolean isV8 = LlamaContext.isArm64V8a();
|
374
|
+
result.putBoolean("armv8", isV8);
|
375
|
+
|
376
|
+
if(isV8) {
|
377
|
+
String cpuFeatures = LlamaContext.getCpuFeatures();
|
378
|
+
boolean hasDotProd = cpuFeatures.contains("dotprod") || cpuFeatures.contains("asimddp");
|
379
|
+
boolean hasInt8Matmul = cpuFeatures.contains("i8mm");
|
380
|
+
result.putBoolean("i8mm", hasInt8Matmul);
|
381
|
+
result.putBoolean("dotprod", hasDotProd);
|
382
|
+
} else {
|
383
|
+
result.putBoolean("i8mm", false);
|
384
|
+
result.putBoolean("dotprod", false);
|
385
|
+
}
|
386
|
+
return result;
|
387
|
+
} catch (Exception e) {
|
388
|
+
exception = e;
|
389
|
+
return null;
|
390
|
+
}
|
391
|
+
}
|
392
|
+
|
393
|
+
@Override
|
394
|
+
protected void onPostExecute(WritableMap result) {
|
395
|
+
if (exception != null) {
|
396
|
+
promise.reject(exception);
|
397
|
+
return;
|
398
|
+
}
|
399
|
+
promise.resolve(result);
|
400
|
+
tasks.remove(this);
|
401
|
+
}
|
402
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
403
|
+
tasks.put(task, "getCPUFeatures");
|
404
|
+
}
|
405
|
+
|
406
|
+
public void detokenize(double id, final ReadableArray tokens, final Promise promise) {
|
407
|
+
final int contextId = (int) id;
|
408
|
+
AsyncTask task = new AsyncTask<Void, Void, String>() {
|
409
|
+
private Exception exception;
|
410
|
+
|
411
|
+
@Override
|
412
|
+
protected String doInBackground(Void... voids) {
|
413
|
+
try {
|
414
|
+
LlamaContext context = contexts.get(contextId);
|
415
|
+
if (context == null) {
|
416
|
+
throw new Exception("Context not found");
|
417
|
+
}
|
418
|
+
return context.detokenize(tokens);
|
419
|
+
} catch (Exception e) {
|
420
|
+
exception = e;
|
421
|
+
}
|
422
|
+
return null;
|
423
|
+
}
|
424
|
+
|
425
|
+
@Override
|
426
|
+
protected void onPostExecute(String result) {
|
427
|
+
if (exception != null) {
|
428
|
+
promise.reject(exception);
|
429
|
+
return;
|
430
|
+
}
|
431
|
+
promise.resolve(result);
|
432
|
+
tasks.remove(this);
|
433
|
+
}
|
434
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
435
|
+
tasks.put(task, "detokenize-" + contextId);
|
436
|
+
}
|
437
|
+
|
438
|
+
public void embedding(double id, final String text, final ReadableMap params, final Promise promise) {
|
439
|
+
final int contextId = (int) id;
|
440
|
+
AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
|
441
|
+
private Exception exception;
|
442
|
+
|
443
|
+
@Override
|
444
|
+
protected WritableMap doInBackground(Void... voids) {
|
445
|
+
try {
|
446
|
+
LlamaContext context = contexts.get(contextId);
|
447
|
+
if (context == null) {
|
448
|
+
throw new Exception("Context not found");
|
449
|
+
}
|
450
|
+
return context.getEmbedding(text, params);
|
451
|
+
} catch (Exception e) {
|
452
|
+
exception = e;
|
453
|
+
}
|
454
|
+
return null;
|
455
|
+
}
|
456
|
+
|
457
|
+
@Override
|
458
|
+
protected void onPostExecute(WritableMap result) {
|
459
|
+
if (exception != null) {
|
460
|
+
promise.reject(exception);
|
461
|
+
return;
|
462
|
+
}
|
463
|
+
promise.resolve(result);
|
464
|
+
tasks.remove(this);
|
465
|
+
}
|
466
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
467
|
+
tasks.put(task, "embedding-" + contextId);
|
468
|
+
}
|
469
|
+
|
470
|
+
public void bench(double id, final double pp, final double tg, final double pl, final double nr, final Promise promise) {
|
471
|
+
final int contextId = (int) id;
|
472
|
+
AsyncTask task = new AsyncTask<Void, Void, String>() {
|
473
|
+
private Exception exception;
|
474
|
+
|
475
|
+
@Override
|
476
|
+
protected String doInBackground(Void... voids) {
|
477
|
+
try {
|
478
|
+
LlamaContext context = contexts.get(contextId);
|
479
|
+
if (context == null) {
|
480
|
+
throw new Exception("Context not found");
|
481
|
+
}
|
482
|
+
return context.bench((int) pp, (int) tg, (int) pl, (int) nr);
|
483
|
+
} catch (Exception e) {
|
484
|
+
exception = e;
|
485
|
+
}
|
486
|
+
return null;
|
487
|
+
}
|
488
|
+
|
489
|
+
@Override
|
490
|
+
protected void onPostExecute(String result) {
|
491
|
+
if (exception != null) {
|
492
|
+
promise.reject(exception);
|
493
|
+
return;
|
494
|
+
}
|
495
|
+
promise.resolve(result);
|
496
|
+
tasks.remove(this);
|
497
|
+
}
|
498
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
499
|
+
tasks.put(task, "bench-" + contextId);
|
500
|
+
}
|
501
|
+
|
502
|
+
public void applyLoraAdapters(double id, final ReadableArray loraAdapters, final Promise promise) {
|
503
|
+
final int contextId = (int) id;
|
504
|
+
AsyncTask task = new AsyncTask<Void, Void, Void>() {
|
505
|
+
private Exception exception;
|
506
|
+
|
507
|
+
@Override
|
508
|
+
protected Void doInBackground(Void... voids) {
|
509
|
+
try {
|
510
|
+
LlamaContext context = contexts.get(contextId);
|
511
|
+
if (context == null) {
|
512
|
+
throw new Exception("Context not found");
|
513
|
+
}
|
514
|
+
if (context.isPredicting()) {
|
515
|
+
throw new Exception("Context is busy");
|
516
|
+
}
|
517
|
+
context.applyLoraAdapters(loraAdapters);
|
518
|
+
} catch (Exception e) {
|
519
|
+
exception = e;
|
520
|
+
}
|
521
|
+
return null;
|
522
|
+
}
|
523
|
+
|
524
|
+
@Override
|
525
|
+
protected void onPostExecute(Void result) {
|
526
|
+
if (exception != null) {
|
527
|
+
promise.reject(exception);
|
528
|
+
return;
|
529
|
+
}
|
530
|
+
}
|
531
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
532
|
+
tasks.put(task, "applyLoraAdapters-" + contextId);
|
533
|
+
}
|
534
|
+
|
535
|
+
public void removeLoraAdapters(double id, final Promise promise) {
|
536
|
+
final int contextId = (int) id;
|
537
|
+
AsyncTask task = new AsyncTask<Void, Void, Void>() {
|
538
|
+
private Exception exception;
|
539
|
+
|
540
|
+
@Override
|
541
|
+
protected Void doInBackground(Void... voids) {
|
542
|
+
try {
|
543
|
+
LlamaContext context = contexts.get(contextId);
|
544
|
+
if (context == null) {
|
545
|
+
throw new Exception("Context not found");
|
546
|
+
}
|
547
|
+
if (context.isPredicting()) {
|
548
|
+
throw new Exception("Context is busy");
|
549
|
+
}
|
550
|
+
context.removeLoraAdapters();
|
551
|
+
} catch (Exception e) {
|
552
|
+
exception = e;
|
553
|
+
}
|
554
|
+
return null;
|
555
|
+
}
|
556
|
+
|
557
|
+
@Override
|
558
|
+
protected void onPostExecute(Void result) {
|
559
|
+
if (exception != null) {
|
560
|
+
promise.reject(exception);
|
561
|
+
return;
|
562
|
+
}
|
563
|
+
promise.resolve(null);
|
564
|
+
}
|
565
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
566
|
+
tasks.put(task, "removeLoraAdapters-" + contextId);
|
567
|
+
}
|
568
|
+
|
569
|
+
public void getLoadedLoraAdapters(double id, final Promise promise) {
|
570
|
+
final int contextId = (int) id;
|
571
|
+
AsyncTask task = new AsyncTask<Void, Void, ReadableArray>() {
|
572
|
+
private Exception exception;
|
573
|
+
|
574
|
+
@Override
|
575
|
+
protected ReadableArray doInBackground(Void... voids) {
|
576
|
+
try {
|
577
|
+
LlamaContext context = contexts.get(contextId);
|
578
|
+
if (context == null) {
|
579
|
+
throw new Exception("Context not found");
|
580
|
+
}
|
581
|
+
return context.getLoadedLoraAdapters();
|
582
|
+
} catch (Exception e) {
|
583
|
+
exception = e;
|
584
|
+
}
|
585
|
+
return null;
|
586
|
+
}
|
587
|
+
|
588
|
+
@Override
|
589
|
+
protected void onPostExecute(ReadableArray result) {
|
590
|
+
if (exception != null) {
|
591
|
+
promise.reject(exception);
|
592
|
+
return;
|
593
|
+
}
|
594
|
+
promise.resolve(result);
|
595
|
+
}
|
596
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
597
|
+
tasks.put(task, "getLoadedLoraAdapters-" + contextId);
|
598
|
+
}
|
599
|
+
|
600
|
+
public void releaseContext(double id, Promise promise) {
|
601
|
+
final int contextId = (int) id;
|
602
|
+
AsyncTask task = new AsyncTask<Void, Void, Void>() {
|
603
|
+
private Exception exception;
|
604
|
+
|
605
|
+
@Override
|
606
|
+
protected Void doInBackground(Void... voids) {
|
607
|
+
try {
|
608
|
+
LlamaContext context = contexts.get(contextId);
|
609
|
+
if (context == null) {
|
610
|
+
throw new Exception("Context " + id + " not found");
|
611
|
+
}
|
612
|
+
context.interruptLoad();
|
613
|
+
context.stopCompletion();
|
614
|
+
AsyncTask completionTask = null;
|
615
|
+
for (AsyncTask task : tasks.keySet()) {
|
616
|
+
if (tasks.get(task).equals("completion-" + contextId)) {
|
617
|
+
task.get();
|
618
|
+
break;
|
619
|
+
}
|
620
|
+
}
|
621
|
+
context.release();
|
622
|
+
contexts.remove(contextId);
|
623
|
+
} catch (Exception e) {
|
624
|
+
exception = e;
|
625
|
+
}
|
626
|
+
return null;
|
627
|
+
}
|
628
|
+
|
629
|
+
@Override
|
630
|
+
protected void onPostExecute(Void result) {
|
631
|
+
if (exception != null) {
|
632
|
+
promise.reject(exception);
|
633
|
+
return;
|
634
|
+
}
|
635
|
+
promise.resolve(null);
|
636
|
+
tasks.remove(this);
|
637
|
+
}
|
638
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
639
|
+
tasks.put(task, "releaseContext-" + contextId);
|
640
|
+
}
|
641
|
+
|
642
|
+
public void releaseAllContexts(Promise promise) {
|
643
|
+
AsyncTask task = new AsyncTask<Void, Void, Void>() {
|
644
|
+
private Exception exception;
|
645
|
+
|
646
|
+
@Override
|
647
|
+
protected Void doInBackground(Void... voids) {
|
648
|
+
try {
|
649
|
+
onHostDestroy();
|
650
|
+
} catch (Exception e) {
|
651
|
+
exception = e;
|
652
|
+
}
|
653
|
+
return null;
|
654
|
+
}
|
655
|
+
|
656
|
+
@Override
|
657
|
+
protected void onPostExecute(Void result) {
|
658
|
+
if (exception != null) {
|
659
|
+
promise.reject(exception);
|
660
|
+
return;
|
661
|
+
}
|
662
|
+
promise.resolve(null);
|
663
|
+
tasks.remove(this);
|
664
|
+
}
|
665
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
666
|
+
tasks.put(task, "releaseAllContexts");
|
667
|
+
}
|
668
|
+
|
669
|
+
@Override
|
670
|
+
public void onHostResume() {
|
671
|
+
}
|
672
|
+
|
673
|
+
@Override
|
674
|
+
public void onHostPause() {
|
675
|
+
}
|
676
|
+
|
677
|
+
@Override
|
678
|
+
public void onHostDestroy() {
|
679
|
+
for (LlamaContext context : contexts.values()) {
|
680
|
+
context.stopCompletion();
|
681
|
+
}
|
682
|
+
for (AsyncTask task : tasks.keySet()) {
|
683
|
+
try {
|
684
|
+
task.get();
|
685
|
+
} catch (Exception e) {
|
686
|
+
Log.e(NAME, "Failed to wait for task", e);
|
687
|
+
}
|
688
|
+
}
|
689
|
+
tasks.clear();
|
690
|
+
for (LlamaContext context : contexts.values()) {
|
691
|
+
context.release();
|
692
|
+
}
|
693
|
+
contexts.clear();
|
694
|
+
}
|
695
|
+
}
|