cui-llama.rn 1.4.6 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -20
- package/README.md +317 -319
- package/android/build.gradle +116 -116
- package/android/gradle.properties +5 -5
- package/android/src/main/AndroidManifest.xml +4 -4
- package/android/src/main/CMakeLists.txt +124 -117
- package/android/src/main/java/com/rnllama/LlamaContext.java +645 -645
- package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
- package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
- package/android/src/main/jni-utils.h +100 -100
- package/android/src/main/jni.cpp +1263 -1245
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
- package/cpp/README.md +4 -4
- package/cpp/binary-ops.cpp +158 -0
- package/cpp/binary-ops.h +16 -0
- package/cpp/chat.cpp +1769 -1779
- package/cpp/chat.h +9 -1
- package/cpp/common.cpp +20 -522
- package/cpp/common.h +13 -36
- package/cpp/cpu-common.h +72 -0
- package/cpp/ggml-common.h +12 -6
- package/cpp/ggml-cpu-aarch64.cpp +1557 -80
- package/cpp/ggml-cpu-impl.h +2 -21
- package/cpp/ggml-cpu-quants.c +904 -405
- package/cpp/ggml-cpu.c +909 -13237
- package/cpp/ggml-impl.h +50 -23
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +597 -523
- package/cpp/ggml-metal.m +798 -580
- package/cpp/ggml.c +92 -3
- package/cpp/ggml.h +30 -6
- package/cpp/gguf.cpp +1 -0
- package/cpp/llama-adapter.cpp +55 -20
- package/cpp/llama-adapter.h +11 -9
- package/cpp/llama-arch.cpp +217 -16
- package/cpp/llama-arch.h +25 -0
- package/cpp/llama-batch.h +2 -2
- package/cpp/llama-chat.cpp +54 -2
- package/cpp/llama-chat.h +3 -0
- package/cpp/llama-context.cpp +2294 -1238
- package/cpp/llama-context.h +214 -77
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +1695 -0
- package/cpp/llama-graph.h +592 -0
- package/cpp/llama-hparams.cpp +8 -0
- package/cpp/llama-hparams.h +17 -0
- package/cpp/llama-io.cpp +15 -0
- package/cpp/llama-io.h +35 -0
- package/cpp/llama-kv-cache.cpp +965 -303
- package/cpp/llama-kv-cache.h +145 -151
- package/cpp/llama-memory.cpp +1 -0
- package/cpp/llama-memory.h +21 -0
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +10 -5
- package/cpp/llama-model-loader.h +5 -3
- package/cpp/llama-model.cpp +9194 -201
- package/cpp/llama-model.h +40 -1
- package/cpp/llama-sampling.cpp +5 -0
- package/cpp/llama-vocab.cpp +36 -5
- package/cpp/llama.cpp +51 -9984
- package/cpp/llama.h +102 -22
- package/cpp/log.cpp +34 -0
- package/cpp/minja/chat-template.hpp +15 -7
- package/cpp/minja/minja.hpp +120 -94
- package/cpp/ops.cpp +8723 -0
- package/cpp/ops.h +128 -0
- package/cpp/rn-llama.cpp +873 -882
- package/cpp/rn-llama.h +138 -148
- package/cpp/sampling.cpp +3 -0
- package/cpp/sampling.h +107 -107
- package/cpp/sgemm.cpp +533 -88
- package/cpp/simd-mappings.h +888 -0
- package/cpp/speculative.cpp +4 -4
- package/cpp/unary-ops.cpp +186 -0
- package/cpp/unary-ops.h +28 -0
- package/cpp/unicode-data.cpp +7034 -7034
- package/cpp/unicode-data.h +20 -20
- package/cpp/unicode.cpp +849 -849
- package/cpp/unicode.h +66 -66
- package/cpp/vec.cpp +258 -0
- package/cpp/vec.h +802 -0
- package/ios/CMakeLists.txt +116 -105
- package/ios/RNLlama.h +7 -7
- package/ios/RNLlama.mm +418 -405
- package/ios/RNLlamaContext.h +57 -57
- package/ios/RNLlamaContext.mm +835 -819
- package/ios/rnllama.xcframework/Info.plist +74 -74
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
- package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/chat-template.hpp +15 -7
- package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/minja.hpp +120 -94
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +203 -203
- package/lib/commonjs/NativeRNLlama.js +1 -2
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/chat.js.map +1 -1
- package/lib/commonjs/grammar.js +12 -31
- package/lib/commonjs/grammar.js.map +1 -1
- package/lib/commonjs/index.js +47 -47
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/package.json +1 -0
- package/lib/module/NativeRNLlama.js +2 -0
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/chat.js +2 -0
- package/lib/module/chat.js.map +1 -1
- package/lib/module/grammar.js +14 -31
- package/lib/module/grammar.js.map +1 -1
- package/lib/module/index.js +47 -45
- package/lib/module/index.js.map +1 -1
- package/lib/module/package.json +1 -0
- package/lib/typescript/NativeRNLlama.d.ts +6 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/llama-rn.podspec +48 -48
- package/package.json +233 -233
- package/src/NativeRNLlama.ts +426 -424
- package/src/chat.ts +44 -44
- package/src/grammar.ts +854 -854
- package/src/index.ts +495 -485
package/cpp/llama.h
CHANGED
@@ -61,6 +61,7 @@ extern "C" {
|
|
61
61
|
struct llama_model;
|
62
62
|
struct llama_context;
|
63
63
|
struct llama_sampler;
|
64
|
+
struct llama_kv_cache;
|
64
65
|
|
65
66
|
typedef int32_t llama_pos;
|
66
67
|
typedef int32_t llama_token;
|
@@ -107,6 +108,10 @@ extern "C" {
|
|
107
108
|
LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
|
108
109
|
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
|
109
110
|
LLAMA_VOCAB_PRE_TYPE_GPT4O = 29,
|
111
|
+
LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30,
|
112
|
+
LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
|
113
|
+
LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
|
114
|
+
LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
|
110
115
|
};
|
111
116
|
|
112
117
|
enum llama_rope_type {
|
@@ -277,10 +282,18 @@ extern "C" {
|
|
277
282
|
};
|
278
283
|
};
|
279
284
|
|
285
|
+
struct llama_model_tensor_buft_override {
|
286
|
+
const char * pattern;
|
287
|
+
lm_ggml_backend_buffer_type_t buft;
|
288
|
+
};
|
289
|
+
|
280
290
|
struct llama_model_params {
|
281
291
|
// NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
|
282
292
|
lm_ggml_backend_dev_t * devices;
|
283
293
|
|
294
|
+
// NULL-terminated list of buffer types to use for tensors that match a pattern
|
295
|
+
const struct llama_model_tensor_buft_override * tensor_buft_overrides;
|
296
|
+
|
284
297
|
int32_t n_gpu_layers; // number of layers to store in VRAM
|
285
298
|
enum llama_split_mode split_mode; // how to split the model across multiple GPUs
|
286
299
|
|
@@ -470,7 +483,8 @@ extern "C" {
|
|
470
483
|
DEPRECATED(LLAMA_API int32_t llama_n_vocab (const struct llama_vocab * vocab), "use llama_vocab_n_tokens instead");
|
471
484
|
|
472
485
|
LLAMA_API const struct llama_model * llama_get_model (const struct llama_context * ctx);
|
473
|
-
LLAMA_API
|
486
|
+
LLAMA_API struct llama_kv_cache * llama_get_kv_self ( struct llama_context * ctx);
|
487
|
+
LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx); // TODO: rename to llama_get_pooling_type
|
474
488
|
|
475
489
|
LLAMA_API const struct llama_vocab * llama_model_get_vocab(const struct llama_model * model);
|
476
490
|
LLAMA_API enum llama_rope_type llama_model_rope_type(const struct llama_model * model);
|
@@ -587,7 +601,7 @@ extern "C" {
|
|
587
601
|
// KV cache
|
588
602
|
//
|
589
603
|
|
590
|
-
// TODO:
|
604
|
+
// TODO: start using struct llama_kv_cache
|
591
605
|
|
592
606
|
// Information associated with an individual cell in the KV cache view.
|
593
607
|
struct llama_kv_cache_view_cell {
|
@@ -642,13 +656,19 @@ extern "C" {
|
|
642
656
|
|
643
657
|
// Returns the number of tokens in the KV cache (slow, use only for debug)
|
644
658
|
// If a KV cell has multiple sequences assigned to it, it will be counted multiple times
|
645
|
-
LLAMA_API int32_t
|
659
|
+
LLAMA_API int32_t llama_kv_self_n_tokens(const struct llama_context * ctx);
|
660
|
+
|
661
|
+
DEPRECATED(LLAMA_API int32_t llama_get_kv_cache_token_count(const struct llama_context * ctx),
|
662
|
+
"use llama_kv_self_n_tokens instead");
|
646
663
|
|
647
664
|
// Returns the number of used KV cells (i.e. have at least one sequence assigned to them)
|
648
|
-
LLAMA_API int32_t
|
665
|
+
LLAMA_API int32_t llama_kv_self_used_cells(const struct llama_context * ctx);
|
666
|
+
|
667
|
+
DEPRECATED(LLAMA_API int32_t llama_get_kv_cache_used_cells(const struct llama_context * ctx),
|
668
|
+
"use llama_kv_self_used_cells instead");
|
649
669
|
|
650
670
|
// Clear the KV cache - both cell info is erased and KV data is zeroed
|
651
|
-
LLAMA_API void
|
671
|
+
LLAMA_API void llama_kv_self_clear(
|
652
672
|
struct llama_context * ctx);
|
653
673
|
|
654
674
|
// Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
|
@@ -656,7 +676,7 @@ extern "C" {
|
|
656
676
|
// seq_id < 0 : match any sequence
|
657
677
|
// p0 < 0 : [0, p1]
|
658
678
|
// p1 < 0 : [p0, inf)
|
659
|
-
LLAMA_API bool
|
679
|
+
LLAMA_API bool llama_kv_self_seq_rm(
|
660
680
|
struct llama_context * ctx,
|
661
681
|
llama_seq_id seq_id,
|
662
682
|
llama_pos p0,
|
@@ -666,7 +686,7 @@ extern "C" {
|
|
666
686
|
// Note that this does not allocate extra KV cache memory - it simply assigns the tokens to the new sequence
|
667
687
|
// p0 < 0 : [0, p1]
|
668
688
|
// p1 < 0 : [p0, inf)
|
669
|
-
LLAMA_API void
|
689
|
+
LLAMA_API void llama_kv_self_seq_cp(
|
670
690
|
struct llama_context * ctx,
|
671
691
|
llama_seq_id seq_id_src,
|
672
692
|
llama_seq_id seq_id_dst,
|
@@ -674,17 +694,17 @@ extern "C" {
|
|
674
694
|
llama_pos p1);
|
675
695
|
|
676
696
|
// Removes all tokens that do not belong to the specified sequence
|
677
|
-
LLAMA_API void
|
697
|
+
LLAMA_API void llama_kv_self_seq_keep(
|
678
698
|
struct llama_context * ctx,
|
679
699
|
llama_seq_id seq_id);
|
680
700
|
|
681
701
|
// Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
|
682
702
|
// If the KV cache is RoPEd, the KV data is updated accordingly:
|
683
703
|
// - lazily on next llama_decode()
|
684
|
-
// - explicitly with
|
704
|
+
// - explicitly with llama_kv_self_update()
|
685
705
|
// p0 < 0 : [0, p1]
|
686
706
|
// p1 < 0 : [p0, inf)
|
687
|
-
LLAMA_API void
|
707
|
+
LLAMA_API void llama_kv_self_seq_add(
|
688
708
|
struct llama_context * ctx,
|
689
709
|
llama_seq_id seq_id,
|
690
710
|
llama_pos p0,
|
@@ -694,10 +714,10 @@ extern "C" {
|
|
694
714
|
// Integer division of the positions by factor of `d > 1`
|
695
715
|
// If the KV cache is RoPEd, the KV data is updated accordingly:
|
696
716
|
// - lazily on next llama_decode()
|
697
|
-
// - explicitly with
|
717
|
+
// - explicitly with llama_kv_self_update()
|
698
718
|
// p0 < 0 : [0, p1]
|
699
719
|
// p1 < 0 : [p0, inf)
|
700
|
-
LLAMA_API void
|
720
|
+
LLAMA_API void llama_kv_self_seq_div(
|
701
721
|
struct llama_context * ctx,
|
702
722
|
llama_seq_id seq_id,
|
703
723
|
llama_pos p0,
|
@@ -705,24 +725,76 @@ extern "C" {
|
|
705
725
|
int d);
|
706
726
|
|
707
727
|
// Returns the largest position present in the KV cache for the specified sequence
|
708
|
-
LLAMA_API llama_pos
|
728
|
+
LLAMA_API llama_pos llama_kv_self_seq_pos_max(
|
709
729
|
struct llama_context * ctx,
|
710
|
-
|
711
|
-
|
712
|
-
// TODO: the llama_kv_cache_defrag and llama_kv_cache_update API tightly couples llama_context with llama_kv_cache
|
713
|
-
// how to avoid this?
|
730
|
+
llama_seq_id seq_id);
|
714
731
|
|
715
732
|
// Defragment the KV cache
|
716
733
|
// This will be applied:
|
717
734
|
// - lazily on next llama_decode()
|
718
|
-
// - explicitly with
|
719
|
-
LLAMA_API void
|
735
|
+
// - explicitly with llama_kv_self_update()
|
736
|
+
LLAMA_API void llama_kv_self_defrag(struct llama_context * ctx);
|
737
|
+
|
738
|
+
// Check if the context supports KV cache shifting
|
739
|
+
LLAMA_API bool llama_kv_self_can_shift(const struct llama_context * ctx);
|
720
740
|
|
721
741
|
// Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
|
722
|
-
LLAMA_API void
|
742
|
+
LLAMA_API void llama_kv_self_update(struct llama_context * ctx);
|
743
|
+
|
744
|
+
DEPRECATED(LLAMA_API void llama_kv_cache_clear(
|
745
|
+
struct llama_context * ctx),
|
746
|
+
"use llama_kv_self_clear instead");
|
747
|
+
|
748
|
+
DEPRECATED(LLAMA_API bool llama_kv_cache_seq_rm(
|
749
|
+
struct llama_context * ctx,
|
750
|
+
llama_seq_id seq_id,
|
751
|
+
llama_pos p0,
|
752
|
+
llama_pos p1),
|
753
|
+
"use llama_kv_self_seq_rm instead");
|
754
|
+
|
755
|
+
DEPRECATED(LLAMA_API void llama_kv_cache_seq_cp(
|
756
|
+
struct llama_context * ctx,
|
757
|
+
llama_seq_id seq_id_src,
|
758
|
+
llama_seq_id seq_id_dst,
|
759
|
+
llama_pos p0,
|
760
|
+
llama_pos p1),
|
761
|
+
"use llama_kv_self_seq_cp instead");
|
762
|
+
|
763
|
+
DEPRECATED(LLAMA_API void llama_kv_cache_seq_keep(
|
764
|
+
struct llama_context * ctx,
|
765
|
+
llama_seq_id seq_id),
|
766
|
+
"use llama_kv_self_seq_keep instead");
|
767
|
+
|
768
|
+
DEPRECATED(LLAMA_API void llama_kv_cache_seq_add(
|
769
|
+
struct llama_context * ctx,
|
770
|
+
llama_seq_id seq_id,
|
771
|
+
llama_pos p0,
|
772
|
+
llama_pos p1,
|
773
|
+
llama_pos delta),
|
774
|
+
"use llama_kv_self_seq_add instead");
|
775
|
+
|
776
|
+
DEPRECATED(LLAMA_API void llama_kv_cache_seq_div(
|
777
|
+
struct llama_context * ctx,
|
778
|
+
llama_seq_id seq_id,
|
779
|
+
llama_pos p0,
|
780
|
+
llama_pos p1,
|
781
|
+
int d),
|
782
|
+
"use llama_kv_self_seq_div instead");
|
783
|
+
|
784
|
+
DEPRECATED(LLAMA_API llama_pos llama_kv_cache_seq_pos_max(
|
785
|
+
struct llama_context * ctx,
|
786
|
+
llama_seq_id seq_id),
|
787
|
+
"use llama_kv_self_seq_pos_max instead");
|
788
|
+
|
789
|
+
DEPRECATED(LLAMA_API void llama_kv_cache_defrag(struct llama_context * ctx),
|
790
|
+
"use llama_kv_self_defrag instead");
|
791
|
+
|
792
|
+
DEPRECATED(LLAMA_API bool llama_kv_cache_can_shift(const struct llama_context * ctx),
|
793
|
+
"use llama_kv_self_can_shift instead");
|
794
|
+
|
795
|
+
DEPRECATED(LLAMA_API void llama_kv_cache_update(struct llama_context * ctx),
|
796
|
+
"use llama_kv_self_update instead");
|
723
797
|
|
724
|
-
// Check if the context supports KV cache shifting
|
725
|
-
LLAMA_API bool llama_kv_cache_can_shift(struct llama_context * ctx);
|
726
798
|
|
727
799
|
//
|
728
800
|
// State / sessions
|
@@ -886,6 +958,10 @@ extern "C" {
|
|
886
958
|
// If set to true, the model will only attend to the past tokens
|
887
959
|
LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
|
888
960
|
|
961
|
+
// Set whether the model is in warmup mode or not
|
962
|
+
// If true, all model tensors are activated during llama_decode() to load and cache their weights.
|
963
|
+
LLAMA_API void llama_set_warmup(struct llama_context * ctx, bool warmup);
|
964
|
+
|
889
965
|
// Set abort callback
|
890
966
|
LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, lm_ggml_abort_callback abort_callback, void * abort_callback_data);
|
891
967
|
|
@@ -1201,6 +1277,10 @@ extern "C" {
|
|
1201
1277
|
float tau,
|
1202
1278
|
float eta);
|
1203
1279
|
|
1280
|
+
/// @details Intializes a GBNF grammar, see grammars/README.md for details.
|
1281
|
+
/// @param vocab The vocabulary that this grammar will be used with.
|
1282
|
+
/// @param grammar_str The production rules for the grammar, encoded as a string. Returns an empty grammar if empty. Returns NULL if parsing of grammar_str fails.
|
1283
|
+
/// @param grammar_root The name of the start symbol for the grammar.
|
1204
1284
|
LLAMA_API struct llama_sampler * llama_sampler_init_grammar(
|
1205
1285
|
const struct llama_vocab * vocab,
|
1206
1286
|
const char * grammar_str,
|
package/cpp/log.cpp
CHANGED
@@ -9,6 +9,10 @@
|
|
9
9
|
#include <thread>
|
10
10
|
#include <vector>
|
11
11
|
|
12
|
+
#if defined(__ANDROID__) && defined(RNLLAMA_ANDROID_ENABLE_LOGGING)
|
13
|
+
#include <android/log.h>
|
14
|
+
#endif
|
15
|
+
|
12
16
|
int common_log_verbosity_thold = LOG_DEFAULT_LLAMA;
|
13
17
|
|
14
18
|
void common_log_set_verbosity_thold(int verbosity) {
|
@@ -57,7 +61,36 @@ struct common_log_entry {
|
|
57
61
|
// signals the worker thread to stop
|
58
62
|
bool is_end;
|
59
63
|
|
64
|
+
#if defined(__ANDROID__) && defined(RNLLAMA_ANDROID_ENABLE_LOGGING)
|
65
|
+
void android_print() const {
|
66
|
+
int android_log_priority;
|
67
|
+
switch (level) {
|
68
|
+
case LM_GGML_LOG_LEVEL_INFO:
|
69
|
+
android_log_priority = ANDROID_LOG_INFO;
|
70
|
+
break;
|
71
|
+
case LM_GGML_LOG_LEVEL_WARN:
|
72
|
+
android_log_priority = ANDROID_LOG_WARN;
|
73
|
+
break;
|
74
|
+
case LM_GGML_LOG_LEVEL_ERROR:
|
75
|
+
android_log_priority = ANDROID_LOG_ERROR;
|
76
|
+
break;
|
77
|
+
case LM_GGML_LOG_LEVEL_DEBUG:
|
78
|
+
android_log_priority = ANDROID_LOG_DEBUG;
|
79
|
+
break;
|
80
|
+
default:
|
81
|
+
android_log_priority = ANDROID_LOG_DEFAULT;
|
82
|
+
break;
|
83
|
+
}
|
84
|
+
|
85
|
+
const char * tag = "RNLLAMA_LOG_ANDROID";
|
86
|
+
__android_log_print(android_log_priority, tag, "%s", msg.data());
|
87
|
+
}
|
88
|
+
#endif
|
89
|
+
|
60
90
|
void print(FILE * file = nullptr) const {
|
91
|
+
#if defined(__ANDROID__) && defined(RNLLAMA_ANDROID_ENABLE_LOGGING)
|
92
|
+
android_print();
|
93
|
+
#else
|
61
94
|
FILE * fcur = file;
|
62
95
|
if (!fcur) {
|
63
96
|
// stderr displays DBG messages only when their verbosity level is not higher than the threshold
|
@@ -102,6 +135,7 @@ struct common_log_entry {
|
|
102
135
|
}
|
103
136
|
|
104
137
|
fflush(fcur);
|
138
|
+
#endif
|
105
139
|
}
|
106
140
|
};
|
107
141
|
|
@@ -9,10 +9,19 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "minja.hpp"
|
12
|
-
|
12
|
+
|
13
|
+
#include <chrono>
|
14
|
+
#include <cstddef>
|
15
|
+
#include <cstdio>
|
16
|
+
#include <exception>
|
17
|
+
#include <iomanip>
|
18
|
+
#include <memory>
|
19
|
+
#include <sstream>
|
13
20
|
#include <string>
|
14
21
|
#include <vector>
|
15
22
|
|
23
|
+
#include <json.hpp>
|
24
|
+
|
16
25
|
using json = nlohmann::ordered_json;
|
17
26
|
|
18
27
|
namespace minja {
|
@@ -425,7 +434,7 @@ class chat_template {
|
|
425
434
|
auto obj = json {
|
426
435
|
{"tool_calls", tool_calls},
|
427
436
|
};
|
428
|
-
if (!content.is_null() && content
|
437
|
+
if (!content.is_null() && !content.empty()) {
|
429
438
|
obj["content"] = content;
|
430
439
|
}
|
431
440
|
message["content"] = obj.dump(2);
|
@@ -435,13 +444,12 @@ class chat_template {
|
|
435
444
|
if (polyfill_tool_responses && role == "tool") {
|
436
445
|
message["role"] = "user";
|
437
446
|
auto obj = json {
|
438
|
-
{"tool_response",
|
439
|
-
{"content", message.at("content")},
|
440
|
-
}},
|
447
|
+
{"tool_response", json::object()},
|
441
448
|
};
|
442
449
|
if (message.contains("name")) {
|
443
|
-
obj["tool_response"]["
|
450
|
+
obj["tool_response"]["tool"] = message.at("name");
|
444
451
|
}
|
452
|
+
obj["tool_response"]["content"] = message.at("content");
|
445
453
|
if (message.contains("tool_call_id")) {
|
446
454
|
obj["tool_response"]["tool_call_id"] = message.at("tool_call_id");
|
447
455
|
}
|
@@ -510,7 +518,7 @@ class chat_template {
|
|
510
518
|
static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) {
|
511
519
|
json messages_with_system = messages;
|
512
520
|
|
513
|
-
if (messages_with_system.
|
521
|
+
if (!messages_with_system.empty() && messages_with_system[0].at("role") == "system") {
|
514
522
|
std::string existing_system = messages_with_system.at(0).at("content");
|
515
523
|
messages_with_system[0] = json {
|
516
524
|
{"role", "system"},
|