cui-llama.rn 1.7.3 → 1.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +217 -17
- package/android/src/main/CMakeLists.txt +34 -15
- package/android/src/main/java/com/rnllama/LlamaContext.java +94 -8
- package/android/src/main/java/com/rnllama/RNLlama.java +247 -0
- package/android/src/main/jni.cpp +213 -14
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
- package/cpp/README.md +1 -1
- package/cpp/chat-parser.cpp +385 -0
- package/cpp/chat-parser.h +120 -0
- package/cpp/chat.cpp +726 -596
- package/cpp/chat.h +71 -6
- package/cpp/common.cpp +56 -38
- package/cpp/common.h +9 -3
- package/cpp/ggml-backend-reg.cpp +5 -0
- package/cpp/ggml-backend.cpp +10 -2
- package/cpp/ggml-common.h +4 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/common.h +4 -3
- package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
- package/cpp/ggml-cpu/ggml-cpu.c +123 -104
- package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
- package/cpp/ggml-cpu/ops.cpp +330 -148
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/quants.c +1158 -0
- package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/ggml-cpu/repack.cpp +1571 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +330 -38
- package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/ggml-cpu/vec.cpp +87 -18
- package/cpp/ggml-cpu/vec.h +249 -94
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +63 -183
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal.m +152 -45
- package/cpp/ggml-quants.c +0 -2
- package/cpp/ggml.c +61 -21
- package/cpp/ggml.h +22 -3
- package/cpp/gguf.cpp +24 -3
- package/cpp/json-partial.cpp +256 -0
- package/cpp/json-partial.h +38 -0
- package/cpp/json-schema-to-grammar.cpp +5 -47
- package/cpp/json-schema-to-grammar.h +4 -4
- package/cpp/llama-arch.cpp +153 -3
- package/cpp/llama-arch.h +27 -1
- package/cpp/llama-batch.cpp +741 -272
- package/cpp/llama-batch.h +112 -54
- package/cpp/llama-chat.cpp +30 -8
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-context.cpp +524 -339
- package/cpp/llama-context.h +38 -17
- package/cpp/llama-cparams.cpp +4 -0
- package/cpp/llama-cparams.h +2 -0
- package/cpp/llama-grammar.cpp +12 -2
- package/cpp/llama-graph.cpp +431 -356
- package/cpp/llama-graph.h +126 -58
- package/cpp/llama-hparams.cpp +10 -2
- package/cpp/llama-hparams.h +19 -2
- package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
- package/cpp/llama-kv-cache-unified-iswa.h +128 -0
- package/cpp/llama-kv-cache-unified.cpp +1841 -0
- package/cpp/llama-kv-cache-unified.h +303 -0
- package/cpp/llama-kv-cells.h +439 -0
- package/cpp/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama-memory-hybrid.h +138 -0
- package/cpp/llama-memory-recurrent.cpp +1112 -0
- package/cpp/llama-memory-recurrent.h +183 -0
- package/cpp/llama-memory.cpp +41 -0
- package/cpp/llama-memory.h +86 -5
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +42 -17
- package/cpp/llama-model-saver.cpp +1 -0
- package/cpp/llama-model.cpp +1639 -513
- package/cpp/llama-model.h +26 -0
- package/cpp/llama-sampling.cpp +2 -2
- package/cpp/llama-vocab.cpp +65 -28
- package/cpp/llama-vocab.h +1 -0
- package/cpp/llama.cpp +11 -7
- package/cpp/llama.h +150 -42
- package/cpp/minja/chat-template.hpp +1 -1
- package/cpp/minja/minja.hpp +1 -1
- package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/cpp/nlohmann/json_fwd.hpp +187 -0
- package/cpp/regex-partial.cpp +204 -0
- package/cpp/regex-partial.h +56 -0
- package/cpp/rn-llama.cpp +646 -35
- package/cpp/rn-llama.h +32 -1
- package/cpp/rn-tts.h +39 -0
- package/cpp/sampling.cpp +7 -8
- package/cpp/tools/mtmd/clip-impl.h +5 -0
- package/cpp/tools/mtmd/clip.cpp +572 -436
- package/cpp/tools/mtmd/clip.h +14 -4
- package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
- package/cpp/tools/mtmd/mtmd-audio.h +2 -17
- package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
- package/cpp/tools/mtmd/mtmd-helper.h +91 -0
- package/cpp/tools/mtmd/mtmd.cpp +368 -248
- package/cpp/tools/mtmd/mtmd.h +6 -70
- package/cpp/unicode.cpp +5 -0
- package/ios/CMakeLists.txt +26 -6
- package/ios/RNLlama.h +1 -1
- package/ios/RNLlama.mm +153 -3
- package/ios/RNLlamaContext.h +9 -1
- package/ios/RNLlamaContext.mm +112 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +24 -0
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +46 -2
- package/src/index.ts +105 -1
- package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/cpp/ggml-cpu/sgemm.cpp +0 -3544
- package/cpp/ggml-cpu/sgemm.h +0 -14
- package/cpp/llama-kv-cache.cpp +0 -2827
- package/cpp/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
- /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
- /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
@@ -0,0 +1,327 @@
|
|
1
|
+
#include "ggml-backend-impl.h"
|
2
|
+
|
3
|
+
#if defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64))
|
4
|
+
|
5
|
+
#ifdef _MSC_VER
|
6
|
+
#include <intrin.h>
|
7
|
+
#endif
|
8
|
+
|
9
|
+
#include <cstring>
|
10
|
+
#include <vector>
|
11
|
+
#include <bitset>
|
12
|
+
#include <array>
|
13
|
+
#include <string>
|
14
|
+
|
15
|
+
// ref: https://cdrdv2-public.intel.com/782156/325383-sdm-vol-2abcd.pdf
|
16
|
+
// Snapshot of the x86 CPUID feature flags of the CPU executing the constructor.
//
// Construction runs the CPUID instruction for every basic and extended leaf the
// processor reports and caches the registers that carry feature bits. Each
// accessor below tests exactly one cached bit (some additionally require a
// specific vendor). Only CPUID is consulted: the flags describe what the CPU
// advertises, nothing else.
struct cpuid_x86 {
    // leaf 0x00000001, ECX
    bool SSE3() const        { return f_1_ecx[0]; }
    bool PCLMULQDQ() const   { return f_1_ecx[1]; }
    bool MONITOR() const     { return f_1_ecx[3]; }
    bool SSSE3() const       { return f_1_ecx[9]; }
    bool FMA() const         { return f_1_ecx[12]; }
    bool CMPXCHG16B() const  { return f_1_ecx[13]; }
    bool SSE41() const       { return f_1_ecx[19]; }
    bool SSE42() const       { return f_1_ecx[20]; }
    bool MOVBE() const       { return f_1_ecx[22]; }
    bool POPCNT() const      { return f_1_ecx[23]; }
    bool AES() const         { return f_1_ecx[25]; }
    bool XSAVE() const       { return f_1_ecx[26]; }
    bool OSXSAVE() const     { return f_1_ecx[27]; }
    bool AVX() const         { return f_1_ecx[28]; }
    bool F16C() const        { return f_1_ecx[29]; }
    bool RDRAND() const      { return f_1_ecx[30]; }

    // leaf 0x00000001, EDX
    bool MSR() const         { return f_1_edx[5]; }
    bool CX8() const         { return f_1_edx[8]; }
    bool SEP() const         { return f_1_edx[11]; }
    bool CMOV() const        { return f_1_edx[15]; }
    bool CLFSH() const       { return f_1_edx[19]; }
    bool MMX() const         { return f_1_edx[23]; }
    bool FXSR() const        { return f_1_edx[24]; }
    bool SSE() const         { return f_1_edx[25]; }
    bool SSE2() const        { return f_1_edx[26]; }

    // leaf 0x00000007 (sub-leaf 0), EBX; HLE and RTM are only reported for Intel
    bool FSGSBASE() const    { return f_7_ebx[0]; }
    bool BMI1() const        { return f_7_ebx[3]; }
    bool HLE() const         { return is_intel && f_7_ebx[4]; }
    bool AVX2() const        { return f_7_ebx[5]; }
    bool BMI2() const        { return f_7_ebx[8]; }
    bool ERMS() const        { return f_7_ebx[9]; }
    bool INVPCID() const     { return f_7_ebx[10]; }
    bool RTM() const         { return is_intel && f_7_ebx[11]; }
    bool AVX512F() const     { return f_7_ebx[16]; }
    bool AVX512DQ() const    { return f_7_ebx[17]; }
    bool RDSEED() const      { return f_7_ebx[18]; }
    bool ADX() const         { return f_7_ebx[19]; }
    bool AVX512PF() const    { return f_7_ebx[26]; }
    bool AVX512ER() const    { return f_7_ebx[27]; }
    bool AVX512CD() const    { return f_7_ebx[28]; }
    bool AVX512BW() const    { return f_7_ebx[30]; }
    bool AVX512VL() const    { return f_7_ebx[31]; }

    bool SHA() const         { return f_7_ebx[29]; }

    // leaf 0x00000007 (sub-leaf 0), ECX
    bool PREFETCHWT1() const { return f_7_ecx[0]; }

    // leaf 0x80000001, ECX; bit 5 is exposed as LZCNT for Intel and as ABM for AMD
    bool LAHF() const        { return f_81_ecx[0]; }
    bool LZCNT() const       { return is_intel && f_81_ecx[5]; }
    bool ABM() const         { return is_amd && f_81_ecx[5]; }
    bool SSE4a() const       { return is_amd && f_81_ecx[6]; }
    bool XOP() const         { return is_amd && f_81_ecx[11]; }
    bool TBM() const         { return is_amd && f_81_ecx[21]; }

    // leaf 0x80000001, EDX
    bool SYSCALL() const     { return is_intel && f_81_edx[11]; }
    bool MMXEXT() const      { return is_amd && f_81_edx[22]; }
    bool RDTSCP() const      { return is_intel && f_81_edx[27]; }
    bool _3DNOWEXT() const   { return is_amd && f_81_edx[30]; }
    bool _3DNOW() const      { return is_amd && f_81_edx[31]; }

    // leaf 0x00000007: sub-leaf 0 ECX/EDX and sub-leaf 1 EAX
    bool AVX512_VBMI() const { return f_7_ecx[1]; }
    bool AVX512_VNNI() const { return f_7_ecx[11]; }
    bool AVX512_FP16() const { return f_7_edx[23]; }
    bool AVX512_BF16() const { return f_7_1_eax[5]; }
    bool AVX_VNNI() const    { return f_7_1_eax[4]; }

    bool AMX_TILE() const    { return f_7_edx[24]; }
    bool AMX_INT8() const    { return f_7_edx[25]; }
    bool AMX_FP16() const    { return f_7_1_eax[21]; }
    bool AMX_BF16() const    { return f_7_edx[22]; }

#ifdef _MSC_VER
    // Runs CPUID for leaf `eax`; cpu_info receives EAX, EBX, ECX, EDX in that order.
    static void cpuid(int cpu_info[4], int eax) {
        __cpuid(cpu_info, eax);
    }
    // Runs CPUID for leaf `eax`, sub-leaf `ecx`; same output layout as cpuid().
    static void cpuidex(int cpu_info[4], int eax, int ecx) {
        __cpuidex(cpu_info, eax, ecx);
    }
#else
    // Runs CPUID for leaf `eax` with ECX = 0; cpu_info receives EAX, EBX, ECX, EDX in that order.
    static void cpuid(int cpu_info[4], int eax) {
        __asm__ __volatile__(
            "cpuid"
            : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
            : "a"(eax), "c"(0));
    }
    // Runs CPUID for leaf `eax`, sub-leaf `ecx`; same output layout as cpuid().
    static void cpuidex(int cpu_info[4], int eax, int ecx) {
        __asm__ __volatile__(
            "cpuid"
            : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
            : "a"(eax), "c"(ecx));
    }
#endif

    // Queries CPUID and fills in the vendor, brand and feature bitsets.
    cpuid_x86() {
        std::array<int, 4> regs;
        std::vector<std::array<int, 4>> basic;

        // leaf 0x0: EAX holds the highest valid basic leaf,
        // EBX/EDX/ECX hold the vendor string
        cpuid(regs.data(), 0);
        const std::array<int, 4> leaf0 = regs;
        const int n_ids = leaf0[0];

        for (int i = 0; i <= n_ids; ++i) {
            cpuidex(regs.data(), i, 0);
            basic.push_back(regs);
        }

        // capture vendor string (register order is EBX, EDX, ECX);
        // memcpy is used instead of writing through a casted int pointer so the
        // char buffer is never accessed through an incompatible, possibly
        // misaligned lvalue, and leaf0 is used so no vector element is assumed
        char vendor_buf[0x20] = {};
        std::memcpy(vendor_buf,     &leaf0[1], sizeof(int));
        std::memcpy(vendor_buf + 4, &leaf0[3], sizeof(int));
        std::memcpy(vendor_buf + 8, &leaf0[2], sizeof(int));
        vendor = vendor_buf;
        if (vendor == "GenuineIntel") {
            is_intel = true;
        } else if (vendor == "AuthenticAMD") {
            is_amd = true;
        }

        // load bitset with flags for function 0x00000001
        if (n_ids >= 1) {
            f_1_ecx = basic[1][2];
            f_1_edx = basic[1][3];
        }

        // load bitset with flags for function 0x00000007 (sub-leaves 0 and 1)
        if (n_ids >= 7) {
            f_7_ebx = basic[7][1];
            f_7_ecx = basic[7][2];
            f_7_edx = basic[7][3];
            cpuidex(regs.data(), 7, 1);
            f_7_1_eax = regs[0];
        }

        // leaf 0x80000000: EAX holds the highest valid extended leaf
        cpuid(regs.data(), 0x80000000);
        const unsigned int n_ex_ids = regs[0];

        std::vector<std::array<int, 4>> extended;
        for (unsigned int i = 0x80000000; i <= n_ex_ids; ++i) {
            cpuidex(regs.data(), i, 0);
            extended.push_back(regs);
        }

        // load bitset with flags for function 0x80000001
        if (n_ex_ids >= 0x80000001) {
            f_81_ecx = extended[1][2];
            f_81_edx = extended[1][3];
        }

        // interpret CPU brand string if reported: leaves 0x80000002..0x80000004
        // each contribute 16 bytes (EAX, EBX, ECX, EDX)
        char brand_buf[0x40] = {};
        if (n_ex_ids >= 0x80000004) {
            std::memcpy(brand_buf,      extended[2].data(), sizeof(regs));
            std::memcpy(brand_buf + 16, extended[3].data(), sizeof(regs));
            std::memcpy(brand_buf + 32, extended[4].data(), sizeof(regs));
            brand = brand_buf;
        }
    }

    bool is_intel = false;      // vendor string equals "GenuineIntel"
    bool is_amd = false;        // vendor string equals "AuthenticAMD"
    std::string vendor;         // vendor string from leaf 0
    std::string brand;          // brand string; empty when the CPU does not report it
    std::bitset<32> f_1_ecx;    // leaf 0x00000001, ECX
    std::bitset<32> f_1_edx;    // leaf 0x00000001, EDX
    std::bitset<32> f_7_ebx;    // leaf 0x00000007 sub-leaf 0, EBX
    std::bitset<32> f_7_ecx;    // leaf 0x00000007 sub-leaf 0, ECX
    std::bitset<32> f_7_edx;    // leaf 0x00000007 sub-leaf 0, EDX
    std::bitset<32> f_7_1_eax;  // leaf 0x00000007 sub-leaf 1, EAX
    std::bitset<32> f_81_ecx;   // leaf 0x80000001, ECX
    std::bitset<32> f_81_edx;   // leaf 0x80000001, EDX
};
|
193
|
+
|
194
|
+
#if 0
|
195
|
+
void test_x86_is() {
|
196
|
+
cpuid_x86 is;
|
197
|
+
printf("CPU Vendor: %s\n", is.vendor.c_str());
|
198
|
+
printf("Brand: %s\n", is.brand.c_str());
|
199
|
+
printf("is_intel: %d\n", is.is_intel);
|
200
|
+
printf("is_amd: %d\n", is.is_amd);
|
201
|
+
printf("sse3: %d\n", is.SSE3());
|
202
|
+
printf("pclmulqdq: %d\n", is.PCLMULQDQ());
|
203
|
+
printf("ssse3: %d\n", is.SSSE3());
|
204
|
+
printf("fma: %d\n", is.FMA());
|
205
|
+
printf("cmpxchg16b: %d\n", is.CMPXCHG16B());
|
206
|
+
printf("sse41: %d\n", is.SSE41());
|
207
|
+
printf("sse42: %d\n", is.SSE42());
|
208
|
+
printf("movbe: %d\n", is.MOVBE());
|
209
|
+
printf("popcnt: %d\n", is.POPCNT());
|
210
|
+
printf("aes: %d\n", is.AES());
|
211
|
+
printf("xsave: %d\n", is.XSAVE());
|
212
|
+
printf("osxsave: %d\n", is.OSXSAVE());
|
213
|
+
printf("avx: %d\n", is.AVX());
|
214
|
+
printf("f16c: %d\n", is.F16C());
|
215
|
+
printf("rdrand: %d\n", is.RDRAND());
|
216
|
+
printf("msr: %d\n", is.MSR());
|
217
|
+
printf("cx8: %d\n", is.CX8());
|
218
|
+
printf("sep: %d\n", is.SEP());
|
219
|
+
printf("cmov: %d\n", is.CMOV());
|
220
|
+
printf("clflush: %d\n", is.CLFSH());
|
221
|
+
printf("mmx: %d\n", is.MMX());
|
222
|
+
printf("fxsr: %d\n", is.FXSR());
|
223
|
+
printf("sse: %d\n", is.SSE());
|
224
|
+
printf("sse2: %d\n", is.SSE2());
|
225
|
+
printf("fsgsbase: %d\n", is.FSGSBASE());
|
226
|
+
printf("bmi1: %d\n", is.BMI1());
|
227
|
+
printf("hle: %d\n", is.HLE());
|
228
|
+
printf("avx2: %d\n", is.AVX2());
|
229
|
+
printf("bmi2: %d\n", is.BMI2());
|
230
|
+
printf("erms: %d\n", is.ERMS());
|
231
|
+
printf("invpcid: %d\n", is.INVPCID());
|
232
|
+
printf("rtm: %d\n", is.RTM());
|
233
|
+
printf("avx512f: %d\n", is.AVX512F());
|
234
|
+
printf("rdseed: %d\n", is.RDSEED());
|
235
|
+
printf("adx: %d\n", is.ADX());
|
236
|
+
printf("avx512pf: %d\n", is.AVX512PF());
|
237
|
+
printf("avx512er: %d\n", is.AVX512ER());
|
238
|
+
printf("avx512cd: %d\n", is.AVX512CD());
|
239
|
+
printf("sha: %d\n", is.SHA());
|
240
|
+
printf("prefetchwt1: %d\n", is.PREFETCHWT1());
|
241
|
+
printf("lahf: %d\n", is.LAHF());
|
242
|
+
printf("lzcnt: %d\n", is.LZCNT());
|
243
|
+
printf("abm: %d\n", is.ABM());
|
244
|
+
printf("sse4a: %d\n", is.SSE4a());
|
245
|
+
printf("xop: %d\n", is.XOP());
|
246
|
+
printf("tbm: %d\n", is.TBM());
|
247
|
+
printf("syscall: %d\n", is.SYSCALL());
|
248
|
+
printf("mmxext: %d\n", is.MMXEXT());
|
249
|
+
printf("rdtscp: %d\n", is.RDTSCP());
|
250
|
+
printf("3dnowext: %d\n", is._3DNOWEXT());
|
251
|
+
printf("3dnow: %d\n", is._3DNOW());
|
252
|
+
printf("avx512_vbmi: %d\n", is.AVX512_VBMI());
|
253
|
+
printf("avx512_vnni: %d\n", is.AVX512_VNNI());
|
254
|
+
printf("avx512_fp16: %d\n", is.AVX512_FP16());
|
255
|
+
printf("avx512_bf16: %d\n", is.AVX512_BF16());
|
256
|
+
printf("amx_tile: %d\n", is.AMX_TILE());
|
257
|
+
printf("amx_int8: %d\n", is.AMX_INT8());
|
258
|
+
printf("amx_fp16: %d\n", is.AMX_FP16());
|
259
|
+
printf("amx_bf16: %d\n", is.AMX_BF16());
|
260
|
+
}
|
261
|
+
#endif
|
262
|
+
|
263
|
+
// Scores this CPU backend build against the CPU it is running on.
//
// Returns 0 as soon as a feature this build was compiled with (one of the
// LM_GGML_* macros below) is not reported by CPUID. Otherwise returns 1 plus a
// fixed weight for every compiled-in feature, so builds using more features
// score higher.
static int lm_ggml_backend_cpu_x86_score() {
    // FIXME: this does not check for OS support

    cpuid_x86 cpu;
    int score = 1;

#ifdef LM_GGML_FMA
    if (!cpu.FMA()) {
        return 0;
    }
    score += 1 << 0;
#endif
#ifdef LM_GGML_F16C
    if (!cpu.F16C()) {
        return 0;
    }
    score += 1 << 1;
#endif
#ifdef LM_GGML_SSE42
    if (!cpu.SSE42()) {
        return 0;
    }
    score += 1 << 2;
#endif
#ifdef LM_GGML_BMI2
    if (!cpu.BMI2()) {
        return 0;
    }
    score += 1 << 3;
#endif
#ifdef LM_GGML_AVX
    if (!cpu.AVX()) {
        return 0;
    }
    score += 1 << 4;
#endif
#ifdef LM_GGML_AVX2
    if (!cpu.AVX2()) {
        return 0;
    }
    score += 1 << 5;
#endif
#ifdef LM_GGML_AVX_VNNI
    if (!cpu.AVX_VNNI()) {
        return 0;
    }
    score += 1 << 6;
#endif
#ifdef LM_GGML_AVX512
    // an AVX512 build needs the F, CD, VL, DQ and BW subsets all present
    const bool has_avx512 = cpu.AVX512F() && cpu.AVX512CD() && cpu.AVX512VL() &&
                            cpu.AVX512DQ() && cpu.AVX512BW();
    if (!has_avx512) {
        return 0;
    }
    score += 1 << 7;
#endif
#ifdef LM_GGML_AVX512_VBMI
    if (!cpu.AVX512_VBMI()) {
        return 0;
    }
    score += 1 << 8;
#endif
#ifdef LM_GGML_AVX512_BF16
    if (!cpu.AVX512_BF16()) {
        return 0;
    }
    score += 1 << 9;
#endif
#ifdef LM_GGML_AVX512_VNNI
    if (!cpu.AVX512_VNNI()) {
        return 0;
    }
    score += 1 << 10;
#endif
#ifdef LM_GGML_AMX_INT8
    if (!cpu.AMX_INT8()) {
        return 0;
    }
    score += 1 << 11;
#endif

    return score;
}
|
324
|
+
|
325
|
+
LM_GGML_BACKEND_DL_SCORE_IMPL(lm_ggml_backend_cpu_x86_score)
|
326
|
+
|
327
|
+
#endif // defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64))
|