cui-llama.rn 1.7.3 → 1.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +217 -17
- package/android/src/main/CMakeLists.txt +34 -15
- package/android/src/main/java/com/rnllama/LlamaContext.java +94 -8
- package/android/src/main/java/com/rnllama/RNLlama.java +247 -0
- package/android/src/main/jni.cpp +213 -14
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
- package/cpp/README.md +1 -1
- package/cpp/chat-parser.cpp +385 -0
- package/cpp/chat-parser.h +120 -0
- package/cpp/chat.cpp +726 -596
- package/cpp/chat.h +71 -6
- package/cpp/common.cpp +56 -38
- package/cpp/common.h +9 -3
- package/cpp/ggml-backend-reg.cpp +5 -0
- package/cpp/ggml-backend.cpp +10 -2
- package/cpp/ggml-common.h +4 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/common.h +4 -3
- package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
- package/cpp/ggml-cpu/ggml-cpu.c +123 -104
- package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
- package/cpp/ggml-cpu/ops.cpp +330 -148
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/quants.c +1158 -0
- package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/ggml-cpu/repack.cpp +1571 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +330 -38
- package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/ggml-cpu/vec.cpp +87 -18
- package/cpp/ggml-cpu/vec.h +249 -94
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +63 -183
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal.m +152 -45
- package/cpp/ggml-quants.c +0 -2
- package/cpp/ggml.c +61 -21
- package/cpp/ggml.h +22 -3
- package/cpp/gguf.cpp +24 -3
- package/cpp/json-partial.cpp +256 -0
- package/cpp/json-partial.h +38 -0
- package/cpp/json-schema-to-grammar.cpp +5 -47
- package/cpp/json-schema-to-grammar.h +4 -4
- package/cpp/llama-arch.cpp +153 -3
- package/cpp/llama-arch.h +27 -1
- package/cpp/llama-batch.cpp +741 -272
- package/cpp/llama-batch.h +112 -54
- package/cpp/llama-chat.cpp +30 -8
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-context.cpp +524 -339
- package/cpp/llama-context.h +38 -17
- package/cpp/llama-cparams.cpp +4 -0
- package/cpp/llama-cparams.h +2 -0
- package/cpp/llama-grammar.cpp +12 -2
- package/cpp/llama-graph.cpp +431 -356
- package/cpp/llama-graph.h +126 -58
- package/cpp/llama-hparams.cpp +10 -2
- package/cpp/llama-hparams.h +19 -2
- package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
- package/cpp/llama-kv-cache-unified-iswa.h +128 -0
- package/cpp/llama-kv-cache-unified.cpp +1841 -0
- package/cpp/llama-kv-cache-unified.h +303 -0
- package/cpp/llama-kv-cells.h +439 -0
- package/cpp/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama-memory-hybrid.h +138 -0
- package/cpp/llama-memory-recurrent.cpp +1112 -0
- package/cpp/llama-memory-recurrent.h +183 -0
- package/cpp/llama-memory.cpp +41 -0
- package/cpp/llama-memory.h +86 -5
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +42 -17
- package/cpp/llama-model-saver.cpp +1 -0
- package/cpp/llama-model.cpp +1639 -513
- package/cpp/llama-model.h +26 -0
- package/cpp/llama-sampling.cpp +2 -2
- package/cpp/llama-vocab.cpp +65 -28
- package/cpp/llama-vocab.h +1 -0
- package/cpp/llama.cpp +11 -7
- package/cpp/llama.h +150 -42
- package/cpp/minja/chat-template.hpp +1 -1
- package/cpp/minja/minja.hpp +1 -1
- package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/cpp/nlohmann/json_fwd.hpp +187 -0
- package/cpp/regex-partial.cpp +204 -0
- package/cpp/regex-partial.h +56 -0
- package/cpp/rn-llama.cpp +646 -35
- package/cpp/rn-llama.h +32 -1
- package/cpp/rn-tts.h +39 -0
- package/cpp/sampling.cpp +7 -8
- package/cpp/tools/mtmd/clip-impl.h +5 -0
- package/cpp/tools/mtmd/clip.cpp +572 -436
- package/cpp/tools/mtmd/clip.h +14 -4
- package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
- package/cpp/tools/mtmd/mtmd-audio.h +2 -17
- package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
- package/cpp/tools/mtmd/mtmd-helper.h +91 -0
- package/cpp/tools/mtmd/mtmd.cpp +368 -248
- package/cpp/tools/mtmd/mtmd.h +6 -70
- package/cpp/unicode.cpp +5 -0
- package/ios/CMakeLists.txt +26 -6
- package/ios/RNLlama.h +1 -1
- package/ios/RNLlama.mm +153 -3
- package/ios/RNLlamaContext.h +9 -1
- package/ios/RNLlamaContext.mm +112 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +24 -0
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +46 -2
- package/src/index.ts +105 -1
- package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/cpp/ggml-cpu/sgemm.cpp +0 -3544
- package/cpp/ggml-cpu/sgemm.h +0 -14
- package/cpp/llama-kv-cache.cpp +0 -2827
- package/cpp/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
- /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
- /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h
CHANGED
@@ -126,7 +126,7 @@ enum common_grammar_trigger_type {
|
|
126
126
|
COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN,
|
127
127
|
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
|
128
128
|
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
|
129
|
-
|
129
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
|
130
130
|
};
|
131
131
|
|
132
132
|
struct common_grammar_trigger {
|
@@ -210,6 +210,9 @@ struct common_params_speculative {
|
|
210
210
|
float p_split = 0.1f; // speculative decoding split probability
|
211
211
|
float p_min = 0.75f; // minimum speculative decoding probability (greedy)
|
212
212
|
|
213
|
+
lm_ggml_type cache_type_k = LM_GGML_TYPE_F16; // KV cache data type for the K
|
214
|
+
lm_ggml_type cache_type_v = LM_GGML_TYPE_F16; // KV cache data type for the V
|
215
|
+
|
213
216
|
struct cpu_params cpuparams;
|
214
217
|
struct cpu_params cpuparams_batch;
|
215
218
|
|
@@ -226,7 +229,8 @@ struct common_params_vocoder {
|
|
226
229
|
|
227
230
|
enum common_reasoning_format {
|
228
231
|
COMMON_REASONING_FORMAT_NONE,
|
229
|
-
|
232
|
+
COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY, // Extract thinking tag contents and return as `message.reasoning_content`, or leave inline in <think> tags in stream mode
|
233
|
+
COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
|
230
234
|
};
|
231
235
|
|
232
236
|
struct common_params {
|
@@ -306,6 +310,7 @@ struct common_params {
|
|
306
310
|
int32_t verbosity = 0;
|
307
311
|
int32_t control_vector_layer_start = -1; // layer range for control vector
|
308
312
|
int32_t control_vector_layer_end = -1; // layer range for control vector
|
313
|
+
bool offline = false;
|
309
314
|
|
310
315
|
int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
|
311
316
|
int32_t ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line
|
@@ -368,7 +373,7 @@ struct common_params {
|
|
368
373
|
int32_t embd_normalize = 2; // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
|
369
374
|
std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
|
370
375
|
std::string embd_sep = "\n"; // separator of embeddings
|
371
|
-
|
376
|
+
std::string cls_sep = "\t"; // separator of classification sequences
|
372
377
|
|
373
378
|
// server params
|
374
379
|
int32_t port = 8080; // server listens on this network port
|
@@ -383,6 +388,7 @@ struct common_params {
|
|
383
388
|
bool use_jinja = false; // NOLINT
|
384
389
|
bool enable_chat_template = true;
|
385
390
|
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
391
|
+
int reasoning_budget = -1;
|
386
392
|
bool prefill_assistant = true; // if true, any trailing assistant message will be prefilled into the response
|
387
393
|
|
388
394
|
std::vector<std::string> api_keys;
|
package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h
CHANGED
@@ -1074,6 +1074,10 @@ LM_GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
|
|
1074
1074
|
0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
|
1075
1075
|
LM_GGML_TABLE_END()
|
1076
1076
|
|
1077
|
+
LM_GGML_TABLE_BEGIN(int8_t, kvalues_iq4nl, 16)
|
1078
|
+
-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113,
|
1079
|
+
LM_GGML_TABLE_END()
|
1080
|
+
|
1077
1081
|
#define NGRID_IQ1S 2048
|
1078
1082
|
#define IQ1S_DELTA 0.125f
|
1079
1083
|
#define IQ1M_DELTA 0.125f
|
package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h
CHANGED
@@ -101,6 +101,7 @@ extern "C" {
|
|
101
101
|
LM_GGML_BACKEND_API int lm_ggml_cpu_has_riscv_v (void);
|
102
102
|
LM_GGML_BACKEND_API int lm_ggml_cpu_has_vsx (void);
|
103
103
|
LM_GGML_BACKEND_API int lm_ggml_cpu_has_vxe (void);
|
104
|
+
LM_GGML_BACKEND_API int lm_ggml_cpu_has_nnpa (void);
|
104
105
|
LM_GGML_BACKEND_API int lm_ggml_cpu_has_wasm_simd (void);
|
105
106
|
LM_GGML_BACKEND_API int lm_ggml_cpu_has_llamafile (void);
|
106
107
|
|
package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h
CHANGED
@@ -32,6 +32,8 @@
|
|
32
32
|
extern "C" {
|
33
33
|
#endif
|
34
34
|
|
35
|
+
void lm_ggml_print_backtrace(void);
|
36
|
+
|
35
37
|
#ifndef MIN
|
36
38
|
# define MIN(a, b) ((a) < (b) ? (a) : (b))
|
37
39
|
#endif
|
@@ -315,203 +317,81 @@ struct lm_ggml_cgraph lm_ggml_graph_view(struct lm_ggml_cgraph * cgraph, int i0,
|
|
315
317
|
LM_GGML_API void * lm_ggml_aligned_malloc(size_t size);
|
316
318
|
LM_GGML_API void lm_ggml_aligned_free(void * ptr, size_t size);
|
317
319
|
|
318
|
-
// FP16
|
319
|
-
|
320
|
-
// 16-bit float
|
321
|
-
// on Arm, we use __fp16
|
322
|
-
// on x86, we use uint16_t
|
323
|
-
//
|
324
|
-
// for old CUDA compilers (<= 11), we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/10616
|
325
|
-
// for MUSA compilers , we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/11843
|
326
|
-
//
|
327
|
-
#if defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__)
|
328
|
-
#define LM_GGML_COMPUTE_FP16_TO_FP32(x) lm_ggml_compute_fp16_to_fp32(x)
|
329
|
-
#define LM_GGML_COMPUTE_FP32_TO_FP16(x) lm_ggml_compute_fp32_to_fp16(x)
|
330
|
-
|
331
|
-
#define LM_GGML_FP16_TO_FP32(x) lm_ggml_compute_fp16_to_fp32(x)
|
332
|
-
|
333
|
-
static inline float lm_ggml_compute_fp16_to_fp32(lm_ggml_fp16_t h) {
|
334
|
-
__fp16 tmp;
|
335
|
-
memcpy(&tmp, &h, sizeof(lm_ggml_fp16_t));
|
336
|
-
return (float)tmp;
|
337
|
-
}
|
338
|
-
|
339
|
-
static inline lm_ggml_fp16_t lm_ggml_compute_fp32_to_fp16(float f) {
|
340
|
-
lm_ggml_fp16_t res;
|
341
|
-
__fp16 tmp = f;
|
342
|
-
memcpy(&res, &tmp, sizeof(lm_ggml_fp16_t));
|
343
|
-
return res;
|
344
|
-
}
|
345
|
-
|
346
|
-
#elif defined(__F16C__)
|
347
|
-
|
348
|
-
#ifdef _MSC_VER
|
349
|
-
#define LM_GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
|
350
|
-
#define LM_GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
|
351
|
-
#else
|
352
|
-
#define LM_GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
|
353
|
-
#define LM_GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
|
354
|
-
#endif
|
355
|
-
|
356
|
-
#elif defined(__POWER9_VECTOR__)
|
357
|
-
|
358
|
-
#define LM_GGML_COMPUTE_FP16_TO_FP32(x) lm_ggml_compute_fp16_to_fp32(x)
|
359
|
-
#define LM_GGML_COMPUTE_FP32_TO_FP16(x) lm_ggml_compute_fp32_to_fp16(x)
|
360
|
-
/* the inline asm below is about 12% faster than the lookup method */
|
361
|
-
#define LM_GGML_FP16_TO_FP32(x) LM_GGML_COMPUTE_FP16_TO_FP32(x)
|
362
|
-
#define LM_GGML_FP32_TO_FP16(x) LM_GGML_COMPUTE_FP32_TO_FP16(x)
|
363
|
-
|
364
|
-
static inline float lm_ggml_compute_fp16_to_fp32(lm_ggml_fp16_t h) {
|
365
|
-
float f;
|
366
|
-
double d;
|
367
|
-
__asm__(
|
368
|
-
"mtfprd %0,%2\n"
|
369
|
-
"xscvhpdp %0,%0\n"
|
370
|
-
"frsp %1,%0\n" :
|
371
|
-
/* temp */ "=d"(d),
|
372
|
-
/* out */ "=f"(f):
|
373
|
-
/* in */ "r"(h));
|
374
|
-
return f;
|
375
|
-
}
|
376
|
-
|
377
|
-
static inline lm_ggml_fp16_t lm_ggml_compute_fp32_to_fp16(float f) {
|
378
|
-
double d;
|
379
|
-
lm_ggml_fp16_t r;
|
380
|
-
__asm__( /* xscvdphp can work on double or single precision */
|
381
|
-
"xscvdphp %0,%2\n"
|
382
|
-
"mffprd %1,%0\n" :
|
383
|
-
/* temp */ "=d"(d),
|
384
|
-
/* out */ "=r"(r):
|
385
|
-
/* in */ "f"(f));
|
386
|
-
return r;
|
387
|
-
}
|
388
|
-
|
389
|
-
#elif defined(__riscv) && defined(LM_GGML_RV_ZFH)
|
320
|
+
// FP16 <-> FP32
|
321
|
+
// ref: https://github.com/Maratyszcza/FP16
|
390
322
|
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
return f;
|
400
|
-
}
|
323
|
+
static inline float fp32_from_bits(uint32_t w) {
|
324
|
+
union {
|
325
|
+
uint32_t as_bits;
|
326
|
+
float as_value;
|
327
|
+
} fp32;
|
328
|
+
fp32.as_bits = w;
|
329
|
+
return fp32.as_value;
|
330
|
+
}
|
401
331
|
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
return res;
|
411
|
-
}
|
332
|
+
static inline uint32_t fp32_to_bits(float f) {
|
333
|
+
union {
|
334
|
+
float as_value;
|
335
|
+
uint32_t as_bits;
|
336
|
+
} fp32;
|
337
|
+
fp32.as_value = f;
|
338
|
+
return fp32.as_bits;
|
339
|
+
}
|
412
340
|
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
341
|
+
static inline float lm_ggml_compute_fp16_to_fp32(lm_ggml_fp16_t h) {
|
342
|
+
const uint32_t w = (uint32_t) h << 16;
|
343
|
+
const uint32_t sign = w & UINT32_C(0x80000000);
|
344
|
+
const uint32_t two_w = w + w;
|
417
345
|
|
346
|
+
const uint32_t exp_offset = UINT32_C(0xE0) << 23;
|
347
|
+
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L)
|
348
|
+
const float exp_scale = 0x1.0p-112f;
|
418
349
|
#else
|
350
|
+
const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
|
351
|
+
#endif
|
352
|
+
const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
|
419
353
|
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
static inline float fp32_from_bits(uint32_t w) {
|
424
|
-
union {
|
425
|
-
uint32_t as_bits;
|
426
|
-
float as_value;
|
427
|
-
} fp32;
|
428
|
-
fp32.as_bits = w;
|
429
|
-
return fp32.as_value;
|
430
|
-
}
|
431
|
-
|
432
|
-
static inline uint32_t fp32_to_bits(float f) {
|
433
|
-
union {
|
434
|
-
float as_value;
|
435
|
-
uint32_t as_bits;
|
436
|
-
} fp32;
|
437
|
-
fp32.as_value = f;
|
438
|
-
return fp32.as_bits;
|
439
|
-
}
|
440
|
-
|
441
|
-
static inline float lm_ggml_compute_fp16_to_fp32(lm_ggml_fp16_t h) {
|
442
|
-
const uint32_t w = (uint32_t) h << 16;
|
443
|
-
const uint32_t sign = w & UINT32_C(0x80000000);
|
444
|
-
const uint32_t two_w = w + w;
|
445
|
-
|
446
|
-
const uint32_t exp_offset = UINT32_C(0xE0) << 23;
|
447
|
-
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L)
|
448
|
-
const float exp_scale = 0x1.0p-112f;
|
449
|
-
#else
|
450
|
-
const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
|
451
|
-
#endif
|
452
|
-
const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
|
453
|
-
|
454
|
-
const uint32_t magic_mask = UINT32_C(126) << 23;
|
455
|
-
const float magic_bias = 0.5f;
|
456
|
-
const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
|
457
|
-
|
458
|
-
const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
|
459
|
-
const uint32_t result = sign |
|
460
|
-
(two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
|
461
|
-
return fp32_from_bits(result);
|
462
|
-
}
|
354
|
+
const uint32_t magic_mask = UINT32_C(126) << 23;
|
355
|
+
const float magic_bias = 0.5f;
|
356
|
+
const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
|
463
357
|
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
|
470
|
-
const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
|
471
|
-
#endif
|
472
|
-
float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
|
473
|
-
|
474
|
-
const uint32_t w = fp32_to_bits(f);
|
475
|
-
const uint32_t shl1_w = w + w;
|
476
|
-
const uint32_t sign = w & UINT32_C(0x80000000);
|
477
|
-
uint32_t bias = shl1_w & UINT32_C(0xFF000000);
|
478
|
-
if (bias < UINT32_C(0x71000000)) {
|
479
|
-
bias = UINT32_C(0x71000000);
|
480
|
-
}
|
358
|
+
const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
|
359
|
+
const uint32_t result = sign |
|
360
|
+
(two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
|
361
|
+
return fp32_from_bits(result);
|
362
|
+
}
|
481
363
|
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
364
|
+
static inline lm_ggml_fp16_t lm_ggml_compute_fp32_to_fp16(float f) {
|
365
|
+
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L)
|
366
|
+
const float scale_to_inf = 0x1.0p+112f;
|
367
|
+
const float scale_to_zero = 0x1.0p-110f;
|
368
|
+
#else
|
369
|
+
const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
|
370
|
+
const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
|
371
|
+
#endif
|
372
|
+
float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
|
373
|
+
|
374
|
+
const uint32_t w = fp32_to_bits(f);
|
375
|
+
const uint32_t shl1_w = w + w;
|
376
|
+
const uint32_t sign = w & UINT32_C(0x80000000);
|
377
|
+
uint32_t bias = shl1_w & UINT32_C(0xFF000000);
|
378
|
+
if (bias < UINT32_C(0x71000000)) {
|
379
|
+
bias = UINT32_C(0x71000000);
|
488
380
|
}
|
489
381
|
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
// defined in ggml.c, initialized in lm_ggml_init()
|
497
|
-
LM_GGML_API float lm_ggml_table_f32_f16[1 << 16];
|
498
|
-
|
499
|
-
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into lm_ggml_lookup_fp16_to_fp32,
|
500
|
-
// so we define LM_GGML_FP16_TO_FP32 and LM_GGML_FP32_TO_FP16 elsewhere for NEON.
|
501
|
-
// This is also true for POWER9.
|
502
|
-
#if !defined(LM_GGML_FP16_TO_FP32)
|
503
|
-
inline static float lm_ggml_lookup_fp16_to_fp32(lm_ggml_fp16_t f) {
|
504
|
-
uint16_t s;
|
505
|
-
memcpy(&s, &f, sizeof(uint16_t));
|
506
|
-
return lm_ggml_table_f32_f16[s];
|
382
|
+
base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
|
383
|
+
const uint32_t bits = fp32_to_bits(base);
|
384
|
+
const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
|
385
|
+
const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
|
386
|
+
const uint32_t nonsign = exp_bits + mantissa_bits;
|
387
|
+
return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
|
507
388
|
}
|
508
389
|
|
509
|
-
#define
|
510
|
-
#
|
390
|
+
#define LM_GGML_COMPUTE_FP16_TO_FP32(x) lm_ggml_compute_fp16_to_fp32(x)
|
391
|
+
#define LM_GGML_COMPUTE_FP32_TO_FP16(x) lm_ggml_compute_fp32_to_fp16(x)
|
511
392
|
|
512
|
-
#
|
393
|
+
#define LM_GGML_FP16_TO_FP32(x) LM_GGML_COMPUTE_FP16_TO_FP32(x)
|
513
394
|
#define LM_GGML_FP32_TO_FP16(x) LM_GGML_COMPUTE_FP32_TO_FP16(x)
|
514
|
-
#endif
|
515
395
|
|
516
396
|
/**
|
517
397
|
* Converts brain16 to float32.
|
package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h
CHANGED
@@ -490,6 +490,7 @@ extern "C" {
|
|
490
490
|
LM_GGML_OP_UPSCALE, // nearest interpolate
|
491
491
|
LM_GGML_OP_PAD,
|
492
492
|
LM_GGML_OP_PAD_REFLECT_1D,
|
493
|
+
LM_GGML_OP_ROLL,
|
493
494
|
LM_GGML_OP_ARANGE,
|
494
495
|
LM_GGML_OP_TIMESTEP_EMBEDDING,
|
495
496
|
LM_GGML_OP_ARGSORT,
|
@@ -936,6 +937,15 @@ extern "C" {
|
|
936
937
|
struct lm_ggml_tensor * a,
|
937
938
|
struct lm_ggml_tensor * b);
|
938
939
|
|
940
|
+
// repeat a to the specified shape
|
941
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_repeat_4d(
|
942
|
+
struct lm_ggml_context * ctx,
|
943
|
+
struct lm_ggml_tensor * a,
|
944
|
+
int64_t ne0,
|
945
|
+
int64_t ne1,
|
946
|
+
int64_t ne2,
|
947
|
+
int64_t ne3);
|
948
|
+
|
939
949
|
// sums repetitions in a into shape of b
|
940
950
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_repeat_back(
|
941
951
|
struct lm_ggml_context * ctx,
|
@@ -1793,6 +1803,17 @@ extern "C" {
|
|
1793
1803
|
int p0,
|
1794
1804
|
int p1);
|
1795
1805
|
|
1806
|
+
// Move tensor elements by an offset given for each dimension. Elements that
|
1807
|
+
// are shifted beyond the last position are wrapped around to the beginning.
|
1808
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_roll(
|
1809
|
+
struct lm_ggml_context * ctx,
|
1810
|
+
struct lm_ggml_tensor * a,
|
1811
|
+
int shift0,
|
1812
|
+
int shift1,
|
1813
|
+
int shift2,
|
1814
|
+
int shift3);
|
1815
|
+
|
1816
|
+
|
1796
1817
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
1797
1818
|
// timesteps: [N,]
|
1798
1819
|
// return: [N, dim]
|
@@ -2087,9 +2108,6 @@ extern "C" {
|
|
2087
2108
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_graph_get_grad (const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node);
|
2088
2109
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_graph_get_grad_acc(const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node);
|
2089
2110
|
|
2090
|
-
LM_GGML_API void lm_ggml_graph_export(const struct lm_ggml_cgraph * cgraph, const char * fname);
|
2091
|
-
LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_import(const char * fname, struct lm_ggml_context ** ctx_data, struct lm_ggml_context ** ctx_eval);
|
2092
|
-
|
2093
2111
|
// print info and performance information for the graph
|
2094
2112
|
LM_GGML_API void lm_ggml_graph_print(const struct lm_ggml_cgraph * cgraph);
|
2095
2113
|
|
@@ -2173,6 +2191,7 @@ extern "C" {
|
|
2173
2191
|
|
2174
2192
|
// scheduling priorities
|
2175
2193
|
enum lm_ggml_sched_priority {
|
2194
|
+
LM_GGML_SCHED_PRIO_LOW = -1,
|
2176
2195
|
LM_GGML_SCHED_PRIO_NORMAL,
|
2177
2196
|
LM_GGML_SCHED_PRIO_MEDIUM,
|
2178
2197
|
LM_GGML_SCHED_PRIO_HIGH,
|
package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "nlohmann/json.hpp"
|
4
|
+
|
5
|
+
// Healing marker (empty if the JSON was fully parsed / wasn't healed).
|
6
|
+
struct common_healing_marker {
|
7
|
+
// Raw marker.
|
8
|
+
std::string marker;
|
9
|
+
|
10
|
+
// Cutting the `common_json.json.dump()` string at the (only) occurrence of this marker should yield the original partial JSON string (modulo spaces / if it had the same dump format).
|
11
|
+
std::string json_dump_marker;
|
12
|
+
};
|
13
|
+
|
14
|
+
// Represents a parsed JSON object, with its optional healing marker (a JSON dump fragment that can be used to find the position of healing in the JSON dump string)
|
15
|
+
struct common_json {
|
16
|
+
nlohmann::ordered_json json;
|
17
|
+
|
18
|
+
common_healing_marker healing_marker;
|
19
|
+
};
|
20
|
+
|
21
|
+
// Parse the JSON string, healing (closing) any partial JSON if `healing_marker` is not empty.
|
22
|
+
//
|
23
|
+
// Healing completes partial JSON strings by adding a (possibly modified) healing marker, then whatever is needed to close the JSON.
|
24
|
+
// This allows to parse the resulting healed JSON string, yet be able to cut it again if needed at the healing marker.
|
25
|
+
// (this is used when parsing JSON outputs from the models, then crafting partial JSONs for the partial tool calls in OAI format).
|
26
|
+
//
|
27
|
+
// For instance, parsing `{` with a healing marker `foo` will produce a healed JSON `{"foo":1}`, w/ json_dump_marker = `"foo"` (which can be used to break the JSON again).
|
28
|
+
bool common_json_parse(
|
29
|
+
const std::string & input,
|
30
|
+
const std::string & healing_marker,
|
31
|
+
common_json & out);
|
32
|
+
|
33
|
+
// Parse the JSON string (see overload above), but advancing an iterator to the end of the input when the (potentially partial) parsing succeeds.
|
34
|
+
bool common_json_parse(
|
35
|
+
std::string::const_iterator & it,
|
36
|
+
const std::string::const_iterator & end,
|
37
|
+
const std::string & healing_marker,
|
38
|
+
common_json & out);
|
@@ -1,9 +1,9 @@
|
|
1
1
|
#pragma once
|
2
2
|
|
3
|
-
#include "
|
4
|
-
|
5
|
-
#
|
6
|
-
#include
|
3
|
+
#include "nlohmann/json_fwd.hpp"
|
4
|
+
|
5
|
+
#include <functional>
|
6
|
+
#include <string>
|
7
7
|
|
8
8
|
std::string json_schema_to_grammar(const nlohmann::ordered_json & schema,
|
9
9
|
bool force_gbnf = false);
|
package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h
CHANGED
@@ -24,6 +24,7 @@ enum llm_arch {
|
|
24
24
|
LLM_ARCH_BERT,
|
25
25
|
LLM_ARCH_NOMIC_BERT,
|
26
26
|
LLM_ARCH_NOMIC_BERT_MOE,
|
27
|
+
LLM_ARCH_NEO_BERT,
|
27
28
|
LLM_ARCH_JINA_BERT_V2,
|
28
29
|
LLM_ARCH_BLOOM,
|
29
30
|
LLM_ARCH_STABLELM,
|
@@ -45,6 +46,7 @@ enum llm_arch {
|
|
45
46
|
LLM_ARCH_GEMMA,
|
46
47
|
LLM_ARCH_GEMMA2,
|
47
48
|
LLM_ARCH_GEMMA3,
|
49
|
+
LLM_ARCH_GEMMA3N,
|
48
50
|
LLM_ARCH_STARCODER2,
|
49
51
|
LLM_ARCH_MAMBA,
|
50
52
|
LLM_ARCH_XVERSE,
|
@@ -76,6 +78,8 @@ enum llm_arch {
|
|
76
78
|
LLM_ARCH_WAVTOKENIZER_DEC,
|
77
79
|
LLM_ARCH_PLM,
|
78
80
|
LLM_ARCH_BAILINGMOE,
|
81
|
+
LLM_ARCH_DOTS1,
|
82
|
+
LLM_ARCH_ARCEE,
|
79
83
|
LLM_ARCH_UNKNOWN,
|
80
84
|
};
|
81
85
|
|
@@ -148,6 +152,7 @@ enum llm_kv {
|
|
148
152
|
LLM_KV_ATTENTION_SCALE,
|
149
153
|
LLM_KV_ATTENTION_KEY_LENGTH_MLA,
|
150
154
|
LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
|
155
|
+
LLM_KV_ATTENTION_LAYER_INDICES,
|
151
156
|
|
152
157
|
LLM_KV_ROPE_DIMENSION_COUNT,
|
153
158
|
LLM_KV_ROPE_DIMENSION_SECTIONS,
|
@@ -190,13 +195,13 @@ enum llm_kv {
|
|
190
195
|
LLM_KV_TOKENIZER_MASK_ID,
|
191
196
|
LLM_KV_TOKENIZER_ADD_BOS,
|
192
197
|
LLM_KV_TOKENIZER_ADD_EOS,
|
198
|
+
LLM_KV_TOKENIZER_ADD_SEP,
|
193
199
|
LLM_KV_TOKENIZER_ADD_PREFIX,
|
194
200
|
LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
|
195
201
|
LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
|
196
202
|
LLM_KV_TOKENIZER_HF_JSON,
|
197
203
|
LLM_KV_TOKENIZER_RWKV,
|
198
204
|
LLM_KV_TOKENIZER_CHAT_TEMPLATE,
|
199
|
-
LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
|
200
205
|
LLM_KV_TOKENIZER_FIM_PRE_ID,
|
201
206
|
LLM_KV_TOKENIZER_FIM_SUF_ID,
|
202
207
|
LLM_KV_TOKENIZER_FIM_MID_ID,
|
@@ -213,6 +218,8 @@ enum llm_kv {
|
|
213
218
|
LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
|
214
219
|
LLM_KV_CONVNEXT_BLOCK_COUNT,
|
215
220
|
|
221
|
+
LLM_KV_CLASSIFIER_OUTPUT_LABELS,
|
222
|
+
|
216
223
|
// deprecated:
|
217
224
|
LLM_KV_TOKENIZER_PREFIX_ID,
|
218
225
|
LLM_KV_TOKENIZER_SUFFIX_ID,
|
@@ -263,6 +270,22 @@ enum llm_tensor {
|
|
263
270
|
LLM_TENSOR_LAYER_OUT_NORM,
|
264
271
|
LLM_TENSOR_POST_ATTN_NORM,
|
265
272
|
LLM_TENSOR_POST_MLP_NORM,
|
273
|
+
LLM_TENSOR_PER_LAYER_TOKEN_EMBD, // gemma3n
|
274
|
+
LLM_TENSOR_PER_LAYER_MODEL_PROJ, // gemma3n
|
275
|
+
LLM_TENSOR_PER_LAYER_INP_GATE, // gemma3n
|
276
|
+
LLM_TENSOR_PER_LAYER_PROJ, // gemma3n
|
277
|
+
LLM_TENSOR_PER_LAYER_PROJ_NORM, // gemma3n
|
278
|
+
LLM_TENSOR_PER_LAYER_POST_NORM, // gemma3n
|
279
|
+
LLM_TENSOR_ALTUP_PROJ, // gemma3n
|
280
|
+
LLM_TENSOR_ALTUP_UNEMBD_PROJ, // gemma3n
|
281
|
+
LLM_TENSOR_ALTUP_CORRECT_COEF, // gemma3n
|
282
|
+
LLM_TENSOR_ALTUP_CORRECT_SCALE, // gemma3n
|
283
|
+
LLM_TENSOR_ALTUP_PREDICT_COEF, // gemma3n
|
284
|
+
LLM_TENSOR_ALTUP_ROUTER, // gemma3n
|
285
|
+
LLM_TENSOR_ALTUP_ROUTER_NORM, // gemma3n
|
286
|
+
LLM_TENSOR_LAUREL_L, // gemma3n
|
287
|
+
LLM_TENSOR_LAUREL_R, // gemma3n
|
288
|
+
LLM_TENSOR_LAUREL_POST_NORM, // gemma3n
|
266
289
|
LLM_TENSOR_SSM_IN,
|
267
290
|
LLM_TENSOR_SSM_CONV1D,
|
268
291
|
LLM_TENSOR_SSM_X,
|
@@ -435,3 +458,6 @@ const char * llm_arch_name(llm_arch arch);
|
|
435
458
|
llm_arch llm_arch_from_string(const std::string & name);
|
436
459
|
|
437
460
|
const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
|
461
|
+
|
462
|
+
bool llm_arch_is_recurrent(const llm_arch & arch);
|
463
|
+
bool llm_arch_is_hybrid (const llm_arch & arch);
|