cui-llama.rn 1.7.3 → 1.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +217 -17
- package/android/src/main/CMakeLists.txt +34 -15
- package/android/src/main/java/com/rnllama/LlamaContext.java +94 -8
- package/android/src/main/java/com/rnllama/RNLlama.java +247 -0
- package/android/src/main/jni.cpp +213 -14
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
- package/cpp/README.md +1 -1
- package/cpp/chat-parser.cpp +385 -0
- package/cpp/chat-parser.h +120 -0
- package/cpp/chat.cpp +726 -596
- package/cpp/chat.h +71 -6
- package/cpp/common.cpp +56 -38
- package/cpp/common.h +9 -3
- package/cpp/ggml-backend-reg.cpp +5 -0
- package/cpp/ggml-backend.cpp +10 -2
- package/cpp/ggml-common.h +4 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/common.h +4 -3
- package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
- package/cpp/ggml-cpu/ggml-cpu.c +123 -104
- package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
- package/cpp/ggml-cpu/ops.cpp +330 -148
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/quants.c +1158 -0
- package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/ggml-cpu/repack.cpp +1571 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +330 -38
- package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/ggml-cpu/vec.cpp +87 -18
- package/cpp/ggml-cpu/vec.h +249 -94
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +63 -183
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal.m +152 -45
- package/cpp/ggml-quants.c +0 -2
- package/cpp/ggml.c +61 -21
- package/cpp/ggml.h +22 -3
- package/cpp/gguf.cpp +24 -3
- package/cpp/json-partial.cpp +256 -0
- package/cpp/json-partial.h +38 -0
- package/cpp/json-schema-to-grammar.cpp +5 -47
- package/cpp/json-schema-to-grammar.h +4 -4
- package/cpp/llama-arch.cpp +153 -3
- package/cpp/llama-arch.h +27 -1
- package/cpp/llama-batch.cpp +741 -272
- package/cpp/llama-batch.h +112 -54
- package/cpp/llama-chat.cpp +30 -8
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-context.cpp +524 -339
- package/cpp/llama-context.h +38 -17
- package/cpp/llama-cparams.cpp +4 -0
- package/cpp/llama-cparams.h +2 -0
- package/cpp/llama-grammar.cpp +12 -2
- package/cpp/llama-graph.cpp +431 -356
- package/cpp/llama-graph.h +126 -58
- package/cpp/llama-hparams.cpp +10 -2
- package/cpp/llama-hparams.h +19 -2
- package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
- package/cpp/llama-kv-cache-unified-iswa.h +128 -0
- package/cpp/llama-kv-cache-unified.cpp +1841 -0
- package/cpp/llama-kv-cache-unified.h +303 -0
- package/cpp/llama-kv-cells.h +439 -0
- package/cpp/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama-memory-hybrid.h +138 -0
- package/cpp/llama-memory-recurrent.cpp +1112 -0
- package/cpp/llama-memory-recurrent.h +183 -0
- package/cpp/llama-memory.cpp +41 -0
- package/cpp/llama-memory.h +86 -5
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +42 -17
- package/cpp/llama-model-saver.cpp +1 -0
- package/cpp/llama-model.cpp +1639 -513
- package/cpp/llama-model.h +26 -0
- package/cpp/llama-sampling.cpp +2 -2
- package/cpp/llama-vocab.cpp +65 -28
- package/cpp/llama-vocab.h +1 -0
- package/cpp/llama.cpp +11 -7
- package/cpp/llama.h +150 -42
- package/cpp/minja/chat-template.hpp +1 -1
- package/cpp/minja/minja.hpp +1 -1
- package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/cpp/nlohmann/json_fwd.hpp +187 -0
- package/cpp/regex-partial.cpp +204 -0
- package/cpp/regex-partial.h +56 -0
- package/cpp/rn-llama.cpp +646 -35
- package/cpp/rn-llama.h +32 -1
- package/cpp/rn-tts.h +39 -0
- package/cpp/sampling.cpp +7 -8
- package/cpp/tools/mtmd/clip-impl.h +5 -0
- package/cpp/tools/mtmd/clip.cpp +572 -436
- package/cpp/tools/mtmd/clip.h +14 -4
- package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
- package/cpp/tools/mtmd/mtmd-audio.h +2 -17
- package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
- package/cpp/tools/mtmd/mtmd-helper.h +91 -0
- package/cpp/tools/mtmd/mtmd.cpp +368 -248
- package/cpp/tools/mtmd/mtmd.h +6 -70
- package/cpp/unicode.cpp +5 -0
- package/ios/CMakeLists.txt +26 -6
- package/ios/RNLlama.h +1 -1
- package/ios/RNLlama.mm +153 -3
- package/ios/RNLlamaContext.h +9 -1
- package/ios/RNLlamaContext.mm +112 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +24 -0
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +46 -2
- package/src/index.ts +105 -1
- package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/cpp/ggml-cpu/sgemm.cpp +0 -3544
- package/cpp/ggml-cpu/sgemm.h +0 -14
- package/cpp/llama-kv-cache.cpp +0 -2827
- package/cpp/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
- /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
- /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
package/cpp/llama-arch.cpp
CHANGED
@@ -20,6 +20,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
|
20
20
|
{ LLM_ARCH_BERT, "bert" },
|
21
21
|
{ LLM_ARCH_NOMIC_BERT, "nomic-bert" },
|
22
22
|
{ LLM_ARCH_NOMIC_BERT_MOE, "nomic-bert-moe" },
|
23
|
+
{ LLM_ARCH_NEO_BERT, "neo-bert" },
|
23
24
|
{ LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
|
24
25
|
{ LLM_ARCH_BLOOM, "bloom" },
|
25
26
|
{ LLM_ARCH_STABLELM, "stablelm" },
|
@@ -41,6 +42,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
|
41
42
|
{ LLM_ARCH_GEMMA, "gemma" },
|
42
43
|
{ LLM_ARCH_GEMMA2, "gemma2" },
|
43
44
|
{ LLM_ARCH_GEMMA3, "gemma3" },
|
45
|
+
{ LLM_ARCH_GEMMA3N, "gemma3n" },
|
44
46
|
{ LLM_ARCH_STARCODER2, "starcoder2" },
|
45
47
|
{ LLM_ARCH_MAMBA, "mamba" },
|
46
48
|
{ LLM_ARCH_XVERSE, "xverse" },
|
@@ -72,6 +74,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
|
72
74
|
{ LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
|
73
75
|
{ LLM_ARCH_PLM, "plm" },
|
74
76
|
{ LLM_ARCH_BAILINGMOE, "bailingmoe" },
|
77
|
+
{ LLM_ARCH_DOTS1, "dots1" },
|
78
|
+
{ LLM_ARCH_ARCEE, "arcee" },
|
75
79
|
{ LLM_ARCH_UNKNOWN, "(unknown)" },
|
76
80
|
};
|
77
81
|
|
@@ -144,6 +148,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|
144
148
|
{ LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
|
145
149
|
{ LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
|
146
150
|
{ LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },
|
151
|
+
{ LLM_KV_ATTENTION_LAYER_INDICES, "%s.attention.layer_indices" },
|
147
152
|
|
148
153
|
{ LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
|
149
154
|
{ LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
|
@@ -174,6 +179,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|
174
179
|
{ LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
|
175
180
|
{ LLM_KV_CONVNEXT_BLOCK_COUNT, "%s.convnext.block_count" },
|
176
181
|
|
182
|
+
{ LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },
|
183
|
+
|
177
184
|
{ LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
|
178
185
|
{ LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
|
179
186
|
{ LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
|
@@ -192,13 +199,13 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|
192
199
|
{ LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" },
|
193
200
|
{ LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" },
|
194
201
|
{ LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" },
|
202
|
+
{ LLM_KV_TOKENIZER_ADD_SEP, "tokenizer.ggml.add_sep_token" },
|
195
203
|
{ LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" },
|
196
204
|
{ LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" },
|
197
205
|
{ LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
|
198
206
|
{ LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" },
|
199
207
|
{ LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" },
|
200
208
|
{ LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat_template" },
|
201
|
-
{ LLM_KV_TOKENIZER_CHAT_TEMPLATE_N, "tokenizer.chat_template.%s" },
|
202
209
|
{ LLM_KV_TOKENIZER_FIM_PRE_ID, "tokenizer.ggml.fim_pre_token_id" },
|
203
210
|
{ LLM_KV_TOKENIZER_FIM_SUF_ID, "tokenizer.ggml.fim_suf_token_id" },
|
204
211
|
{ LLM_KV_TOKENIZER_FIM_MID_ID, "tokenizer.ggml.fim_mid_token_id" },
|
@@ -242,6 +249,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
242
249
|
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
243
250
|
},
|
244
251
|
},
|
252
|
+
{
|
253
|
+
LLM_ARCH_ARCEE,
|
254
|
+
{
|
255
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
256
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
257
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
258
|
+
{ LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
|
259
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
260
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
261
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
262
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
263
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
264
|
+
{ LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
|
265
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
266
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
267
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
268
|
+
},
|
269
|
+
},
|
245
270
|
{
|
246
271
|
LLM_ARCH_LLAMA4,
|
247
272
|
{
|
@@ -448,6 +473,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
448
473
|
{ LLM_TENSOR_TOKEN_TYPES, "token_types" },
|
449
474
|
{ LLM_TENSOR_POS_EMBD, "position_embd" },
|
450
475
|
{ LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
|
476
|
+
{ LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
|
451
477
|
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
452
478
|
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
453
479
|
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
@@ -492,6 +518,21 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
492
518
|
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
493
519
|
},
|
494
520
|
},
|
521
|
+
{
|
522
|
+
LLM_ARCH_NEO_BERT,
|
523
|
+
{
|
524
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
525
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
526
|
+
{ LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
|
527
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
528
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
529
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
530
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
531
|
+
{ LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
|
532
|
+
{ LLM_TENSOR_CLS, "cls" },
|
533
|
+
{ LLM_TENSOR_CLS_OUT, "cls.output" },
|
534
|
+
},
|
535
|
+
},
|
495
536
|
{
|
496
537
|
LLM_ARCH_JINA_BERT_V2,
|
497
538
|
{
|
@@ -892,6 +933,42 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
892
933
|
{ LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
|
893
934
|
},
|
894
935
|
},
|
936
|
+
{
|
937
|
+
LLM_ARCH_GEMMA3N,
|
938
|
+
{
|
939
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
940
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
941
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
942
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
943
|
+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
|
944
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
945
|
+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
|
946
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
947
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
948
|
+
{ LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
|
949
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
950
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
951
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
952
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
953
|
+
{ LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
|
954
|
+
{ LLM_TENSOR_PER_LAYER_TOKEN_EMBD, "per_layer_token_embd" },
|
955
|
+
{ LLM_TENSOR_PER_LAYER_MODEL_PROJ, "per_layer_model_proj" },
|
956
|
+
{ LLM_TENSOR_PER_LAYER_PROJ_NORM, "per_layer_proj_norm" },
|
957
|
+
{ LLM_TENSOR_ALTUP_UNEMBD_PROJ, "altup_unembd_proj" },
|
958
|
+
{ LLM_TENSOR_ALTUP_PROJ, "altup_proj" },
|
959
|
+
{ LLM_TENSOR_PER_LAYER_INP_GATE, "blk.%d.inp_gate" },
|
960
|
+
{ LLM_TENSOR_PER_LAYER_PROJ, "blk.%d.proj" },
|
961
|
+
{ LLM_TENSOR_PER_LAYER_POST_NORM, "blk.%d.post_norm" },
|
962
|
+
{ LLM_TENSOR_ALTUP_CORRECT_COEF, "blk.%d.altup_correct_coef" },
|
963
|
+
{ LLM_TENSOR_ALTUP_CORRECT_SCALE, "blk.%d.altup_correct_scale" },
|
964
|
+
{ LLM_TENSOR_ALTUP_PREDICT_COEF, "blk.%d.altup_predict_coef" },
|
965
|
+
{ LLM_TENSOR_ALTUP_ROUTER, "blk.%d.altup_router" },
|
966
|
+
{ LLM_TENSOR_ALTUP_ROUTER_NORM, "blk.%d.altup_router_norm" },
|
967
|
+
{ LLM_TENSOR_LAUREL_L, "blk.%d.laurel_l" },
|
968
|
+
{ LLM_TENSOR_LAUREL_R, "blk.%d.laurel_r" },
|
969
|
+
{ LLM_TENSOR_LAUREL_POST_NORM, "blk.%d.laurel_post_norm" },
|
970
|
+
},
|
971
|
+
},
|
895
972
|
{
|
896
973
|
LLM_ARCH_STARCODER2,
|
897
974
|
{
|
@@ -1553,6 +1630,34 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
1553
1630
|
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
|
1554
1631
|
},
|
1555
1632
|
},
|
1633
|
+
{
|
1634
|
+
LLM_ARCH_DOTS1,
|
1635
|
+
{
|
1636
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
1637
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
1638
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
1639
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
1640
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
1641
|
+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
|
1642
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
1643
|
+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
|
1644
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
1645
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
1646
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
1647
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
1648
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
1649
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
1650
|
+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
|
1651
|
+
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
|
1652
|
+
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
|
1653
|
+
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
1654
|
+
{ LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
|
1655
|
+
{ LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
|
1656
|
+
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
|
1657
|
+
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
|
1658
|
+
{ LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
|
1659
|
+
}
|
1660
|
+
},
|
1556
1661
|
{
|
1557
1662
|
LLM_ARCH_UNKNOWN,
|
1558
1663
|
{
|
@@ -1681,6 +1786,23 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
|
|
1681
1786
|
{LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
|
1682
1787
|
{LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
|
1683
1788
|
{LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
|
1789
|
+
// altup / laurel (gemma 3n)
|
1790
|
+
{LLM_TENSOR_PER_LAYER_TOKEN_EMBD, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_GET_ROWS}},
|
1791
|
+
{LLM_TENSOR_PER_LAYER_MODEL_PROJ, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
|
1792
|
+
{LLM_TENSOR_PER_LAYER_PROJ_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
|
1793
|
+
{LLM_TENSOR_ALTUP_PROJ, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
|
1794
|
+
{LLM_TENSOR_ALTUP_UNEMBD_PROJ, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
|
1795
|
+
{LLM_TENSOR_PER_LAYER_INP_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1796
|
+
{LLM_TENSOR_PER_LAYER_PROJ, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1797
|
+
{LLM_TENSOR_PER_LAYER_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
|
1798
|
+
{LLM_TENSOR_ALTUP_CORRECT_COEF, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1799
|
+
{LLM_TENSOR_ALTUP_CORRECT_SCALE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
|
1800
|
+
{LLM_TENSOR_ALTUP_PREDICT_COEF, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1801
|
+
{LLM_TENSOR_ALTUP_ROUTER, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1802
|
+
{LLM_TENSOR_ALTUP_ROUTER_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
|
1803
|
+
{LLM_TENSOR_LAUREL_L, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1804
|
+
{LLM_TENSOR_LAUREL_R, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1805
|
+
{LLM_TENSOR_LAUREL_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
|
1684
1806
|
// this tensor is loaded for T5, but never used
|
1685
1807
|
{LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_NONE}},
|
1686
1808
|
{LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_IM2COL}},
|
@@ -1704,8 +1826,14 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
|
|
1704
1826
|
LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
|
1705
1827
|
|
1706
1828
|
std::string LLM_KV::operator()(llm_kv kv) const {
|
1707
|
-
|
1708
|
-
|
1829
|
+
std::string name = ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
|
1830
|
+
|
1831
|
+
if (suffix != nullptr) {
|
1832
|
+
name += ".";
|
1833
|
+
name += suffix;
|
1834
|
+
}
|
1835
|
+
|
1836
|
+
return name;
|
1709
1837
|
}
|
1710
1838
|
|
1711
1839
|
std::string LLM_TN_IMPL::str() const {
|
@@ -1744,3 +1872,25 @@ llm_arch llm_arch_from_string(const std::string & name) {
|
|
1744
1872
|
const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
|
1745
1873
|
return LLM_TENSOR_INFOS.at(tensor);
|
1746
1874
|
}
|
1875
|
+
|
1876
|
+
bool llm_arch_is_recurrent(const llm_arch & arch) {
|
1877
|
+
switch (arch) {
|
1878
|
+
case LLM_ARCH_MAMBA:
|
1879
|
+
case LLM_ARCH_RWKV6:
|
1880
|
+
case LLM_ARCH_RWKV6QWEN2:
|
1881
|
+
case LLM_ARCH_RWKV7:
|
1882
|
+
case LLM_ARCH_ARWKV7:
|
1883
|
+
return true;
|
1884
|
+
default:
|
1885
|
+
return false;
|
1886
|
+
}
|
1887
|
+
}
|
1888
|
+
|
1889
|
+
bool llm_arch_is_hybrid(const llm_arch & arch) {
|
1890
|
+
// TODO: There are currently no hybrid models! Once there are, this will be
|
1891
|
+
// the place to identify them
|
1892
|
+
switch (arch) {
|
1893
|
+
default:
|
1894
|
+
return false;
|
1895
|
+
}
|
1896
|
+
}
|
package/cpp/llama-arch.h
CHANGED
@@ -24,6 +24,7 @@ enum llm_arch {
|
|
24
24
|
LLM_ARCH_BERT,
|
25
25
|
LLM_ARCH_NOMIC_BERT,
|
26
26
|
LLM_ARCH_NOMIC_BERT_MOE,
|
27
|
+
LLM_ARCH_NEO_BERT,
|
27
28
|
LLM_ARCH_JINA_BERT_V2,
|
28
29
|
LLM_ARCH_BLOOM,
|
29
30
|
LLM_ARCH_STABLELM,
|
@@ -45,6 +46,7 @@ enum llm_arch {
|
|
45
46
|
LLM_ARCH_GEMMA,
|
46
47
|
LLM_ARCH_GEMMA2,
|
47
48
|
LLM_ARCH_GEMMA3,
|
49
|
+
LLM_ARCH_GEMMA3N,
|
48
50
|
LLM_ARCH_STARCODER2,
|
49
51
|
LLM_ARCH_MAMBA,
|
50
52
|
LLM_ARCH_XVERSE,
|
@@ -76,6 +78,8 @@ enum llm_arch {
|
|
76
78
|
LLM_ARCH_WAVTOKENIZER_DEC,
|
77
79
|
LLM_ARCH_PLM,
|
78
80
|
LLM_ARCH_BAILINGMOE,
|
81
|
+
LLM_ARCH_DOTS1,
|
82
|
+
LLM_ARCH_ARCEE,
|
79
83
|
LLM_ARCH_UNKNOWN,
|
80
84
|
};
|
81
85
|
|
@@ -148,6 +152,7 @@ enum llm_kv {
|
|
148
152
|
LLM_KV_ATTENTION_SCALE,
|
149
153
|
LLM_KV_ATTENTION_KEY_LENGTH_MLA,
|
150
154
|
LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
|
155
|
+
LLM_KV_ATTENTION_LAYER_INDICES,
|
151
156
|
|
152
157
|
LLM_KV_ROPE_DIMENSION_COUNT,
|
153
158
|
LLM_KV_ROPE_DIMENSION_SECTIONS,
|
@@ -190,13 +195,13 @@ enum llm_kv {
|
|
190
195
|
LLM_KV_TOKENIZER_MASK_ID,
|
191
196
|
LLM_KV_TOKENIZER_ADD_BOS,
|
192
197
|
LLM_KV_TOKENIZER_ADD_EOS,
|
198
|
+
LLM_KV_TOKENIZER_ADD_SEP,
|
193
199
|
LLM_KV_TOKENIZER_ADD_PREFIX,
|
194
200
|
LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
|
195
201
|
LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
|
196
202
|
LLM_KV_TOKENIZER_HF_JSON,
|
197
203
|
LLM_KV_TOKENIZER_RWKV,
|
198
204
|
LLM_KV_TOKENIZER_CHAT_TEMPLATE,
|
199
|
-
LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
|
200
205
|
LLM_KV_TOKENIZER_FIM_PRE_ID,
|
201
206
|
LLM_KV_TOKENIZER_FIM_SUF_ID,
|
202
207
|
LLM_KV_TOKENIZER_FIM_MID_ID,
|
@@ -213,6 +218,8 @@ enum llm_kv {
|
|
213
218
|
LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
|
214
219
|
LLM_KV_CONVNEXT_BLOCK_COUNT,
|
215
220
|
|
221
|
+
LLM_KV_CLASSIFIER_OUTPUT_LABELS,
|
222
|
+
|
216
223
|
// deprecated:
|
217
224
|
LLM_KV_TOKENIZER_PREFIX_ID,
|
218
225
|
LLM_KV_TOKENIZER_SUFFIX_ID,
|
@@ -263,6 +270,22 @@ enum llm_tensor {
|
|
263
270
|
LLM_TENSOR_LAYER_OUT_NORM,
|
264
271
|
LLM_TENSOR_POST_ATTN_NORM,
|
265
272
|
LLM_TENSOR_POST_MLP_NORM,
|
273
|
+
LLM_TENSOR_PER_LAYER_TOKEN_EMBD, // gemma3n
|
274
|
+
LLM_TENSOR_PER_LAYER_MODEL_PROJ, // gemma3n
|
275
|
+
LLM_TENSOR_PER_LAYER_INP_GATE, // gemma3n
|
276
|
+
LLM_TENSOR_PER_LAYER_PROJ, // gemma3n
|
277
|
+
LLM_TENSOR_PER_LAYER_PROJ_NORM, // gemma3n
|
278
|
+
LLM_TENSOR_PER_LAYER_POST_NORM, // gemma3n
|
279
|
+
LLM_TENSOR_ALTUP_PROJ, // gemma3n
|
280
|
+
LLM_TENSOR_ALTUP_UNEMBD_PROJ, // gemma3n
|
281
|
+
LLM_TENSOR_ALTUP_CORRECT_COEF, // gemma3n
|
282
|
+
LLM_TENSOR_ALTUP_CORRECT_SCALE, // gemma3n
|
283
|
+
LLM_TENSOR_ALTUP_PREDICT_COEF, // gemma3n
|
284
|
+
LLM_TENSOR_ALTUP_ROUTER, // gemma3n
|
285
|
+
LLM_TENSOR_ALTUP_ROUTER_NORM, // gemma3n
|
286
|
+
LLM_TENSOR_LAUREL_L, // gemma3n
|
287
|
+
LLM_TENSOR_LAUREL_R, // gemma3n
|
288
|
+
LLM_TENSOR_LAUREL_POST_NORM, // gemma3n
|
266
289
|
LLM_TENSOR_SSM_IN,
|
267
290
|
LLM_TENSOR_SSM_CONV1D,
|
268
291
|
LLM_TENSOR_SSM_X,
|
@@ -435,3 +458,6 @@ const char * llm_arch_name(llm_arch arch);
|
|
435
458
|
llm_arch llm_arch_from_string(const std::string & name);
|
436
459
|
|
437
460
|
const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
|
461
|
+
|
462
|
+
bool llm_arch_is_recurrent(const llm_arch & arch);
|
463
|
+
bool llm_arch_is_hybrid (const llm_arch & arch);
|