cui-llama.rn 1.4.6 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -20
- package/README.md +317 -319
- package/android/build.gradle +116 -116
- package/android/gradle.properties +5 -5
- package/android/src/main/AndroidManifest.xml +4 -4
- package/android/src/main/CMakeLists.txt +124 -117
- package/android/src/main/java/com/rnllama/LlamaContext.java +645 -645
- package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
- package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
- package/android/src/main/jni-utils.h +100 -100
- package/android/src/main/jni.cpp +1263 -1245
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
- package/cpp/README.md +4 -4
- package/cpp/binary-ops.cpp +158 -0
- package/cpp/binary-ops.h +16 -0
- package/cpp/chat.cpp +1769 -1779
- package/cpp/chat.h +9 -1
- package/cpp/common.cpp +20 -522
- package/cpp/common.h +13 -36
- package/cpp/cpu-common.h +72 -0
- package/cpp/ggml-common.h +12 -6
- package/cpp/ggml-cpu-aarch64.cpp +1557 -80
- package/cpp/ggml-cpu-impl.h +2 -21
- package/cpp/ggml-cpu-quants.c +904 -405
- package/cpp/ggml-cpu.c +909 -13237
- package/cpp/ggml-impl.h +50 -23
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +597 -523
- package/cpp/ggml-metal.m +798 -580
- package/cpp/ggml.c +92 -3
- package/cpp/ggml.h +30 -6
- package/cpp/gguf.cpp +1 -0
- package/cpp/llama-adapter.cpp +55 -20
- package/cpp/llama-adapter.h +11 -9
- package/cpp/llama-arch.cpp +217 -16
- package/cpp/llama-arch.h +25 -0
- package/cpp/llama-batch.h +2 -2
- package/cpp/llama-chat.cpp +54 -2
- package/cpp/llama-chat.h +3 -0
- package/cpp/llama-context.cpp +2294 -1238
- package/cpp/llama-context.h +214 -77
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +1695 -0
- package/cpp/llama-graph.h +592 -0
- package/cpp/llama-hparams.cpp +8 -0
- package/cpp/llama-hparams.h +17 -0
- package/cpp/llama-io.cpp +15 -0
- package/cpp/llama-io.h +35 -0
- package/cpp/llama-kv-cache.cpp +965 -303
- package/cpp/llama-kv-cache.h +145 -151
- package/cpp/llama-memory.cpp +1 -0
- package/cpp/llama-memory.h +21 -0
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +10 -5
- package/cpp/llama-model-loader.h +5 -3
- package/cpp/llama-model.cpp +9194 -201
- package/cpp/llama-model.h +40 -1
- package/cpp/llama-sampling.cpp +5 -0
- package/cpp/llama-vocab.cpp +36 -5
- package/cpp/llama.cpp +51 -9984
- package/cpp/llama.h +102 -22
- package/cpp/log.cpp +34 -0
- package/cpp/minja/chat-template.hpp +15 -7
- package/cpp/minja/minja.hpp +120 -94
- package/cpp/ops.cpp +8723 -0
- package/cpp/ops.h +128 -0
- package/cpp/rn-llama.cpp +873 -882
- package/cpp/rn-llama.h +138 -148
- package/cpp/sampling.cpp +3 -0
- package/cpp/sampling.h +107 -107
- package/cpp/sgemm.cpp +533 -88
- package/cpp/simd-mappings.h +888 -0
- package/cpp/speculative.cpp +4 -4
- package/cpp/unary-ops.cpp +186 -0
- package/cpp/unary-ops.h +28 -0
- package/cpp/unicode-data.cpp +7034 -7034
- package/cpp/unicode-data.h +20 -20
- package/cpp/unicode.cpp +849 -849
- package/cpp/unicode.h +66 -66
- package/cpp/vec.cpp +258 -0
- package/cpp/vec.h +802 -0
- package/ios/CMakeLists.txt +116 -105
- package/ios/RNLlama.h +7 -7
- package/ios/RNLlama.mm +418 -405
- package/ios/RNLlamaContext.h +57 -57
- package/ios/RNLlamaContext.mm +835 -819
- package/ios/rnllama.xcframework/Info.plist +74 -74
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
- package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/chat-template.hpp +15 -7
- package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/minja.hpp +120 -94
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +203 -203
- package/lib/commonjs/NativeRNLlama.js +1 -2
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/chat.js.map +1 -1
- package/lib/commonjs/grammar.js +12 -31
- package/lib/commonjs/grammar.js.map +1 -1
- package/lib/commonjs/index.js +47 -47
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/package.json +1 -0
- package/lib/module/NativeRNLlama.js +2 -0
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/chat.js +2 -0
- package/lib/module/chat.js.map +1 -1
- package/lib/module/grammar.js +14 -31
- package/lib/module/grammar.js.map +1 -1
- package/lib/module/index.js +47 -45
- package/lib/module/index.js.map +1 -1
- package/lib/module/package.json +1 -0
- package/lib/typescript/NativeRNLlama.d.ts +6 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/llama-rn.podspec +48 -48
- package/package.json +233 -233
- package/src/NativeRNLlama.ts +426 -424
- package/src/chat.ts +44 -44
- package/src/grammar.ts +854 -854
- package/src/index.ts +495 -485
package/cpp/llama-arch.cpp
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
|
7
7
|
static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
8
8
|
{ LLM_ARCH_LLAMA, "llama" },
|
9
|
+
{ LLM_ARCH_LLAMA4, "llama4" },
|
9
10
|
{ LLM_ARCH_DECI, "deci" },
|
10
11
|
{ LLM_ARCH_FALCON, "falcon" },
|
11
12
|
{ LLM_ARCH_GROK, "grok" },
|
@@ -25,6 +26,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
|
25
26
|
{ LLM_ARCH_QWEN2, "qwen2" },
|
26
27
|
{ LLM_ARCH_QWEN2MOE, "qwen2moe" },
|
27
28
|
{ LLM_ARCH_QWEN2VL, "qwen2vl" },
|
29
|
+
{ LLM_ARCH_QWEN3, "qwen3" },
|
30
|
+
{ LLM_ARCH_QWEN3MOE, "qwen3moe" },
|
28
31
|
{ LLM_ARCH_PHI2, "phi2" },
|
29
32
|
{ LLM_ARCH_PHI3, "phi3" },
|
30
33
|
{ LLM_ARCH_PHIMOE, "phimoe" },
|
@@ -59,10 +62,14 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
|
59
62
|
{ LLM_ARCH_EXAONE, "exaone" },
|
60
63
|
{ LLM_ARCH_RWKV6, "rwkv6" },
|
61
64
|
{ LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
|
65
|
+
{ LLM_ARCH_RWKV7, "rwkv7" },
|
66
|
+
{ LLM_ARCH_ARWKV7, "arwkv7" },
|
62
67
|
{ LLM_ARCH_GRANITE, "granite" },
|
63
68
|
{ LLM_ARCH_GRANITE_MOE, "granitemoe" },
|
64
69
|
{ LLM_ARCH_CHAMELEON, "chameleon" },
|
65
70
|
{ LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
|
71
|
+
{ LLM_ARCH_PLM, "plm" },
|
72
|
+
{ LLM_ARCH_BAILINGMOE, "bailingmoe" },
|
66
73
|
{ LLM_ARCH_UNKNOWN, "(unknown)" },
|
67
74
|
};
|
68
75
|
|
@@ -71,6 +78,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|
71
78
|
{ LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" },
|
72
79
|
{ LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
|
73
80
|
{ LLM_KV_GENERAL_ALIGNMENT, "general.alignment" },
|
81
|
+
{ LLM_KV_GENERAL_FILE_TYPE, "general.file_type" },
|
74
82
|
{ LLM_KV_GENERAL_NAME, "general.name" },
|
75
83
|
{ LLM_KV_GENERAL_AUTHOR, "general.author" },
|
76
84
|
{ LLM_KV_GENERAL_VERSION, "general.version" },
|
@@ -109,23 +117,28 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|
109
117
|
{ LLM_KV_RESIDUAL_SCALE, "%s.residual_scale" },
|
110
118
|
{ LLM_KV_EMBEDDING_SCALE, "%s.embedding_scale" },
|
111
119
|
{ LLM_KV_TOKEN_SHIFT_COUNT, "%s.token_shift_count" },
|
120
|
+
{ LLM_KV_INTERLEAVE_MOE_LAYER_STEP, "%s.interleave_moe_layer_step" },
|
112
121
|
|
113
|
-
{ LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
|
114
|
-
{ LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
|
115
|
-
{ LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
|
116
|
-
{ LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
|
117
|
-
{ LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
|
118
|
-
{ LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
|
119
|
-
{ LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
|
120
|
-
{ LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
|
121
|
-
{ LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
|
122
|
-
{ LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
|
123
|
-
{ LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
|
124
|
-
{ LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
|
125
|
-
{ LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
|
126
|
-
{ LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
|
127
|
-
{ LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
|
128
|
-
{ LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
|
122
|
+
{ LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
|
123
|
+
{ LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
|
124
|
+
{ LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
|
125
|
+
{ LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
|
126
|
+
{ LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
|
127
|
+
{ LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
|
128
|
+
{ LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
|
129
|
+
{ LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
|
130
|
+
{ LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
|
131
|
+
{ LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
|
132
|
+
{ LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
|
133
|
+
{ LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
|
134
|
+
{ LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
|
135
|
+
{ LLM_KV_ATTENTION_DECAY_LORA_RANK, "%s.attention.decay_lora_rank" },
|
136
|
+
{ LLM_KV_ATTENTION_ICLR_LORA_RANK, "%s.attention.iclr_lora_rank" },
|
137
|
+
{ LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK, "%s.attention.value_residual_mix_lora_rank" },
|
138
|
+
{ LLM_KV_ATTENTION_GATE_LORA_RANK, "%s.attention.gate_lora_rank" },
|
139
|
+
{ LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
|
140
|
+
{ LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
|
141
|
+
{ LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
|
129
142
|
|
130
143
|
{ LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
|
131
144
|
{ LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
|
@@ -224,6 +237,35 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
224
237
|
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
225
238
|
},
|
226
239
|
},
|
240
|
+
{
|
241
|
+
LLM_ARCH_LLAMA4,
|
242
|
+
{
|
243
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
244
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
245
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
246
|
+
{ LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
|
247
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
248
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
249
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
250
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
251
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
252
|
+
{ LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
|
253
|
+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
|
254
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
255
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
256
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
257
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
258
|
+
{ LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
|
259
|
+
{ LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
|
260
|
+
{ LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
|
261
|
+
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
|
262
|
+
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
|
263
|
+
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
264
|
+
{ LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
|
265
|
+
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
|
266
|
+
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
|
267
|
+
},
|
268
|
+
},
|
227
269
|
{
|
228
270
|
LLM_ARCH_DECI,
|
229
271
|
{
|
@@ -555,6 +597,45 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
555
597
|
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
|
556
598
|
},
|
557
599
|
},
|
600
|
+
{
|
601
|
+
LLM_ARCH_QWEN3,
|
602
|
+
{
|
603
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
604
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
605
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
606
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
607
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
608
|
+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
|
609
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
610
|
+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
|
611
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
612
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
613
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
614
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
615
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
616
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
617
|
+
},
|
618
|
+
},
|
619
|
+
{
|
620
|
+
LLM_ARCH_QWEN3MOE,
|
621
|
+
{
|
622
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
623
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
624
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
625
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
626
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
627
|
+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
|
628
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
629
|
+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
|
630
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
631
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
632
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
633
|
+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
|
634
|
+
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
|
635
|
+
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
|
636
|
+
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
637
|
+
},
|
638
|
+
},
|
558
639
|
{
|
559
640
|
LLM_ARCH_PHI2,
|
560
641
|
{
|
@@ -772,6 +853,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
772
853
|
{
|
773
854
|
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
774
855
|
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
856
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
775
857
|
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
776
858
|
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
777
859
|
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
|
@@ -1036,6 +1118,22 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
1036
1118
|
{ LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
|
1037
1119
|
},
|
1038
1120
|
},
|
1121
|
+
{
|
1122
|
+
LLM_ARCH_PLM,
|
1123
|
+
{
|
1124
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
1125
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
1126
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
1127
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
1128
|
+
{ LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
|
1129
|
+
{ LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
|
1130
|
+
{ LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
|
1131
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
1132
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
1133
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
1134
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
1135
|
+
},
|
1136
|
+
},
|
1039
1137
|
{
|
1040
1138
|
LLM_ARCH_CHATGLM,
|
1041
1139
|
{
|
@@ -1238,6 +1336,74 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
1238
1336
|
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
1239
1337
|
},
|
1240
1338
|
},
|
1339
|
+
{
|
1340
|
+
LLM_ARCH_RWKV7,
|
1341
|
+
{
|
1342
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
1343
|
+
{ LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
|
1344
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
1345
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
1346
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
1347
|
+
{ LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
|
1348
|
+
{ LLM_TENSOR_TIME_MIX_W0, "blk.%d.time_mix_w0" },
|
1349
|
+
{ LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
|
1350
|
+
{ LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
|
1351
|
+
{ LLM_TENSOR_TIME_MIX_A0, "blk.%d.time_mix_a0" },
|
1352
|
+
{ LLM_TENSOR_TIME_MIX_A1, "blk.%d.time_mix_a1" },
|
1353
|
+
{ LLM_TENSOR_TIME_MIX_A2, "blk.%d.time_mix_a2" },
|
1354
|
+
{ LLM_TENSOR_TIME_MIX_V0, "blk.%d.time_mix_v0" },
|
1355
|
+
{ LLM_TENSOR_TIME_MIX_V1, "blk.%d.time_mix_v1" },
|
1356
|
+
{ LLM_TENSOR_TIME_MIX_V2, "blk.%d.time_mix_v2" },
|
1357
|
+
{ LLM_TENSOR_TIME_MIX_G1, "blk.%d.time_mix_g1" },
|
1358
|
+
{ LLM_TENSOR_TIME_MIX_G2, "blk.%d.time_mix_g2" },
|
1359
|
+
{ LLM_TENSOR_TIME_MIX_K_K, "blk.%d.time_mix_k_k" },
|
1360
|
+
{ LLM_TENSOR_TIME_MIX_K_A, "blk.%d.time_mix_k_a" },
|
1361
|
+
{ LLM_TENSOR_TIME_MIX_R_K, "blk.%d.time_mix_r_k" },
|
1362
|
+
{ LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
|
1363
|
+
{ LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
|
1364
|
+
{ LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
|
1365
|
+
{ LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
|
1366
|
+
{ LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
|
1367
|
+
{ LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
|
1368
|
+
{ LLM_TENSOR_CHANNEL_MIX_LERP_K, "blk.%d.channel_mix_lerp_k" },
|
1369
|
+
{ LLM_TENSOR_CHANNEL_MIX_KEY, "blk.%d.channel_mix_key" },
|
1370
|
+
{ LLM_TENSOR_CHANNEL_MIX_VALUE, "blk.%d.channel_mix_value" },
|
1371
|
+
},
|
1372
|
+
},
|
1373
|
+
{
|
1374
|
+
LLM_ARCH_ARWKV7,
|
1375
|
+
{
|
1376
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
1377
|
+
{ LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
|
1378
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
1379
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
1380
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
1381
|
+
{ LLM_TENSOR_TIME_MIX_W0, "blk.%d.time_mix_w0" },
|
1382
|
+
{ LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
|
1383
|
+
{ LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
|
1384
|
+
{ LLM_TENSOR_TIME_MIX_A0, "blk.%d.time_mix_a0" },
|
1385
|
+
{ LLM_TENSOR_TIME_MIX_A1, "blk.%d.time_mix_a1" },
|
1386
|
+
{ LLM_TENSOR_TIME_MIX_A2, "blk.%d.time_mix_a2" },
|
1387
|
+
{ LLM_TENSOR_TIME_MIX_V0, "blk.%d.time_mix_v0" },
|
1388
|
+
{ LLM_TENSOR_TIME_MIX_V1, "blk.%d.time_mix_v1" },
|
1389
|
+
{ LLM_TENSOR_TIME_MIX_V2, "blk.%d.time_mix_v2" },
|
1390
|
+
{ LLM_TENSOR_TIME_MIX_G1, "blk.%d.time_mix_g1" },
|
1391
|
+
{ LLM_TENSOR_TIME_MIX_G2, "blk.%d.time_mix_g2" },
|
1392
|
+
{ LLM_TENSOR_TIME_MIX_K_K, "blk.%d.time_mix_k_k" },
|
1393
|
+
{ LLM_TENSOR_TIME_MIX_K_A, "blk.%d.time_mix_k_a" },
|
1394
|
+
{ LLM_TENSOR_TIME_MIX_R_K, "blk.%d.time_mix_r_k" },
|
1395
|
+
{ LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
|
1396
|
+
{ LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
|
1397
|
+
{ LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
|
1398
|
+
{ LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
|
1399
|
+
{ LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
|
1400
|
+
{ LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
|
1401
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
1402
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
1403
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
1404
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
1405
|
+
},
|
1406
|
+
},
|
1241
1407
|
{
|
1242
1408
|
LLM_ARCH_GRANITE,
|
1243
1409
|
{
|
@@ -1317,6 +1483,29 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
1317
1483
|
{ LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
|
1318
1484
|
},
|
1319
1485
|
},
|
1486
|
+
{
|
1487
|
+
LLM_ARCH_BAILINGMOE,
|
1488
|
+
{
|
1489
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
1490
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
1491
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
1492
|
+
{ LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
|
1493
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
1494
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
1495
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
1496
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
1497
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
1498
|
+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
|
1499
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
1500
|
+
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
|
1501
|
+
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
|
1502
|
+
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
1503
|
+
{ LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
|
1504
|
+
{ LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
|
1505
|
+
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
|
1506
|
+
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
|
1507
|
+
},
|
1508
|
+
},
|
1320
1509
|
{
|
1321
1510
|
LLM_ARCH_UNKNOWN,
|
1322
1511
|
{
|
@@ -1397,6 +1586,12 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
|
|
1397
1586
|
{LLM_TENSOR_SSM_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1398
1587
|
{LLM_TENSOR_TIME_MIX_W1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1399
1588
|
{LLM_TENSOR_TIME_MIX_W2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1589
|
+
{LLM_TENSOR_TIME_MIX_A1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1590
|
+
{LLM_TENSOR_TIME_MIX_A2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1591
|
+
{LLM_TENSOR_TIME_MIX_V1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1592
|
+
{LLM_TENSOR_TIME_MIX_V2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1593
|
+
{LLM_TENSOR_TIME_MIX_G1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1594
|
+
{LLM_TENSOR_TIME_MIX_G2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1400
1595
|
{LLM_TENSOR_TIME_MIX_DECAY_W1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1401
1596
|
{LLM_TENSOR_TIME_MIX_DECAY_W2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
1402
1597
|
{LLM_TENSOR_TIME_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
|
@@ -1415,6 +1610,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
|
|
1415
1610
|
{LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
|
1416
1611
|
{LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
|
1417
1612
|
{LLM_TENSOR_CHANNEL_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
|
1613
|
+
{LLM_TENSOR_TIME_MIX_K_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
|
1614
|
+
{LLM_TENSOR_TIME_MIX_K_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
|
1615
|
+
{LLM_TENSOR_TIME_MIX_R_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
|
1418
1616
|
{LLM_TENSOR_TIME_MIX_LERP_W, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
|
1419
1617
|
{LLM_TENSOR_TIME_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
|
1420
1618
|
{LLM_TENSOR_TIME_MIX_LERP_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
|
@@ -1422,6 +1620,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
|
|
1422
1620
|
{LLM_TENSOR_TIME_MIX_LERP_G, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
|
1423
1621
|
{LLM_TENSOR_TIME_MIX_LERP_FUSED, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
|
1424
1622
|
{LLM_TENSOR_TIME_MIX_DECAY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
|
1623
|
+
{LLM_TENSOR_TIME_MIX_W0, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
|
1624
|
+
{LLM_TENSOR_TIME_MIX_A0, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
|
1625
|
+
{LLM_TENSOR_TIME_MIX_V0, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
|
1425
1626
|
{LLM_TENSOR_TIME_MIX_FIRST, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_RWKV_WKV6}},
|
1426
1627
|
{LLM_TENSOR_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
|
1427
1628
|
{LLM_TENSOR_ATTN_NORM_2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
|
package/cpp/llama-arch.h
CHANGED
@@ -10,6 +10,7 @@
|
|
10
10
|
|
11
11
|
enum llm_arch {
|
12
12
|
LLM_ARCH_LLAMA,
|
13
|
+
LLM_ARCH_LLAMA4,
|
13
14
|
LLM_ARCH_DECI,
|
14
15
|
LLM_ARCH_FALCON,
|
15
16
|
LLM_ARCH_BAICHUAN,
|
@@ -29,6 +30,8 @@ enum llm_arch {
|
|
29
30
|
LLM_ARCH_QWEN2,
|
30
31
|
LLM_ARCH_QWEN2MOE,
|
31
32
|
LLM_ARCH_QWEN2VL,
|
33
|
+
LLM_ARCH_QWEN3,
|
34
|
+
LLM_ARCH_QWEN3MOE,
|
32
35
|
LLM_ARCH_PHI2,
|
33
36
|
LLM_ARCH_PHI3,
|
34
37
|
LLM_ARCH_PHIMOE,
|
@@ -63,10 +66,14 @@ enum llm_arch {
|
|
63
66
|
LLM_ARCH_EXAONE,
|
64
67
|
LLM_ARCH_RWKV6,
|
65
68
|
LLM_ARCH_RWKV6QWEN2,
|
69
|
+
LLM_ARCH_RWKV7,
|
70
|
+
LLM_ARCH_ARWKV7,
|
66
71
|
LLM_ARCH_GRANITE,
|
67
72
|
LLM_ARCH_GRANITE_MOE,
|
68
73
|
LLM_ARCH_CHAMELEON,
|
69
74
|
LLM_ARCH_WAVTOKENIZER_DEC,
|
75
|
+
LLM_ARCH_PLM,
|
76
|
+
LLM_ARCH_BAILINGMOE,
|
70
77
|
LLM_ARCH_UNKNOWN,
|
71
78
|
};
|
72
79
|
|
@@ -75,6 +82,7 @@ enum llm_kv {
|
|
75
82
|
LLM_KV_GENERAL_ARCHITECTURE,
|
76
83
|
LLM_KV_GENERAL_QUANTIZATION_VERSION,
|
77
84
|
LLM_KV_GENERAL_ALIGNMENT,
|
85
|
+
LLM_KV_GENERAL_FILE_TYPE,
|
78
86
|
LLM_KV_GENERAL_NAME,
|
79
87
|
LLM_KV_GENERAL_AUTHOR,
|
80
88
|
LLM_KV_GENERAL_VERSION,
|
@@ -113,6 +121,7 @@ enum llm_kv {
|
|
113
121
|
LLM_KV_RESIDUAL_SCALE,
|
114
122
|
LLM_KV_EMBEDDING_SCALE,
|
115
123
|
LLM_KV_TOKEN_SHIFT_COUNT,
|
124
|
+
LLM_KV_INTERLEAVE_MOE_LAYER_STEP,
|
116
125
|
|
117
126
|
LLM_KV_ATTENTION_HEAD_COUNT,
|
118
127
|
LLM_KV_ATTENTION_HEAD_COUNT_KV,
|
@@ -127,6 +136,10 @@ enum llm_kv {
|
|
127
136
|
LLM_KV_ATTENTION_CAUSAL,
|
128
137
|
LLM_KV_ATTENTION_Q_LORA_RANK,
|
129
138
|
LLM_KV_ATTENTION_KV_LORA_RANK,
|
139
|
+
LLM_KV_ATTENTION_DECAY_LORA_RANK,
|
140
|
+
LLM_KV_ATTENTION_ICLR_LORA_RANK,
|
141
|
+
LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK,
|
142
|
+
LLM_KV_ATTENTION_GATE_LORA_RANK,
|
130
143
|
LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
|
131
144
|
LLM_KV_ATTENTION_SLIDING_WINDOW,
|
132
145
|
LLM_KV_ATTENTION_SCALE,
|
@@ -250,8 +263,20 @@ enum llm_tensor {
|
|
250
263
|
LLM_TENSOR_SSM_A,
|
251
264
|
LLM_TENSOR_SSM_D,
|
252
265
|
LLM_TENSOR_SSM_OUT,
|
266
|
+
LLM_TENSOR_TIME_MIX_W0,
|
253
267
|
LLM_TENSOR_TIME_MIX_W1,
|
254
268
|
LLM_TENSOR_TIME_MIX_W2,
|
269
|
+
LLM_TENSOR_TIME_MIX_A0,
|
270
|
+
LLM_TENSOR_TIME_MIX_A1,
|
271
|
+
LLM_TENSOR_TIME_MIX_A2,
|
272
|
+
LLM_TENSOR_TIME_MIX_V0,
|
273
|
+
LLM_TENSOR_TIME_MIX_V1,
|
274
|
+
LLM_TENSOR_TIME_MIX_V2,
|
275
|
+
LLM_TENSOR_TIME_MIX_G1,
|
276
|
+
LLM_TENSOR_TIME_MIX_G2,
|
277
|
+
LLM_TENSOR_TIME_MIX_K_K,
|
278
|
+
LLM_TENSOR_TIME_MIX_K_A,
|
279
|
+
LLM_TENSOR_TIME_MIX_R_K,
|
255
280
|
LLM_TENSOR_TIME_MIX_LERP_X,
|
256
281
|
LLM_TENSOR_TIME_MIX_LERP_W,
|
257
282
|
LLM_TENSOR_TIME_MIX_LERP_K,
|
package/cpp/llama-batch.h
CHANGED
@@ -42,9 +42,9 @@ struct llama_sbatch {
|
|
42
42
|
bool logits_all; // TODO: remove once lctx.logits_all is removed too
|
43
43
|
|
44
44
|
// sorted indices into the batch
|
45
|
-
std::vector<size_t> ids;
|
45
|
+
std::vector<int64_t> ids;
|
46
46
|
// batch indices of the output
|
47
|
-
std::vector<size_t> out_ids;
|
47
|
+
std::vector<int64_t> out_ids;
|
48
48
|
std::vector<llama_sbatch_seq> seq;
|
49
49
|
|
50
50
|
const llama_batch * batch = nullptr;
|
package/cpp/llama-chat.cpp
CHANGED
@@ -59,6 +59,9 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
|
|
59
59
|
{ "granite", LLM_CHAT_TEMPLATE_GRANITE },
|
60
60
|
{ "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
|
61
61
|
{ "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
|
62
|
+
{ "yandex", LLM_CHAT_TEMPLATE_YANDEX },
|
63
|
+
{ "bailing", LLM_CHAT_TEMPLATE_BAILING },
|
64
|
+
{ "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
|
62
65
|
};
|
63
66
|
|
64
67
|
llm_chat_template llm_chat_template_from_str(const std::string & name) {
|
@@ -168,6 +171,12 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
|
|
168
171
|
return LLM_CHAT_TEMPLATE_GIGACHAT;
|
169
172
|
} else if (tmpl_contains("<|role_start|>")) {
|
170
173
|
return LLM_CHAT_TEMPLATE_MEGREZ;
|
174
|
+
} else if (tmpl_contains(" Ассистент:")) {
|
175
|
+
return LLM_CHAT_TEMPLATE_YANDEX;
|
176
|
+
} else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
|
177
|
+
return LLM_CHAT_TEMPLATE_BAILING;
|
178
|
+
} else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
|
179
|
+
return LLM_CHAT_TEMPLATE_LLAMA4;
|
171
180
|
}
|
172
181
|
return LLM_CHAT_TEMPLATE_UNKNOWN;
|
173
182
|
}
|
@@ -567,7 +576,51 @@ int32_t llm_chat_apply_template(
|
|
567
576
|
if (add_ass) {
|
568
577
|
ss << "<|role_start|>assistant<|role_end|>";
|
569
578
|
}
|
570
|
-
} else {
|
579
|
+
} else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
|
580
|
+
// Yandex template ("\n\n" is defined as EOT token)
|
581
|
+
|
582
|
+
ss << "<s>";
|
583
|
+
|
584
|
+
for (size_t i = 0; i < chat.size(); i++) {
|
585
|
+
std::string role(chat[i]->role);
|
586
|
+
if (role == "user") {
|
587
|
+
ss << " Пользователь: " << chat[i]->content << "\n\n";
|
588
|
+
} else if (role == "assistant") {
|
589
|
+
ss << " Ассистент: " << chat[i]->content << "\n\n";
|
590
|
+
}
|
591
|
+
}
|
592
|
+
|
593
|
+
// Add generation prompt if needed
|
594
|
+
if (add_ass) {
|
595
|
+
ss << " Ассистент:[SEP]";
|
596
|
+
}
|
597
|
+
} else if (tmpl == LLM_CHAT_TEMPLATE_BAILING) {
|
598
|
+
// Bailing (Ling) template
|
599
|
+
for (auto message : chat) {
|
600
|
+
std::string role(message->role);
|
601
|
+
|
602
|
+
if (role == "user") {
|
603
|
+
role = "HUMAN";
|
604
|
+
} else {
|
605
|
+
std::transform(role.begin(), role.end(), role.begin(), ::toupper);
|
606
|
+
}
|
607
|
+
|
608
|
+
ss << "<role>" << role << "</role>" << message->content;
|
609
|
+
}
|
610
|
+
|
611
|
+
if (add_ass) {
|
612
|
+
ss << "<role>ASSISTANT</role>";
|
613
|
+
}
|
614
|
+
} else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) {
|
615
|
+
// Llama 4
|
616
|
+
for (auto message : chat) {
|
617
|
+
std::string role(message->role);
|
618
|
+
ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>";
|
619
|
+
}
|
620
|
+
if (add_ass) {
|
621
|
+
ss << "<|header_start|>assistant<|header_end|>\n\n";
|
622
|
+
}
|
623
|
+
} else {
|
571
624
|
// template not supported
|
572
625
|
return -1;
|
573
626
|
}
|
@@ -585,4 +638,3 @@ int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
|
|
585
638
|
}
|
586
639
|
return (int32_t) LLM_CHAT_TEMPLATES.size();
|
587
640
|
}
|
588
|
-
|
package/cpp/llama-chat.h
CHANGED