cui-llama.rn 1.6.0 → 1.7.0
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- package/README.md +35 -7
- package/android/src/main/CMakeLists.txt +22 -11
- package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
- package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
- package/android/src/main/jni.cpp +173 -18
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
- package/cpp/LICENSE +21 -0
- package/cpp/chat.cpp +129 -107
- package/cpp/chat.h +2 -0
- package/cpp/common.cpp +58 -78
- package/cpp/common.h +29 -21
- package/cpp/ggml-alloc.c +4 -1
- package/cpp/ggml-backend.cpp +9 -5
- package/cpp/ggml-backend.h +4 -4
- package/cpp/ggml-cpp.h +1 -1
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
- package/cpp/ggml-cpu/common.h +72 -0
- package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
- package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
- package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
- package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
- package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
- package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
- package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
- package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
- package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
- package/cpp/ggml-cpu.h +5 -0
- package/cpp/ggml-impl.h +16 -9
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +36 -11
- package/cpp/ggml-metal.m +810 -176
- package/cpp/ggml-opt.cpp +373 -190
- package/cpp/ggml-opt.h +49 -28
- package/cpp/ggml-quants.c +0 -6
- package/cpp/ggml.c +227 -282
- package/cpp/ggml.h +82 -101
- package/cpp/gguf.cpp +33 -33
- package/cpp/json-schema-to-grammar.cpp +3 -0
- package/cpp/llama-adapter.cpp +6 -0
- package/cpp/llama-arch.cpp +49 -17
- package/cpp/llama-arch.h +9 -0
- package/cpp/llama-batch.cpp +8 -2
- package/cpp/llama-batch.h +2 -1
- package/cpp/llama-chat.cpp +39 -16
- package/cpp/llama-chat.h +4 -2
- package/cpp/llama-context.cpp +440 -611
- package/cpp/llama-context.h +44 -33
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +214 -291
- package/cpp/llama-graph.h +69 -21
- package/cpp/llama-hparams.cpp +17 -1
- package/cpp/llama-hparams.h +39 -5
- package/cpp/llama-kv-cache.cpp +2067 -620
- package/cpp/llama-kv-cache.h +410 -108
- package/cpp/llama-memory.h +12 -1
- package/cpp/llama-model-loader.cpp +24 -15
- package/cpp/llama-model-saver.cpp +281 -0
- package/cpp/llama-model-saver.h +37 -0
- package/cpp/llama-model.cpp +1089 -359
- package/cpp/llama-model.h +19 -3
- package/cpp/llama-sampling.cpp +20 -7
- package/cpp/llama-vocab.cpp +54 -9
- package/cpp/llama-vocab.h +6 -0
- package/cpp/llama.cpp +14 -0
- package/cpp/llama.h +86 -142
- package/cpp/minja/chat-template.hpp +9 -5
- package/cpp/minja/minja.hpp +69 -36
- package/cpp/rn-llama.cpp +602 -190
- package/cpp/rn-llama.h +34 -8
- package/cpp/sampling.cpp +57 -50
- package/cpp/tools/mtmd/clip-impl.h +462 -0
- package/cpp/tools/mtmd/clip.cpp +4024 -0
- package/cpp/tools/mtmd/clip.h +101 -0
- package/cpp/tools/mtmd/miniaudio.h +93468 -0
- package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
- package/cpp/tools/mtmd/mtmd.cpp +942 -0
- package/cpp/tools/mtmd/mtmd.h +362 -0
- package/cpp/tools/mtmd/stb_image.h +7988 -0
- package/ios/CMakeLists.txt +20 -10
- package/ios/RNLlama.h +6 -0
- package/ios/RNLlama.mm +82 -3
- package/ios/RNLlamaContext.h +5 -1
- package/ios/RNLlamaContext.mm +131 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +33 -7
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +153 -21
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +152 -20
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +54 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +72 -6
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +72 -4
- package/src/index.ts +212 -38
- package/cpp/binary-ops.h +0 -16
- package/cpp/ops.h +0 -128
- package/cpp/simd-mappings.h +0 -888
- package/cpp/unary-ops.h +0 -28
- package/cpp/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/lib/commonjs/chat.js +0 -37
- package/lib/commonjs/chat.js.map +0 -1
- package/lib/module/chat.js +0 -33
- package/lib/module/chat.js.map +0 -1
- package/lib/typescript/chat.d.ts +0 -10
- package/lib/typescript/chat.d.ts.map +0 -1
- package/src/chat.ts +0 -44
- /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
- /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
- /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
- /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
- /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
- /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
- /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
- /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
package/cpp/llama-graph.h
CHANGED
@@ -19,6 +19,8 @@ struct llama_cparams;
 
 class llama_memory_i;
 class llama_kv_cache_unified;
+class llama_kv_cache_unified_iswa;
+class llama_kv_cache_recurrent;
 
 // certain models (typically multi-modal) can produce different types of graphs
 enum llm_graph_type {
@@ -90,29 +92,27 @@ public:
 
 class llm_graph_input_pos : public llm_graph_input_i {
 public:
-    llm_graph_input_pos(int64_t n_pos_per_token) : n_pos_per_token(n_pos_per_token) {}
+    llm_graph_input_pos(int64_t n_pos_per_embd) : n_pos_per_embd(n_pos_per_embd) {}
     virtual ~llm_graph_input_pos() = default;
 
     void set_input(const llama_ubatch * ubatch) override;
 
     lm_ggml_tensor * pos = nullptr; // I32 [n_batch]
 
-    const int64_t n_pos_per_token = 1;
+    const int64_t n_pos_per_embd = 1;
 };
 
 // temperature tuning, used by llama4
 class llm_graph_input_attn_temp : public llm_graph_input_i {
 public:
-    llm_graph_input_attn_temp(int64_t n_pos_per_token, uint32_t n_attn_temp_floor_scale, float f_attn_temp_scale)
-        : n_pos_per_token(n_pos_per_token), n_attn_temp_floor_scale(n_attn_temp_floor_scale), f_attn_temp_scale(f_attn_temp_scale) {}
+    llm_graph_input_attn_temp(uint32_t n_attn_temp_floor_scale, float f_attn_temp_scale)
+        : n_attn_temp_floor_scale(n_attn_temp_floor_scale), f_attn_temp_scale(f_attn_temp_scale) {}
     virtual ~llm_graph_input_attn_temp() = default;
 
     void set_input(const llama_ubatch * ubatch) override;
 
     lm_ggml_tensor * attn_scale = nullptr; // F32 [n_batch]
 
-    const int64_t n_pos_per_token = 1;
-
     const uint32_t n_attn_temp_floor_scale;
     const float    f_attn_temp_scale;
 };
@@ -188,26 +188,26 @@ public:
 
 class llm_graph_input_s_copy : public llm_graph_input_i {
 public:
-    llm_graph_input_s_copy(const llama_kv_cache_unified * kv_self) : kv_self(kv_self) {}
+    llm_graph_input_s_copy(const llama_kv_cache_recurrent * kv_self) : kv_self(kv_self) {}
     virtual ~llm_graph_input_s_copy() = default;
 
     void set_input(const llama_ubatch * ubatch) override;
 
     lm_ggml_tensor * s_copy; // I32 [kv_size]
 
-    const llama_kv_cache_unified * kv_self;
+    const llama_kv_cache_recurrent * kv_self;
 };
 
 class llm_graph_input_s_mask : public llm_graph_input_i {
 public:
-    llm_graph_input_s_mask(const llama_kv_cache_unified * kv_self) : kv_self(kv_self) {}
+    llm_graph_input_s_mask(const llama_kv_cache_recurrent * kv_self) : kv_self(kv_self) {}
     virtual ~llm_graph_input_s_mask() = default;
 
     void set_input(const llama_ubatch * ubatch) override;
 
     lm_ggml_tensor * s_mask; // F32 [1, n_kv]
 
-    const llama_kv_cache_unified * kv_self;
+    const llama_kv_cache_recurrent * kv_self;
 };
 
 class llm_graph_input_cross_embd : public llm_graph_input_i {
@@ -256,6 +256,31 @@ public:
 
     void set_input(const llama_ubatch * ubatch) override;
 
+    lm_ggml_tensor * get_kq_mask() const { return self_kq_mask_cnv; }
+
+    lm_ggml_tensor * self_kq_mask     = nullptr; // F32 [n_kv, n_batch]
+    lm_ggml_tensor * self_kq_mask_cnv = nullptr; //     [n_kv, n_batch]
+
+    const llama_hparams & hparams;
+    const llama_cparams & cparams;
+
+    const llama_kv_cache_unified * kv_self;
+};
+
+class llm_graph_input_attn_kv_unified_iswa : public llm_graph_input_i {
+public:
+    llm_graph_input_attn_kv_unified_iswa(
+            const llama_hparams & hparams,
+            const llama_cparams & cparams,
+            const llama_kv_cache_unified_iswa * kv_self) :
+        hparams(hparams),
+        cparams(cparams),
+        kv_self(kv_self) {
+    }
+    ~llm_graph_input_attn_kv_unified_iswa() = default;
+
+    void set_input(const llama_ubatch * ubatch) override;
+
     lm_ggml_tensor * get_kq_mask()     const { return self_kq_mask_cnv; }
     lm_ggml_tensor * get_kq_mask_swa() const { return self_kq_mask_swa_cnv; }
 
@@ -267,7 +292,7 @@ public:
     const llama_hparams & hparams;
     const llama_cparams & cparams;
 
-    const llama_kv_cache_unified * kv_self;
+    const llama_kv_cache_unified_iswa * kv_self;
 };
 
 class llm_graph_input_attn_cross : public llm_graph_input_i {
@@ -299,6 +324,7 @@ class llm_graph_result_i {
 public:
     virtual ~llm_graph_result_i() = default;
 
+    virtual lm_ggml_tensor * get_tokens()      = 0;
     virtual lm_ggml_tensor * get_logits()      = 0;
     virtual lm_ggml_tensor * get_embd()        = 0;
     virtual lm_ggml_tensor * get_embd_pooled() = 0;
@@ -313,6 +339,7 @@ class llm_graph_result : public llm_graph_result_i {
 public:
     virtual ~llm_graph_result() = default;
 
+    lm_ggml_tensor * get_tokens()      override { return t_tokens; }
     lm_ggml_tensor * get_logits()      override { return t_logits; }
     lm_ggml_tensor * get_embd()        override { return t_embd; }
     lm_ggml_tensor * get_embd_pooled() override { return t_embd_pooled; }
@@ -329,6 +356,7 @@ public:
     }
 
     // important graph nodes
+    lm_ggml_tensor * t_tokens      = nullptr;
     lm_ggml_tensor * t_logits      = nullptr;
     lm_ggml_tensor * t_embd        = nullptr;
     lm_ggml_tensor * t_embd_pooled = nullptr;
@@ -352,8 +380,8 @@ struct llm_graph_params {
     const llama_cparams & cparams;
     const llama_ubatch  & ubatch;
 
-    lm_ggml_backend_sched * sched;
-    lm_ggml_backend * backend_cpu;
+    lm_ggml_backend_sched_t sched;
+    lm_ggml_backend_t backend_cpu;
 
     const llama_adapter_cvec  * cvec;
     const llama_adapter_loras * loras;
@@ -376,7 +404,6 @@ struct llm_graph_context {
     const int64_t n_layer;
     const int64_t n_rot;
     const int64_t n_ctx; // user-specified context size (can be different from n_ctx_train)
-    const int64_t n_ctx_per_seq;
     const int64_t n_head;
     const int64_t n_head_kv;
     const int64_t n_embd_head_k;
@@ -404,9 +431,9 @@ struct llm_graph_context {
 
     lm_ggml_context * ctx0 = nullptr;
 
-    lm_ggml_backend_sched * sched;
+    lm_ggml_backend_sched_t sched;
 
-    lm_ggml_backend * backend_cpu; // TODO: needed by build_attn_mha, figure out a way to remove?
+    lm_ggml_backend_t backend_cpu; // TODO: needed by build_attn_mha, figure out a way to remove?
 
     const llama_adapter_cvec * cvec;
     const llama_adapter_loras * loras;
@@ -419,7 +446,7 @@ struct llm_graph_context {
 
     llm_graph_context(const llm_graph_params & params);
 
-    int64_t n_pos_per_token() const;
+    int64_t n_pos_per_embd() const;
 
     void cb(lm_ggml_tensor * cur, const char * name, int il) const;
 
@@ -505,12 +532,12 @@ struct llm_graph_context {
 
     lm_ggml_tensor * build_attn_mha(
             lm_ggml_cgraph * gf,
-            lm_ggml_tensor * q,
-            lm_ggml_tensor * k,
-            lm_ggml_tensor * v,
+            lm_ggml_tensor * q,       // [n_embd_head_q, n_head_q, n_tokens]
+            lm_ggml_tensor * k,       // [n_embd_head_k, n_head_k, n_tokens]
+            lm_ggml_tensor * v,       // [n_embd_head_v, n_head_v, n_tokens] (v_trans == false)
             lm_ggml_tensor * kq_b,
             lm_ggml_tensor * kq_mask,
-            bool v_trans,
+            lm_ggml_tensor * v_mla,   // [n_embd_head_v_mla, n_embd_head_v, n_head_v]
             float kq_scale) const;
 
     llm_graph_input_attn_no_cache * build_attn_inp_no_cache() const;
@@ -524,6 +551,7 @@ struct llm_graph_context {
             lm_ggml_tensor * k_cur, // [n_embd_head_k, n_head_k, n_tokens]
             lm_ggml_tensor * v_cur, // [n_embd_head_v, n_head_v, n_tokens]
             lm_ggml_tensor * kq_b,
+            lm_ggml_tensor * v_mla, // [n_embd_head_v_mla, n_embd_head_v, n_head_v]
             float kq_scale,
             int il) const;
 
@@ -538,6 +566,22 @@ struct llm_graph_context {
             lm_ggml_tensor * k_cur, // [n_embd_head_k, n_head_k, n_tokens]
             lm_ggml_tensor * v_cur, // [n_embd_head_v, n_head_v, n_tokens]
             lm_ggml_tensor * kq_b,
+            lm_ggml_tensor * v_mla, // [n_embd_head_v_mla, n_embd_head_v, n_head_v]
+            float kq_scale,
+            int il) const;
+
+    llm_graph_input_attn_kv_unified_iswa * build_attn_inp_kv_unified_iswa() const;
+
+    lm_ggml_tensor * build_attn(
+            llm_graph_input_attn_kv_unified_iswa * inp,
+            lm_ggml_cgraph * gf,
+            lm_ggml_tensor * wo,
+            lm_ggml_tensor * wo_b,
+            lm_ggml_tensor * q_cur, // [n_embd_head_q, n_head_q, n_tokens]
+            lm_ggml_tensor * k_cur, // [n_embd_head_k, n_head_k, n_tokens]
+            lm_ggml_tensor * v_cur, // [n_embd_head_v, n_head_v, n_tokens]
+            lm_ggml_tensor * kq_b,
+            lm_ggml_tensor * v_mla, // [n_embd_head_v_mla, n_embd_head_v, n_head_v]
             float kq_scale,
             int il) const;
 
@@ -552,6 +596,7 @@ struct llm_graph_context {
             lm_ggml_tensor * k_cur, // [n_embd_head_k, n_head_k, n_tokens]
             lm_ggml_tensor * v_cur, // [n_embd_head_v, n_head_v, n_tokens]
             lm_ggml_tensor * kq_b,
+            lm_ggml_tensor * v_mla, // [n_embd_head_v_mla, n_embd_head_v, n_head_v]
            float kq_scale,
            int il) const;
 
@@ -590,3 +635,6 @@ struct llm_graph_context {
             lm_ggml_tensor * cls_out,
             lm_ggml_tensor * cls_out_b) const;
 };
+
+// TODO: better name
+int32_t llama_relative_position_bucket(llama_pos x, llama_pos y, uint64_t n_buckets, bool bidirectional);
package/cpp/llama-hparams.cpp
CHANGED
@@ -2,6 +2,22 @@
 
 #include "ggml.h"
 
+void llama_hparams::set_swa_pattern(uint32_t n_pattern) {
+    for (uint32_t il = 0; il < n_layer; ++il) {
+        swa_layers[il] = n_pattern == 0 || (il % n_pattern < (n_pattern - 1));
+    }
+}
+
+bool llama_hparams::is_swa_any() const {
+    for (uint32_t il = 0; il < n_layer; ++il) {
+        if (swa_layers[il]) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 uint32_t llama_hparams::n_head(uint32_t il) const {
     if (il < n_layer) {
         return n_head_arr[il];
@@ -72,7 +88,7 @@ uint32_t llama_hparams::n_embd_v_s() const {
 
 bool llama_hparams::is_swa(uint32_t il) const {
     if (il < n_layer) {
-        return n_swa > 0 && n_swa_pattern > 0 && (il % n_swa_pattern < (n_swa_pattern - 1));
+        return swa_layers[il];
     }
 
     LM_GGML_ABORT("fatal error");
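
The predicate in set_swa_pattern is easy to misread: within each group of n_pattern layers, the last one stays dense and the rest become SWA (and n_pattern == 0 marks every layer as SWA). A small self-contained sketch, not part of the package, that prints the resulting layout:

    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint32_t n_layer   = 6;
        const uint32_t n_pattern = 3; // every 3rd layer stays dense

        for (uint32_t il = 0; il < n_layer; ++il) {
            // same predicate as llama_hparams::set_swa_pattern above
            const bool swa = n_pattern == 0 || (il % n_pattern < (n_pattern - 1));
            printf("il == %u: %s\n", il, swa ? "swa" : "dense");
        }
        // prints: swa, swa, dense, swa, swa, dense, matching the example
        // in the llama-hparams.h comment below
        return 0;
    }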
package/cpp/llama-hparams.h
CHANGED
@@ -14,6 +14,12 @@ enum llama_expert_gating_func_type {
     LLAMA_EXPERT_GATING_FUNC_TYPE_SIGMOID = 2,
 };
 
+enum llama_swa_type {
+    LLAMA_SWA_TYPE_NONE     = 0,
+    LLAMA_SWA_TYPE_STANDARD = 1,
+    LLAMA_SWA_TYPE_CHUNKED  = 2,
+};
+
 struct llama_hparams_posnet {
     uint32_t n_embd;
     uint32_t n_layer;
@@ -35,14 +41,16 @@ struct llama_hparams {
     uint32_t n_embd_features = 0;
     uint32_t n_layer;
     uint32_t n_rot;
-    uint32_t n_swa = 0;         // sliding window attention (SWA)
-    uint32_t n_swa_pattern = 1; // by default, all layers use non-sliding-window attention
     uint32_t n_embd_head_k; // dimension of keys (d_k). d_q is assumed to be the same, but there are n_head q heads, and only n_head_kv k-v heads
     uint32_t n_embd_head_v; // dimension of values (d_v) aka n_embd_head
     uint32_t n_expert = 0;
     uint32_t n_expert_used = 0;
     uint32_t n_rel_attn_bkts = 0;
 
+    // note: deepseek2 using MLA converts into MQA with larger heads, then decompresses to MHA
+    uint32_t n_embd_head_k_mla = 0;
+    uint32_t n_embd_head_v_mla = 0;
+
     // for WavTokenizer
     struct llama_hparams_posnet   posnet;
     struct llama_hparams_convnext convnext;
@@ -62,6 +70,7 @@ struct llama_hparams {
     float    expert_weights_scale = 0.0;
     bool     expert_weights_norm  = false;
     uint32_t expert_gating_func   = LLAMA_EXPERT_GATING_FUNC_TYPE_NONE;
+    uint32_t moe_every_n_layers   = 0;
 
     float f_norm_eps;
     float f_norm_rms_eps;
@@ -91,6 +100,15 @@ struct llama_hparams {
 
     std::array<int, 4> rope_sections;
 
+    // Sliding Window Attention (SWA)
+    llama_swa_type swa_type = LLAMA_SWA_TYPE_NONE;
+    // the size of the sliding window (0 - no SWA)
+    uint32_t n_swa = 0;
+    // if swa_layers[il] == true, then layer il is SWA
+    // if swa_layers[il] == false, then layer il is dense (i.e. non-SWA)
+    // by default, all layers are dense
+    std::array<bool, LLAMA_MAX_LAYERS> swa_layers;
+
     // for State Space Models
     uint32_t ssm_d_conv  = 0;
     uint32_t ssm_d_inner = 0;
@@ -111,11 +129,10 @@ struct llama_hparams {
     bool causal_attn   = true;
     bool use_alibi     = false;
     bool attn_soft_cap = false;
+    bool use_kq_norm   = true;
 
+    // llama4
     uint32_t n_moe_layer_step = 0;
-    bool     use_kq_norm      = true;
-    uint32_t n_attn_chunk     = 0;
-    // values below seems to be fixed on llama4
     uint32_t n_no_rope_layer_step    = 4;
     uint32_t n_attn_temp_floor_scale = 8192;
     float    f_attn_temp_scale       = 0.1;
@@ -128,6 +145,23 @@ struct llama_hparams {
     enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE;
     enum llama_rope_scaling_type rope_scaling_type_train = LLAMA_ROPE_SCALING_TYPE_NONE;
 
+    // this value n_pattern means that every nth layer is dense (i.e. non-SWA)
+    // note that if n_pattern == 0, all layers are SWA
+    //           if n_pattern == 1, all layers are dense
+    // example: n_pattern = 3
+    //   il == 0: swa
+    //   il == 1: swa
+    //   il == 2: dense
+    //   il == 3: swa
+    //   il == 4: swa
+    //   il == 5: dense
+    //   il == 6: swa
+    //   etc ...
+    void set_swa_pattern(uint32_t n_pattern);
+
+    // return true if one of the layers is SWA
+    bool is_swa_any() const;
+
     uint32_t n_head(uint32_t il = 0) const;
 
     uint32_t n_head_kv(uint32_t il = 0) const;