@novastera-oss/llamarn 0.2.6 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/cpp/include/llama.h +141 -38
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +58 -24
- package/cpp/LlamaCppModel.h +3 -3
- package/cpp/PureCppImpl.cpp +1 -1
- package/cpp/PureCppImpl.h +2 -2
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +15 -4
- package/cpp/llama.cpp/Makefile +2 -2
- package/cpp/llama.cpp/README.md +32 -13
- package/cpp/llama.cpp/common/CMakeLists.txt +10 -20
- package/cpp/llama.cpp/common/arg.cpp +37 -6
- package/cpp/llama.cpp/common/build-info.cpp.in +2 -2
- package/cpp/llama.cpp/common/chat-parser.cpp +5 -0
- package/cpp/llama.cpp/common/chat-parser.h +2 -0
- package/cpp/llama.cpp/common/chat.cpp +12 -9
- package/cpp/llama.cpp/common/chat.h +1 -1
- package/cpp/llama.cpp/common/common.cpp +53 -40
- package/cpp/llama.cpp/common/common.h +6 -2
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
- package/cpp/llama.cpp/common/speculative.cpp +6 -4
- package/cpp/llama.cpp/convert_hf_to_gguf.py +215 -76
- package/cpp/llama.cpp/ggml/CMakeLists.txt +48 -2
- package/cpp/llama.cpp/ggml/cmake/common.cmake +1 -2
- package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +33 -0
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +64 -13
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +6 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
- package/cpp/llama.cpp/ggml/src/ggml-common.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +124 -26
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +4 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +16 -7
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +93 -104
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +12 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +58 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +194 -69
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +1158 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1571 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +213 -37
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +45 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +59 -37
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +90 -39
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +61 -183
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +11 -10
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +16 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +260 -49
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +497 -282
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1078 -468
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +0 -2
- package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +20 -48
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +117 -165
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +192 -53
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +32 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +99 -159
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +8 -105
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +209 -92
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +158 -203
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +38 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +24 -20
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +36 -28
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +487 -247
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +69 -19
- package/cpp/llama.cpp/ggml/src/gguf.cpp +5 -1
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +133 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +25 -1
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +78 -3
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +97 -4
- package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
- package/cpp/llama.cpp/include/llama.h +141 -38
- package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
- package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +1 -0
- package/cpp/llama.cpp/src/CMakeLists.txt +2 -2
- package/cpp/llama.cpp/src/llama-arch.cpp +150 -3
- package/cpp/llama.cpp/src/llama-arch.h +25 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +736 -274
- package/cpp/llama.cpp/src/llama-batch.h +110 -57
- package/cpp/llama.cpp/src/llama-chat.cpp +30 -8
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-context.cpp +360 -266
- package/cpp/llama.cpp/src/llama-context.h +27 -23
- package/cpp/llama.cpp/src/llama-cparams.cpp +1 -1
- package/cpp/llama.cpp/src/llama-cparams.h +1 -1
- package/cpp/llama.cpp/src/llama-graph.cpp +411 -344
- package/cpp/llama.cpp/src/llama-graph.h +126 -58
- package/cpp/llama.cpp/src/llama-hparams.cpp +10 -2
- package/cpp/llama.cpp/src/llama-hparams.h +16 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +103 -73
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +34 -42
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +345 -221
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +75 -50
- package/cpp/llama.cpp/src/llama-kv-cells.h +51 -22
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +138 -0
- package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.cpp → llama-memory-recurrent.cpp} +302 -317
- package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.h → llama-memory-recurrent.h} +60 -68
- package/cpp/llama.cpp/src/llama-memory.cpp +41 -0
- package/cpp/llama.cpp/src/llama-memory.h +73 -36
- package/cpp/llama.cpp/src/llama-mmap.cpp +1 -1
- package/cpp/llama.cpp/src/llama-model-loader.cpp +42 -17
- package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/cpp/llama.cpp/src/llama-model.cpp +1630 -511
- package/cpp/llama.cpp/src/llama-model.h +26 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +89 -6
- package/cpp/llama.cpp/src/llama-vocab.cpp +58 -26
- package/cpp/llama.cpp/src/llama-vocab.h +1 -0
- package/cpp/llama.cpp/src/llama.cpp +11 -7
- package/cpp/llama.cpp/src/unicode.cpp +5 -0
- package/cpp/rn-completion.cpp +2 -2
- package/cpp/{rn-llama.hpp → rn-llama.h} +1 -1
- package/cpp/{rn-utils.hpp → rn-utils.h} +3 -0
- package/ios/include/chat.h +1 -1
- package/ios/include/common.h +6 -2
- package/ios/include/llama.h +141 -38
- package/ios/libs/llama.xcframework/Info.plist +15 -15
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3766 -3624
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4926 -4725
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4897 -4746
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3794 -3652
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -2
- package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +0 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13891
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +0 -1
- package/cpp/llama.cpp/src/llama-kv-cache.h +0 -44
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
|
@@ -20,6 +20,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
|
|
20
20
|
{ LLM_ARCH_BERT, "bert" },
|
|
21
21
|
{ LLM_ARCH_NOMIC_BERT, "nomic-bert" },
|
|
22
22
|
{ LLM_ARCH_NOMIC_BERT_MOE, "nomic-bert-moe" },
|
|
23
|
+
{ LLM_ARCH_NEO_BERT, "neo-bert" },
|
|
23
24
|
{ LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
|
|
24
25
|
{ LLM_ARCH_BLOOM, "bloom" },
|
|
25
26
|
{ LLM_ARCH_STABLELM, "stablelm" },
|
|
@@ -41,6 +42,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
|
|
41
42
|
{ LLM_ARCH_GEMMA, "gemma" },
|
|
42
43
|
{ LLM_ARCH_GEMMA2, "gemma2" },
|
|
43
44
|
{ LLM_ARCH_GEMMA3, "gemma3" },
|
|
45
|
+
{ LLM_ARCH_GEMMA3N, "gemma3n" },
|
|
44
46
|
{ LLM_ARCH_STARCODER2, "starcoder2" },
|
|
45
47
|
{ LLM_ARCH_MAMBA, "mamba" },
|
|
46
48
|
{ LLM_ARCH_XVERSE, "xverse" },
|
|
@@ -72,6 +74,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
|
|
72
74
|
{ LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
|
|
73
75
|
{ LLM_ARCH_PLM, "plm" },
|
|
74
76
|
{ LLM_ARCH_BAILINGMOE, "bailingmoe" },
|
|
77
|
+
{ LLM_ARCH_DOTS1, "dots1" },
|
|
78
|
+
{ LLM_ARCH_ARCEE, "arcee" },
|
|
75
79
|
{ LLM_ARCH_UNKNOWN, "(unknown)" },
|
|
76
80
|
};
|
|
77
81
|
|
|
@@ -144,6 +148,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|
|
144
148
|
{ LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
|
|
145
149
|
{ LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
|
|
146
150
|
{ LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },
|
|
151
|
+
{ LLM_KV_ATTENTION_LAYER_INDICES, "%s.attention.layer_indices" },
|
|
147
152
|
|
|
148
153
|
{ LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
|
|
149
154
|
{ LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
|
|
@@ -194,13 +199,13 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|
|
194
199
|
{ LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" },
|
|
195
200
|
{ LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" },
|
|
196
201
|
{ LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" },
|
|
202
|
+
{ LLM_KV_TOKENIZER_ADD_SEP, "tokenizer.ggml.add_sep_token" },
|
|
197
203
|
{ LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" },
|
|
198
204
|
{ LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" },
|
|
199
205
|
{ LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
|
|
200
206
|
{ LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" },
|
|
201
207
|
{ LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" },
|
|
202
208
|
{ LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat_template" },
|
|
203
|
-
{ LLM_KV_TOKENIZER_CHAT_TEMPLATE_N, "tokenizer.chat_template.%s" },
|
|
204
209
|
{ LLM_KV_TOKENIZER_FIM_PRE_ID, "tokenizer.ggml.fim_pre_token_id" },
|
|
205
210
|
{ LLM_KV_TOKENIZER_FIM_SUF_ID, "tokenizer.ggml.fim_suf_token_id" },
|
|
206
211
|
{ LLM_KV_TOKENIZER_FIM_MID_ID, "tokenizer.ggml.fim_mid_token_id" },
|
|
@@ -244,6 +249,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
|
244
249
|
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
|
245
250
|
},
|
|
246
251
|
},
|
|
252
|
+
{
|
|
253
|
+
LLM_ARCH_ARCEE,
|
|
254
|
+
{
|
|
255
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
256
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
257
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
258
|
+
{ LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
|
|
259
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
260
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
261
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
262
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
263
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
264
|
+
{ LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
|
|
265
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
266
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
267
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
268
|
+
},
|
|
269
|
+
},
|
|
247
270
|
{
|
|
248
271
|
LLM_ARCH_LLAMA4,
|
|
249
272
|
{
|
|
@@ -495,6 +518,21 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
|
495
518
|
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
|
496
519
|
},
|
|
497
520
|
},
|
|
521
|
+
{
|
|
522
|
+
LLM_ARCH_NEO_BERT,
|
|
523
|
+
{
|
|
524
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
525
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
526
|
+
{ LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
|
|
527
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
528
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
529
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
530
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
531
|
+
{ LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
|
|
532
|
+
{ LLM_TENSOR_CLS, "cls" },
|
|
533
|
+
{ LLM_TENSOR_CLS_OUT, "cls.output" },
|
|
534
|
+
},
|
|
535
|
+
},
|
|
498
536
|
{
|
|
499
537
|
LLM_ARCH_JINA_BERT_V2,
|
|
500
538
|
{
|
|
@@ -895,6 +933,42 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
|
895
933
|
{ LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
|
|
896
934
|
},
|
|
897
935
|
},
|
|
936
|
+
{
|
|
937
|
+
LLM_ARCH_GEMMA3N,
|
|
938
|
+
{
|
|
939
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
940
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
941
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
942
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
943
|
+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
|
|
944
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
945
|
+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
|
|
946
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
947
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
948
|
+
{ LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
|
|
949
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
950
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
951
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
952
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
953
|
+
{ LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
|
|
954
|
+
{ LLM_TENSOR_PER_LAYER_TOKEN_EMBD, "per_layer_token_embd" },
|
|
955
|
+
{ LLM_TENSOR_PER_LAYER_MODEL_PROJ, "per_layer_model_proj" },
|
|
956
|
+
{ LLM_TENSOR_PER_LAYER_PROJ_NORM, "per_layer_proj_norm" },
|
|
957
|
+
{ LLM_TENSOR_ALTUP_UNEMBD_PROJ, "altup_unembd_proj" },
|
|
958
|
+
{ LLM_TENSOR_ALTUP_PROJ, "altup_proj" },
|
|
959
|
+
{ LLM_TENSOR_PER_LAYER_INP_GATE, "blk.%d.inp_gate" },
|
|
960
|
+
{ LLM_TENSOR_PER_LAYER_PROJ, "blk.%d.proj" },
|
|
961
|
+
{ LLM_TENSOR_PER_LAYER_POST_NORM, "blk.%d.post_norm" },
|
|
962
|
+
{ LLM_TENSOR_ALTUP_CORRECT_COEF, "blk.%d.altup_correct_coef" },
|
|
963
|
+
{ LLM_TENSOR_ALTUP_CORRECT_SCALE, "blk.%d.altup_correct_scale" },
|
|
964
|
+
{ LLM_TENSOR_ALTUP_PREDICT_COEF, "blk.%d.altup_predict_coef" },
|
|
965
|
+
{ LLM_TENSOR_ALTUP_ROUTER, "blk.%d.altup_router" },
|
|
966
|
+
{ LLM_TENSOR_ALTUP_ROUTER_NORM, "blk.%d.altup_router_norm" },
|
|
967
|
+
{ LLM_TENSOR_LAUREL_L, "blk.%d.laurel_l" },
|
|
968
|
+
{ LLM_TENSOR_LAUREL_R, "blk.%d.laurel_r" },
|
|
969
|
+
{ LLM_TENSOR_LAUREL_POST_NORM, "blk.%d.laurel_post_norm" },
|
|
970
|
+
},
|
|
971
|
+
},
|
|
898
972
|
{
|
|
899
973
|
LLM_ARCH_STARCODER2,
|
|
900
974
|
{
|
|
@@ -1556,6 +1630,34 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
|
1556
1630
|
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
|
|
1557
1631
|
},
|
|
1558
1632
|
},
|
|
1633
|
+
{
|
|
1634
|
+
LLM_ARCH_DOTS1,
|
|
1635
|
+
{
|
|
1636
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1637
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1638
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1639
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1640
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1641
|
+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
|
|
1642
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1643
|
+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
|
|
1644
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1645
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1646
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1647
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1648
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1649
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1650
|
+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
|
|
1651
|
+
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
|
|
1652
|
+
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
|
|
1653
|
+
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
|
1654
|
+
{ LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
|
|
1655
|
+
{ LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
|
|
1656
|
+
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
|
|
1657
|
+
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
|
|
1658
|
+
{ LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
|
|
1659
|
+
}
|
|
1660
|
+
},
|
|
1559
1661
|
{
|
|
1560
1662
|
LLM_ARCH_UNKNOWN,
|
|
1561
1663
|
{
|
|
@@ -1684,6 +1786,23 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
|
|
|
1684
1786
|
{LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
|
|
1685
1787
|
{LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
|
|
1686
1788
|
{LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
|
|
1789
|
+
// altup / laurel (gemma 3n)
|
|
1790
|
+
{LLM_TENSOR_PER_LAYER_TOKEN_EMBD, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
|
|
1791
|
+
{LLM_TENSOR_PER_LAYER_MODEL_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
|
|
1792
|
+
{LLM_TENSOR_PER_LAYER_PROJ_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
|
|
1793
|
+
{LLM_TENSOR_ALTUP_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
|
|
1794
|
+
{LLM_TENSOR_ALTUP_UNEMBD_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
|
|
1795
|
+
{LLM_TENSOR_PER_LAYER_INP_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1796
|
+
{LLM_TENSOR_PER_LAYER_PROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1797
|
+
{LLM_TENSOR_PER_LAYER_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1798
|
+
{LLM_TENSOR_ALTUP_CORRECT_COEF, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1799
|
+
{LLM_TENSOR_ALTUP_CORRECT_SCALE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1800
|
+
{LLM_TENSOR_ALTUP_PREDICT_COEF, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1801
|
+
{LLM_TENSOR_ALTUP_ROUTER, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1802
|
+
{LLM_TENSOR_ALTUP_ROUTER_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1803
|
+
{LLM_TENSOR_LAUREL_L, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1804
|
+
{LLM_TENSOR_LAUREL_R, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1805
|
+
{LLM_TENSOR_LAUREL_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1687
1806
|
// this tensor is loaded for T5, but never used
|
|
1688
1807
|
{LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
|
|
1689
1808
|
{LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, GGML_OP_IM2COL}},
|
|
@@ -1707,8 +1826,14 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
|
|
|
1707
1826
|
LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
|
|
1708
1827
|
|
|
1709
1828
|
std::string LLM_KV::operator()(llm_kv kv) const {
|
|
1710
|
-
|
|
1711
|
-
|
|
1829
|
+
std::string name = ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
|
|
1830
|
+
|
|
1831
|
+
if (suffix != nullptr) {
|
|
1832
|
+
name += ".";
|
|
1833
|
+
name += suffix;
|
|
1834
|
+
}
|
|
1835
|
+
|
|
1836
|
+
return name;
|
|
1712
1837
|
}
|
|
1713
1838
|
|
|
1714
1839
|
std::string LLM_TN_IMPL::str() const {
|
|
@@ -1747,3 +1872,25 @@ llm_arch llm_arch_from_string(const std::string & name) {
|
|
|
1747
1872
|
const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
|
|
1748
1873
|
return LLM_TENSOR_INFOS.at(tensor);
|
|
1749
1874
|
}
|
|
1875
|
+
|
|
1876
|
+
bool llm_arch_is_recurrent(const llm_arch & arch) {
|
|
1877
|
+
switch (arch) {
|
|
1878
|
+
case LLM_ARCH_MAMBA:
|
|
1879
|
+
case LLM_ARCH_RWKV6:
|
|
1880
|
+
case LLM_ARCH_RWKV6QWEN2:
|
|
1881
|
+
case LLM_ARCH_RWKV7:
|
|
1882
|
+
case LLM_ARCH_ARWKV7:
|
|
1883
|
+
return true;
|
|
1884
|
+
default:
|
|
1885
|
+
return false;
|
|
1886
|
+
}
|
|
1887
|
+
}
|
|
1888
|
+
|
|
1889
|
+
bool llm_arch_is_hybrid(const llm_arch & arch) {
|
|
1890
|
+
// TODO: There are currently no hybrid models! Once there are, this will be
|
|
1891
|
+
// the place to identify them
|
|
1892
|
+
switch (arch) {
|
|
1893
|
+
default:
|
|
1894
|
+
return false;
|
|
1895
|
+
}
|
|
1896
|
+
}
|
|
@@ -24,6 +24,7 @@ enum llm_arch {
|
|
|
24
24
|
LLM_ARCH_BERT,
|
|
25
25
|
LLM_ARCH_NOMIC_BERT,
|
|
26
26
|
LLM_ARCH_NOMIC_BERT_MOE,
|
|
27
|
+
LLM_ARCH_NEO_BERT,
|
|
27
28
|
LLM_ARCH_JINA_BERT_V2,
|
|
28
29
|
LLM_ARCH_BLOOM,
|
|
29
30
|
LLM_ARCH_STABLELM,
|
|
@@ -45,6 +46,7 @@ enum llm_arch {
|
|
|
45
46
|
LLM_ARCH_GEMMA,
|
|
46
47
|
LLM_ARCH_GEMMA2,
|
|
47
48
|
LLM_ARCH_GEMMA3,
|
|
49
|
+
LLM_ARCH_GEMMA3N,
|
|
48
50
|
LLM_ARCH_STARCODER2,
|
|
49
51
|
LLM_ARCH_MAMBA,
|
|
50
52
|
LLM_ARCH_XVERSE,
|
|
@@ -76,6 +78,8 @@ enum llm_arch {
|
|
|
76
78
|
LLM_ARCH_WAVTOKENIZER_DEC,
|
|
77
79
|
LLM_ARCH_PLM,
|
|
78
80
|
LLM_ARCH_BAILINGMOE,
|
|
81
|
+
LLM_ARCH_DOTS1,
|
|
82
|
+
LLM_ARCH_ARCEE,
|
|
79
83
|
LLM_ARCH_UNKNOWN,
|
|
80
84
|
};
|
|
81
85
|
|
|
@@ -148,6 +152,7 @@ enum llm_kv {
|
|
|
148
152
|
LLM_KV_ATTENTION_SCALE,
|
|
149
153
|
LLM_KV_ATTENTION_KEY_LENGTH_MLA,
|
|
150
154
|
LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
|
|
155
|
+
LLM_KV_ATTENTION_LAYER_INDICES,
|
|
151
156
|
|
|
152
157
|
LLM_KV_ROPE_DIMENSION_COUNT,
|
|
153
158
|
LLM_KV_ROPE_DIMENSION_SECTIONS,
|
|
@@ -190,13 +195,13 @@ enum llm_kv {
|
|
|
190
195
|
LLM_KV_TOKENIZER_MASK_ID,
|
|
191
196
|
LLM_KV_TOKENIZER_ADD_BOS,
|
|
192
197
|
LLM_KV_TOKENIZER_ADD_EOS,
|
|
198
|
+
LLM_KV_TOKENIZER_ADD_SEP,
|
|
193
199
|
LLM_KV_TOKENIZER_ADD_PREFIX,
|
|
194
200
|
LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
|
|
195
201
|
LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
|
|
196
202
|
LLM_KV_TOKENIZER_HF_JSON,
|
|
197
203
|
LLM_KV_TOKENIZER_RWKV,
|
|
198
204
|
LLM_KV_TOKENIZER_CHAT_TEMPLATE,
|
|
199
|
-
LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
|
|
200
205
|
LLM_KV_TOKENIZER_FIM_PRE_ID,
|
|
201
206
|
LLM_KV_TOKENIZER_FIM_SUF_ID,
|
|
202
207
|
LLM_KV_TOKENIZER_FIM_MID_ID,
|
|
@@ -265,6 +270,22 @@ enum llm_tensor {
|
|
|
265
270
|
LLM_TENSOR_LAYER_OUT_NORM,
|
|
266
271
|
LLM_TENSOR_POST_ATTN_NORM,
|
|
267
272
|
LLM_TENSOR_POST_MLP_NORM,
|
|
273
|
+
LLM_TENSOR_PER_LAYER_TOKEN_EMBD, // gemma3n
|
|
274
|
+
LLM_TENSOR_PER_LAYER_MODEL_PROJ, // gemma3n
|
|
275
|
+
LLM_TENSOR_PER_LAYER_INP_GATE, // gemma3n
|
|
276
|
+
LLM_TENSOR_PER_LAYER_PROJ, // gemma3n
|
|
277
|
+
LLM_TENSOR_PER_LAYER_PROJ_NORM, // gemma3n
|
|
278
|
+
LLM_TENSOR_PER_LAYER_POST_NORM, // gemma3n
|
|
279
|
+
LLM_TENSOR_ALTUP_PROJ, // gemma3n
|
|
280
|
+
LLM_TENSOR_ALTUP_UNEMBD_PROJ, // gemma3n
|
|
281
|
+
LLM_TENSOR_ALTUP_CORRECT_COEF, // gemma3n
|
|
282
|
+
LLM_TENSOR_ALTUP_CORRECT_SCALE, // gemma3n
|
|
283
|
+
LLM_TENSOR_ALTUP_PREDICT_COEF, // gemma3n
|
|
284
|
+
LLM_TENSOR_ALTUP_ROUTER, // gemma3n
|
|
285
|
+
LLM_TENSOR_ALTUP_ROUTER_NORM, // gemma3n
|
|
286
|
+
LLM_TENSOR_LAUREL_L, // gemma3n
|
|
287
|
+
LLM_TENSOR_LAUREL_R, // gemma3n
|
|
288
|
+
LLM_TENSOR_LAUREL_POST_NORM, // gemma3n
|
|
268
289
|
LLM_TENSOR_SSM_IN,
|
|
269
290
|
LLM_TENSOR_SSM_CONV1D,
|
|
270
291
|
LLM_TENSOR_SSM_X,
|
|
@@ -437,3 +458,6 @@ const char * llm_arch_name(llm_arch arch);
|
|
|
437
458
|
llm_arch llm_arch_from_string(const std::string & name);
|
|
438
459
|
|
|
439
460
|
const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
|
|
461
|
+
|
|
462
|
+
bool llm_arch_is_recurrent(const llm_arch & arch);
|
|
463
|
+
bool llm_arch_is_hybrid (const llm_arch & arch);
|