@novastera-oss/llamarn 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/cpp/include/llama.h +8 -3
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +56 -22
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +1 -2
- package/cpp/llama.cpp/README.md +4 -5
- package/cpp/llama.cpp/build-xcframework.sh +1 -1
- package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
- package/cpp/llama.cpp/common/arg.cpp +24 -0
- package/cpp/llama.cpp/common/chat.cpp +37 -20
- package/cpp/llama.cpp/common/chat.h +2 -0
- package/cpp/llama.cpp/common/common.cpp +3 -0
- package/cpp/llama.cpp/common/common.h +5 -0
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
- package/cpp/llama.cpp/convert_hf_to_gguf.py +860 -23
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +9 -0
- package/cpp/llama.cpp/ggml/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +206 -10
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +17 -1
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +36 -18
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +68 -5
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +16 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +37 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +10 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +109 -108
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +1027 -1038
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +53 -52
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +56 -55
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +42 -41
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +24 -23
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +29 -28
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +30 -29
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +83 -82
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +20 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +9 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +111 -103
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1405 -240
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +25 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +56 -40
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +212 -34
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +35 -11
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +187 -54
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +71 -29
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +8 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +14 -12
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +5 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +15 -10
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +12 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +269 -110
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +2 -8
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +97 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +14 -5
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +125 -183
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +51 -9
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +394 -80
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +616 -239
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +741 -571
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -24
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +99 -166
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +94 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +697 -1098
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +6 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +104 -62
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +132 -201
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +39 -38
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +12 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +767 -292
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +58 -7
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +23 -3
- package/cpp/llama.cpp/ggml/src/ggml.c +449 -72
- package/cpp/llama.cpp/ggml/src/gguf.cpp +13 -2
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +285 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +27 -0
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +137 -21
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +109 -7
- package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
- package/cpp/llama.cpp/include/llama.h +8 -43
- package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +265 -3
- package/cpp/llama.cpp/src/llama-arch.h +36 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +596 -359
- package/cpp/llama.cpp/src/llama-batch.h +105 -70
- package/cpp/llama.cpp/src/llama-chat.cpp +26 -6
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-context.cpp +101 -107
- package/cpp/llama.cpp/src/llama-context.h +13 -13
- package/cpp/llama.cpp/src/llama-graph.cpp +286 -404
- package/cpp/llama.cpp/src/llama-graph.h +78 -79
- package/cpp/llama.cpp/src/llama-hparams.cpp +11 -1
- package/cpp/llama.cpp/src/llama-hparams.h +11 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +74 -66
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +23 -26
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +312 -157
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +79 -46
- package/cpp/llama.cpp/src/llama-kv-cells.h +97 -21
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +73 -69
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +19 -22
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +88 -77
- package/cpp/llama.cpp/src/llama-memory-recurrent.h +15 -20
- package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
- package/cpp/llama.cpp/src/llama-memory.h +21 -22
- package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/cpp/llama.cpp/src/llama-model.cpp +5301 -2922
- package/cpp/llama.cpp/src/llama-model.h +40 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +88 -5
- package/cpp/llama.cpp/src/llama-vocab.cpp +37 -3
- package/cpp/llama.cpp/src/llama-vocab.h +42 -0
- package/cpp/rn-utils.h +3 -0
- package/ios/include/chat.h +2 -0
- package/ios/include/common.h +5 -0
- package/ios/include/llama.h +8 -43
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3891 -3744
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5095 -4900
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5066 -4871
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3919 -3773
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
- package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
{%- set today = strftime_now("%Y-%m-%d") %}
|
|
2
|
+
{%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information or when the user's request requires up-to-date or specific data, you must use the available tools to fetch the information. Do not hesitate to use tools whenever they can provide a more accurate or complete response. If no relevant tools are available, then clearly state that you don't have the information and avoid making up anything.
|
|
3
|
+
|
|
4
|
+
If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\").
|
|
5
|
+
You are always very attentive to dates, and when asked about information at specific dates, you discard information that is at another date.
|
|
6
|
+
You follow these instructions in all languages, and always respond to the user in the language they use or request.
|
|
7
|
+
Next sections describe the capabilities that you have.
|
|
8
|
+
|
|
9
|
+
# WEB BROWSING INSTRUCTIONS
|
|
10
|
+
|
|
11
|
+
You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat.
|
|
12
|
+
|
|
13
|
+
# MULTI-MODAL INSTRUCTIONS
|
|
14
|
+
|
|
15
|
+
You have the ability to read images, but you cannot generate images. You also cannot transcribe audio files or videos.
|
|
16
|
+
You cannot read nor transcribe audio files or videos.
|
|
17
|
+
|
|
18
|
+
# TOOL CALLING INSTRUCTIONS
|
|
19
|
+
|
|
20
|
+
You may have access to tools that you can use to fetch information or perform actions. You must use these tools in the following situations:
|
|
21
|
+
|
|
22
|
+
1. When the request requires up-to-date information.
|
|
23
|
+
2. When the request requires specific data that you do not have in your knowledge base.
|
|
24
|
+
3. When the request involves actions that you cannot perform without tools.
|
|
25
|
+
|
|
26
|
+
Always prioritize using tools to provide the most accurate and helpful response. If tools are not available, inform the user that you cannot perform the requested action at the moment." %}
|
|
27
|
+
|
|
28
|
+
{{- bos_token }}
|
|
29
|
+
|
|
30
|
+
{%- set system_prompt = default_system_message %}
|
|
31
|
+
{%- set loop_messages = messages %}
|
|
32
|
+
|
|
33
|
+
{%- if not tools is defined %}
|
|
34
|
+
{%- set tools = none %}
|
|
35
|
+
{%- endif %}
|
|
36
|
+
|
|
37
|
+
{%- if messages|length > 0 and messages[0]['role'] == 'system' %}
|
|
38
|
+
{%- if messages[0]['content'] is string %}
|
|
39
|
+
{%- set system_prompt = messages[0]['content'] %}
|
|
40
|
+
{%- else %}
|
|
41
|
+
{%- set system_prompt = messages[0]['content'][0]['text'] %}
|
|
42
|
+
{%- endif %}
|
|
43
|
+
{%- set loop_messages = messages[1:] %}
|
|
44
|
+
{%- endif %}
|
|
45
|
+
|
|
46
|
+
{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}
|
|
47
|
+
|
|
48
|
+
{%- set ns = namespace(index=0) %}
|
|
49
|
+
{%- for message in loop_messages %}
|
|
50
|
+
{%- if not (message.role == "tool" or (message.get('tool_calls'))) %}
|
|
51
|
+
{%- if (message["role"] == "user") != (ns.index % 2 == 0) %}
|
|
52
|
+
{{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }}
|
|
53
|
+
{%- endif %}
|
|
54
|
+
{%- set ns.index = ns.index + 1 %}
|
|
55
|
+
{%- endif %}
|
|
56
|
+
{%- endfor %}
|
|
57
|
+
|
|
58
|
+
{{- '[SYSTEM_PROMPT]' + system_prompt + '[/SYSTEM_PROMPT]' }}
|
|
59
|
+
|
|
60
|
+
{%- for message in loop_messages %}
|
|
61
|
+
{%- if message['role'] == 'system' %}
|
|
62
|
+
{%- if message['content'] is string %}
|
|
63
|
+
{{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
|
|
64
|
+
{%- else %}
|
|
65
|
+
{{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
|
|
66
|
+
{%- endif %}
|
|
67
|
+
{%- elif message['role'] == 'user' %}
|
|
68
|
+
{%- if tools is not none and (message == user_messages[-1]) %}
|
|
69
|
+
{{- '[AVAILABLE_TOOLS]' + tools|tojson + '[/AVAILABLE_TOOLS]' }}
|
|
70
|
+
{%- endif %}
|
|
71
|
+
{{- '[INST]' }}
|
|
72
|
+
{%- if message['content'] is string %}
|
|
73
|
+
{{- message['content'] }}
|
|
74
|
+
{%- else %}
|
|
75
|
+
{%- for block in message['content'] %}
|
|
76
|
+
{%- if block['type'] == 'text' %}
|
|
77
|
+
{{- block['text'] }}
|
|
78
|
+
{%- elif block['type'] in ['image', 'image_url'] %}
|
|
79
|
+
{{- '[IMG]' }}
|
|
80
|
+
{%- else %}
|
|
81
|
+
{{- raise_exception('Only text and image blocks are supported in message content!') }}
|
|
82
|
+
{%- endif %}
|
|
83
|
+
{%- endfor %}
|
|
84
|
+
{%- endif %}
|
|
85
|
+
{{- '[/INST]' }}
|
|
86
|
+
{%- elif message['role'] == 'assistant' %}
|
|
87
|
+
{%- if message.get('tool_calls') %}
|
|
88
|
+
{%- for tool_call in message.tool_calls %}
|
|
89
|
+
{{- '[TOOL_CALLS]' + tool_call.function.name }}
|
|
90
|
+
{%- if not tool_call.id is defined or tool_call.id is not string or tool_call.id|length != 9 %}
|
|
91
|
+
{{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
|
|
92
|
+
{%- endif %}
|
|
93
|
+
{{- '[CALL_ID]' + tool_call.id }}
|
|
94
|
+
{{- '[ARGS]' + tool_call['function']['arguments']|tojson }}
|
|
95
|
+
{%- endfor %}
|
|
96
|
+
{{- eos_token }}
|
|
97
|
+
{%- elif message['content'] is string %}
|
|
98
|
+
{{- message['content'] + eos_token }}
|
|
99
|
+
{%- else %}
|
|
100
|
+
{%- for block in message['content'] %}
|
|
101
|
+
{%- if block['type'] == 'text' %}
|
|
102
|
+
{{- block['text'] }}
|
|
103
|
+
{%- elif block['type'] in ['image', 'image_url'] %}
|
|
104
|
+
{{- '[IMG]' }}
|
|
105
|
+
{%- else %}
|
|
106
|
+
{{- raise_exception('Only text and image blocks are supported in assistant content!') }}
|
|
107
|
+
{%- endif %}
|
|
108
|
+
{%- endfor %}
|
|
109
|
+
{{- eos_token }}
|
|
110
|
+
{%- endif %}
|
|
111
|
+
{%- elif message['role'] == 'tool_results' or message['role'] == 'tool' %}
|
|
112
|
+
{%- if message.content is defined and message.content.content is defined %}
|
|
113
|
+
{%- set content = message.content.content %}
|
|
114
|
+
{%- else %}
|
|
115
|
+
{%- set content = message.content %}
|
|
116
|
+
{%- endif %}
|
|
117
|
+
{%- if not message.tool_call_id is defined or message.tool_call_id is not string or message['tool_call_id']|length != 9 %}
|
|
118
|
+
{{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
|
|
119
|
+
{%- endif %}
|
|
120
|
+
{{- '[TOOL_RESULTS]' + message.tool_call_id + '[TOOL_CONTENT]' + content|string + '[/TOOL_RESULTS]' }}
|
|
121
|
+
{%- else %}
|
|
122
|
+
{{- raise_exception('Only system, user, assistant, and tool roles are supported!') }}
|
|
123
|
+
{%- endif %}
|
|
124
|
+
{%- endfor %}
|
|
@@ -42,8 +42,12 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
|
|
42
42
|
{ LLM_ARCH_GEMMA, "gemma" },
|
|
43
43
|
{ LLM_ARCH_GEMMA2, "gemma2" },
|
|
44
44
|
{ LLM_ARCH_GEMMA3, "gemma3" },
|
|
45
|
+
{ LLM_ARCH_GEMMA3N, "gemma3n" },
|
|
45
46
|
{ LLM_ARCH_STARCODER2, "starcoder2" },
|
|
46
47
|
{ LLM_ARCH_MAMBA, "mamba" },
|
|
48
|
+
{ LLM_ARCH_MAMBA2, "mamba2" },
|
|
49
|
+
{ LLM_ARCH_JAMBA, "jamba" },
|
|
50
|
+
{ LLM_ARCH_FALCON_H1, "falcon-h1" },
|
|
47
51
|
{ LLM_ARCH_XVERSE, "xverse" },
|
|
48
52
|
{ LLM_ARCH_COMMAND_R, "command-r" },
|
|
49
53
|
{ LLM_ARCH_COHERE2, "cohere2" },
|
|
@@ -69,12 +73,17 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
|
|
69
73
|
{ LLM_ARCH_ARWKV7, "arwkv7" },
|
|
70
74
|
{ LLM_ARCH_GRANITE, "granite" },
|
|
71
75
|
{ LLM_ARCH_GRANITE_MOE, "granitemoe" },
|
|
76
|
+
{ LLM_ARCH_GRANITE_HYBRID, "granitehybrid" },
|
|
72
77
|
{ LLM_ARCH_CHAMELEON, "chameleon" },
|
|
73
78
|
{ LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
|
|
74
79
|
{ LLM_ARCH_PLM, "plm" },
|
|
75
80
|
{ LLM_ARCH_BAILINGMOE, "bailingmoe" },
|
|
76
81
|
{ LLM_ARCH_DOTS1, "dots1" },
|
|
77
82
|
{ LLM_ARCH_ARCEE, "arcee" },
|
|
83
|
+
{ LLM_ARCH_ERNIE4_5, "ernie4_5" },
|
|
84
|
+
{ LLM_ARCH_HUNYUAN_MOE, "hunyuan-moe" },
|
|
85
|
+
{ LLM_ARCH_SMOLLM3, "smollm3" },
|
|
86
|
+
{ LLM_ARCH_LFM2, "lfm2" },
|
|
78
87
|
{ LLM_ARCH_UNKNOWN, "(unknown)" },
|
|
79
88
|
};
|
|
80
89
|
|
|
@@ -147,7 +156,6 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|
|
147
156
|
{ LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
|
|
148
157
|
{ LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
|
|
149
158
|
{ LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },
|
|
150
|
-
{ LLM_KV_ATTENTION_LAYER_INDICES, "%s.attention.layer_indices" },
|
|
151
159
|
|
|
152
160
|
{ LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
|
|
153
161
|
{ LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
|
|
@@ -168,6 +176,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|
|
168
176
|
{ LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" },
|
|
169
177
|
{ LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" },
|
|
170
178
|
{ LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
|
|
179
|
+
{ LLM_KV_SSM_GROUP_COUNT, "%s.ssm.group_count" },
|
|
171
180
|
{ LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" },
|
|
172
181
|
|
|
173
182
|
{ LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
|
|
@@ -180,6 +189,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|
|
180
189
|
|
|
181
190
|
{ LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },
|
|
182
191
|
|
|
192
|
+
{ LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" },
|
|
193
|
+
|
|
183
194
|
{ LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
|
|
184
195
|
{ LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
|
|
185
196
|
{ LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
|
|
@@ -198,6 +209,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|
|
198
209
|
{ LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" },
|
|
199
210
|
{ LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" },
|
|
200
211
|
{ LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" },
|
|
212
|
+
{ LLM_KV_TOKENIZER_ADD_SEP, "tokenizer.ggml.add_sep_token" },
|
|
201
213
|
{ LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" },
|
|
202
214
|
{ LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" },
|
|
203
215
|
{ LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
|
|
@@ -931,6 +943,42 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
|
931
943
|
{ LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
|
|
932
944
|
},
|
|
933
945
|
},
|
|
946
|
+
{
|
|
947
|
+
LLM_ARCH_GEMMA3N,
|
|
948
|
+
{
|
|
949
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
950
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
951
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
952
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
953
|
+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
|
|
954
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
955
|
+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
|
|
956
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
957
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
958
|
+
{ LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
|
|
959
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
960
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
961
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
962
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
963
|
+
{ LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
|
|
964
|
+
{ LLM_TENSOR_PER_LAYER_TOKEN_EMBD, "per_layer_token_embd" },
|
|
965
|
+
{ LLM_TENSOR_PER_LAYER_MODEL_PROJ, "per_layer_model_proj" },
|
|
966
|
+
{ LLM_TENSOR_PER_LAYER_PROJ_NORM, "per_layer_proj_norm" },
|
|
967
|
+
{ LLM_TENSOR_ALTUP_UNEMBD_PROJ, "altup_unembd_proj" },
|
|
968
|
+
{ LLM_TENSOR_ALTUP_PROJ, "altup_proj" },
|
|
969
|
+
{ LLM_TENSOR_PER_LAYER_INP_GATE, "blk.%d.inp_gate" },
|
|
970
|
+
{ LLM_TENSOR_PER_LAYER_PROJ, "blk.%d.proj" },
|
|
971
|
+
{ LLM_TENSOR_PER_LAYER_POST_NORM, "blk.%d.post_norm" },
|
|
972
|
+
{ LLM_TENSOR_ALTUP_CORRECT_COEF, "blk.%d.altup_correct_coef" },
|
|
973
|
+
{ LLM_TENSOR_ALTUP_CORRECT_SCALE, "blk.%d.altup_correct_scale" },
|
|
974
|
+
{ LLM_TENSOR_ALTUP_PREDICT_COEF, "blk.%d.altup_predict_coef" },
|
|
975
|
+
{ LLM_TENSOR_ALTUP_ROUTER, "blk.%d.altup_router" },
|
|
976
|
+
{ LLM_TENSOR_ALTUP_ROUTER_NORM, "blk.%d.altup_router_norm" },
|
|
977
|
+
{ LLM_TENSOR_LAUREL_L, "blk.%d.laurel_l" },
|
|
978
|
+
{ LLM_TENSOR_LAUREL_R, "blk.%d.laurel_r" },
|
|
979
|
+
{ LLM_TENSOR_LAUREL_POST_NORM, "blk.%d.laurel_post_norm" },
|
|
980
|
+
},
|
|
981
|
+
},
|
|
934
982
|
{
|
|
935
983
|
LLM_ARCH_STARCODER2,
|
|
936
984
|
{
|
|
@@ -965,6 +1013,77 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
|
965
1013
|
{ LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
|
|
966
1014
|
},
|
|
967
1015
|
},
|
|
1016
|
+
{
|
|
1017
|
+
LLM_ARCH_MAMBA2,
|
|
1018
|
+
{
|
|
1019
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1020
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1021
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1022
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1023
|
+
{ LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
|
|
1024
|
+
{ LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
|
|
1025
|
+
{ LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
|
|
1026
|
+
{ LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
|
|
1027
|
+
{ LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
|
|
1028
|
+
{ LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
|
|
1029
|
+
{ LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
|
|
1030
|
+
},
|
|
1031
|
+
},
|
|
1032
|
+
{
|
|
1033
|
+
LLM_ARCH_JAMBA,
|
|
1034
|
+
{
|
|
1035
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1036
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1037
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1038
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1039
|
+
{ LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
|
|
1040
|
+
{ LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
|
|
1041
|
+
{ LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
|
|
1042
|
+
{ LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
|
|
1043
|
+
{ LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
|
|
1044
|
+
{ LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
|
|
1045
|
+
{ LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
|
|
1046
|
+
{ LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
|
|
1047
|
+
{ LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
|
|
1048
|
+
{ LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
|
|
1049
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1050
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1051
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1052
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1053
|
+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
|
|
1054
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1055
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1056
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1057
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1058
|
+
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
|
|
1059
|
+
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
|
|
1060
|
+
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
|
1061
|
+
},
|
|
1062
|
+
},
|
|
1063
|
+
{
|
|
1064
|
+
LLM_ARCH_FALCON_H1,
|
|
1065
|
+
{
|
|
1066
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1067
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1068
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1069
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1070
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1071
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1072
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1073
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1074
|
+
{ LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
|
|
1075
|
+
{ LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
|
|
1076
|
+
{ LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
|
|
1077
|
+
{ LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
|
|
1078
|
+
{ LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
|
|
1079
|
+
{ LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
|
|
1080
|
+
{ LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
|
|
1081
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1082
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1083
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1084
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1085
|
+
},
|
|
1086
|
+
},
|
|
968
1087
|
{
|
|
969
1088
|
LLM_ARCH_XVERSE,
|
|
970
1089
|
{
|
|
@@ -1525,6 +1644,43 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
|
1525
1644
|
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
|
|
1526
1645
|
},
|
|
1527
1646
|
},
|
|
1647
|
+
{
|
|
1648
|
+
LLM_ARCH_GRANITE_HYBRID,
|
|
1649
|
+
{
|
|
1650
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1651
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1652
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1653
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1654
|
+
// mamba(2) ssm layers
|
|
1655
|
+
{ LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
|
|
1656
|
+
{ LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
|
|
1657
|
+
{ LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
|
|
1658
|
+
{ LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
|
|
1659
|
+
{ LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
|
|
1660
|
+
{ LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
|
|
1661
|
+
{ LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
|
|
1662
|
+
// attention layers
|
|
1663
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1664
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1665
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1666
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1667
|
+
// dense FFN
|
|
1668
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1669
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1670
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1671
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1672
|
+
// moe FFN
|
|
1673
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1674
|
+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
|
|
1675
|
+
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
|
|
1676
|
+
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
|
|
1677
|
+
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
|
1678
|
+
// shared expert
|
|
1679
|
+
{ LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
|
|
1680
|
+
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
|
|
1681
|
+
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
|
|
1682
|
+
},
|
|
1683
|
+
},
|
|
1528
1684
|
{
|
|
1529
1685
|
LLM_ARCH_CHAMELEON,
|
|
1530
1686
|
{
|
|
@@ -1620,6 +1776,84 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
|
1620
1776
|
{ LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
|
|
1621
1777
|
}
|
|
1622
1778
|
},
|
|
1779
|
+
{
|
|
1780
|
+
LLM_ARCH_ERNIE4_5,
|
|
1781
|
+
{
|
|
1782
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1783
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1784
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1785
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1786
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1787
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1788
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1789
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1790
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1791
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1792
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1793
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1794
|
+
},
|
|
1795
|
+
},
|
|
1796
|
+
{
|
|
1797
|
+
LLM_ARCH_HUNYUAN_MOE,
|
|
1798
|
+
{
|
|
1799
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1800
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1801
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1802
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1803
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1804
|
+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
|
|
1805
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1806
|
+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
|
|
1807
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1808
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1809
|
+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
|
|
1810
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1811
|
+
{ LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
|
|
1812
|
+
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
|
|
1813
|
+
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
|
|
1814
|
+
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
|
|
1815
|
+
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
|
|
1816
|
+
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
|
1817
|
+
},
|
|
1818
|
+
},
|
|
1819
|
+
{
|
|
1820
|
+
LLM_ARCH_SMOLLM3,
|
|
1821
|
+
{
|
|
1822
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1823
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1824
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1825
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1826
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1827
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1828
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1829
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1830
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1831
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1832
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1833
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1834
|
+
},
|
|
1835
|
+
},
|
|
1836
|
+
{
|
|
1837
|
+
LLM_ARCH_LFM2,
|
|
1838
|
+
{
|
|
1839
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1840
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1841
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1842
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1843
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1844
|
+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
|
|
1845
|
+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
|
|
1846
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1847
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1848
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1849
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1850
|
+
{ LLM_TENSOR_SHORTCONV_CONV, "blk.%d.shortconv.conv" },
|
|
1851
|
+
{ LLM_TENSOR_SHORTCONV_INPROJ, "blk.%d.shortconv.in_proj" },
|
|
1852
|
+
{ LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
|
|
1853
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1854
|
+
{ LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
|
|
1855
|
+
}
|
|
1856
|
+
},
|
|
1623
1857
|
{
|
|
1624
1858
|
LLM_ARCH_UNKNOWN,
|
|
1625
1859
|
{
|
|
@@ -1704,7 +1938,11 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
|
|
|
1704
1938
|
{LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}},
|
|
1705
1939
|
{LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
|
|
1706
1940
|
{LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
|
|
1941
|
+
{LLM_TENSOR_SSM_DT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1942
|
+
{LLM_TENSOR_SSM_B_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1943
|
+
{LLM_TENSOR_SSM_C_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1707
1944
|
{LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1945
|
+
{LLM_TENSOR_SSM_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1708
1946
|
{LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1709
1947
|
{LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1710
1948
|
{LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
@@ -1748,6 +1986,23 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
|
|
|
1748
1986
|
{LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
|
|
1749
1987
|
{LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
|
|
1750
1988
|
{LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
|
|
1989
|
+
// altup / laurel (gemma 3n)
|
|
1990
|
+
{LLM_TENSOR_PER_LAYER_TOKEN_EMBD, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
|
|
1991
|
+
{LLM_TENSOR_PER_LAYER_MODEL_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
|
|
1992
|
+
{LLM_TENSOR_PER_LAYER_PROJ_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
|
|
1993
|
+
{LLM_TENSOR_ALTUP_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
|
|
1994
|
+
{LLM_TENSOR_ALTUP_UNEMBD_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
|
|
1995
|
+
{LLM_TENSOR_PER_LAYER_INP_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1996
|
+
{LLM_TENSOR_PER_LAYER_PROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1997
|
+
{LLM_TENSOR_PER_LAYER_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1998
|
+
{LLM_TENSOR_ALTUP_CORRECT_COEF, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1999
|
+
{LLM_TENSOR_ALTUP_CORRECT_SCALE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
2000
|
+
{LLM_TENSOR_ALTUP_PREDICT_COEF, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
2001
|
+
{LLM_TENSOR_ALTUP_ROUTER, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
2002
|
+
{LLM_TENSOR_ALTUP_ROUTER_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
2003
|
+
{LLM_TENSOR_LAUREL_L, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
2004
|
+
{LLM_TENSOR_LAUREL_R, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
2005
|
+
{LLM_TENSOR_LAUREL_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1751
2006
|
// this tensor is loaded for T5, but never used
|
|
1752
2007
|
{LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
|
|
1753
2008
|
{LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, GGML_OP_IM2COL}},
|
|
@@ -1766,6 +2021,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
|
|
|
1766
2021
|
{LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1767
2022
|
{LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1768
2023
|
{LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
2024
|
+
{LLM_TENSOR_SHORTCONV_CONV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
|
|
2025
|
+
{LLM_TENSOR_SHORTCONV_INPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
2026
|
+
{LLM_TENSOR_SHORTCONV_OUTPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1769
2027
|
};
|
|
1770
2028
|
|
|
1771
2029
|
LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
|
|
@@ -1821,6 +2079,7 @@ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
|
|
|
1821
2079
|
bool llm_arch_is_recurrent(const llm_arch & arch) {
|
|
1822
2080
|
switch (arch) {
|
|
1823
2081
|
case LLM_ARCH_MAMBA:
|
|
2082
|
+
case LLM_ARCH_MAMBA2:
|
|
1824
2083
|
case LLM_ARCH_RWKV6:
|
|
1825
2084
|
case LLM_ARCH_RWKV6QWEN2:
|
|
1826
2085
|
case LLM_ARCH_RWKV7:
|
|
@@ -1832,9 +2091,12 @@ bool llm_arch_is_recurrent(const llm_arch & arch) {
|
|
|
1832
2091
|
}
|
|
1833
2092
|
|
|
1834
2093
|
bool llm_arch_is_hybrid(const llm_arch & arch) {
|
|
1835
|
-
// TODO: There are currently no hybrid models! Once there are, this will be
|
|
1836
|
-
// the place to identify them
|
|
1837
2094
|
switch (arch) {
|
|
2095
|
+
case LLM_ARCH_JAMBA:
|
|
2096
|
+
case LLM_ARCH_FALCON_H1:
|
|
2097
|
+
case LLM_ARCH_GRANITE_HYBRID:
|
|
2098
|
+
case LLM_ARCH_LFM2:
|
|
2099
|
+
return true;
|
|
1838
2100
|
default:
|
|
1839
2101
|
return false;
|
|
1840
2102
|
}
|
|
@@ -46,8 +46,12 @@ enum llm_arch {
|
|
|
46
46
|
LLM_ARCH_GEMMA,
|
|
47
47
|
LLM_ARCH_GEMMA2,
|
|
48
48
|
LLM_ARCH_GEMMA3,
|
|
49
|
+
LLM_ARCH_GEMMA3N,
|
|
49
50
|
LLM_ARCH_STARCODER2,
|
|
50
51
|
LLM_ARCH_MAMBA,
|
|
52
|
+
LLM_ARCH_MAMBA2,
|
|
53
|
+
LLM_ARCH_JAMBA,
|
|
54
|
+
LLM_ARCH_FALCON_H1,
|
|
51
55
|
LLM_ARCH_XVERSE,
|
|
52
56
|
LLM_ARCH_COMMAND_R,
|
|
53
57
|
LLM_ARCH_COHERE2,
|
|
@@ -73,12 +77,17 @@ enum llm_arch {
|
|
|
73
77
|
LLM_ARCH_ARWKV7,
|
|
74
78
|
LLM_ARCH_GRANITE,
|
|
75
79
|
LLM_ARCH_GRANITE_MOE,
|
|
80
|
+
LLM_ARCH_GRANITE_HYBRID,
|
|
76
81
|
LLM_ARCH_CHAMELEON,
|
|
77
82
|
LLM_ARCH_WAVTOKENIZER_DEC,
|
|
78
83
|
LLM_ARCH_PLM,
|
|
79
84
|
LLM_ARCH_BAILINGMOE,
|
|
80
85
|
LLM_ARCH_DOTS1,
|
|
81
86
|
LLM_ARCH_ARCEE,
|
|
87
|
+
LLM_ARCH_ERNIE4_5,
|
|
88
|
+
LLM_ARCH_HUNYUAN_MOE,
|
|
89
|
+
LLM_ARCH_SMOLLM3,
|
|
90
|
+
LLM_ARCH_LFM2,
|
|
82
91
|
LLM_ARCH_UNKNOWN,
|
|
83
92
|
};
|
|
84
93
|
|
|
@@ -151,7 +160,6 @@ enum llm_kv {
|
|
|
151
160
|
LLM_KV_ATTENTION_SCALE,
|
|
152
161
|
LLM_KV_ATTENTION_KEY_LENGTH_MLA,
|
|
153
162
|
LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
|
|
154
|
-
LLM_KV_ATTENTION_LAYER_INDICES,
|
|
155
163
|
|
|
156
164
|
LLM_KV_ROPE_DIMENSION_COUNT,
|
|
157
165
|
LLM_KV_ROPE_DIMENSION_SECTIONS,
|
|
@@ -172,6 +180,7 @@ enum llm_kv {
|
|
|
172
180
|
LLM_KV_SSM_CONV_KERNEL,
|
|
173
181
|
LLM_KV_SSM_STATE_SIZE,
|
|
174
182
|
LLM_KV_SSM_TIME_STEP_RANK,
|
|
183
|
+
LLM_KV_SSM_GROUP_COUNT,
|
|
175
184
|
LLM_KV_SSM_DT_B_C_RMS,
|
|
176
185
|
|
|
177
186
|
LLM_KV_WKV_HEAD_SIZE,
|
|
@@ -194,6 +203,7 @@ enum llm_kv {
|
|
|
194
203
|
LLM_KV_TOKENIZER_MASK_ID,
|
|
195
204
|
LLM_KV_TOKENIZER_ADD_BOS,
|
|
196
205
|
LLM_KV_TOKENIZER_ADD_EOS,
|
|
206
|
+
LLM_KV_TOKENIZER_ADD_SEP,
|
|
197
207
|
LLM_KV_TOKENIZER_ADD_PREFIX,
|
|
198
208
|
LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
|
|
199
209
|
LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
|
|
@@ -218,6 +228,8 @@ enum llm_kv {
|
|
|
218
228
|
|
|
219
229
|
LLM_KV_CLASSIFIER_OUTPUT_LABELS,
|
|
220
230
|
|
|
231
|
+
LLM_KV_SHORTCONV_L_CACHE,
|
|
232
|
+
|
|
221
233
|
// deprecated:
|
|
222
234
|
LLM_KV_TOKENIZER_PREFIX_ID,
|
|
223
235
|
LLM_KV_TOKENIZER_SUFFIX_ID,
|
|
@@ -268,12 +280,32 @@ enum llm_tensor {
|
|
|
268
280
|
LLM_TENSOR_LAYER_OUT_NORM,
|
|
269
281
|
LLM_TENSOR_POST_ATTN_NORM,
|
|
270
282
|
LLM_TENSOR_POST_MLP_NORM,
|
|
283
|
+
LLM_TENSOR_PER_LAYER_TOKEN_EMBD, // gemma3n
|
|
284
|
+
LLM_TENSOR_PER_LAYER_MODEL_PROJ, // gemma3n
|
|
285
|
+
LLM_TENSOR_PER_LAYER_INP_GATE, // gemma3n
|
|
286
|
+
LLM_TENSOR_PER_LAYER_PROJ, // gemma3n
|
|
287
|
+
LLM_TENSOR_PER_LAYER_PROJ_NORM, // gemma3n
|
|
288
|
+
LLM_TENSOR_PER_LAYER_POST_NORM, // gemma3n
|
|
289
|
+
LLM_TENSOR_ALTUP_PROJ, // gemma3n
|
|
290
|
+
LLM_TENSOR_ALTUP_UNEMBD_PROJ, // gemma3n
|
|
291
|
+
LLM_TENSOR_ALTUP_CORRECT_COEF, // gemma3n
|
|
292
|
+
LLM_TENSOR_ALTUP_CORRECT_SCALE, // gemma3n
|
|
293
|
+
LLM_TENSOR_ALTUP_PREDICT_COEF, // gemma3n
|
|
294
|
+
LLM_TENSOR_ALTUP_ROUTER, // gemma3n
|
|
295
|
+
LLM_TENSOR_ALTUP_ROUTER_NORM, // gemma3n
|
|
296
|
+
LLM_TENSOR_LAUREL_L, // gemma3n
|
|
297
|
+
LLM_TENSOR_LAUREL_R, // gemma3n
|
|
298
|
+
LLM_TENSOR_LAUREL_POST_NORM, // gemma3n
|
|
271
299
|
LLM_TENSOR_SSM_IN,
|
|
272
300
|
LLM_TENSOR_SSM_CONV1D,
|
|
273
301
|
LLM_TENSOR_SSM_X,
|
|
274
302
|
LLM_TENSOR_SSM_DT,
|
|
303
|
+
LLM_TENSOR_SSM_DT_NORM,
|
|
275
304
|
LLM_TENSOR_SSM_A,
|
|
305
|
+
LLM_TENSOR_SSM_B_NORM,
|
|
306
|
+
LLM_TENSOR_SSM_C_NORM,
|
|
276
307
|
LLM_TENSOR_SSM_D,
|
|
308
|
+
LLM_TENSOR_SSM_NORM,
|
|
277
309
|
LLM_TENSOR_SSM_OUT,
|
|
278
310
|
LLM_TENSOR_TIME_MIX_W0,
|
|
279
311
|
LLM_TENSOR_TIME_MIX_W1,
|
|
@@ -367,6 +399,9 @@ enum llm_tensor {
|
|
|
367
399
|
LLM_TENSOR_POS_NET_ATTN_K,
|
|
368
400
|
LLM_TENSOR_POS_NET_ATTN_V,
|
|
369
401
|
LLM_TENSOR_POS_NET_ATTN_OUT,
|
|
402
|
+
LLM_TENSOR_SHORTCONV_CONV,
|
|
403
|
+
LLM_TENSOR_SHORTCONV_INPROJ,
|
|
404
|
+
LLM_TENSOR_SHORTCONV_OUTPROJ,
|
|
370
405
|
};
|
|
371
406
|
|
|
372
407
|
enum llm_tensor_layer {
|