@novastera-oss/llamarn 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/cpp/include/llama.h +8 -3
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +56 -22
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +1 -2
- package/cpp/llama.cpp/README.md +4 -5
- package/cpp/llama.cpp/build-xcframework.sh +1 -1
- package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
- package/cpp/llama.cpp/common/arg.cpp +24 -0
- package/cpp/llama.cpp/common/chat.cpp +37 -20
- package/cpp/llama.cpp/common/chat.h +2 -0
- package/cpp/llama.cpp/common/common.cpp +3 -0
- package/cpp/llama.cpp/common/common.h +5 -0
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
- package/cpp/llama.cpp/convert_hf_to_gguf.py +860 -23
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +9 -0
- package/cpp/llama.cpp/ggml/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +206 -10
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +17 -1
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +36 -18
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +68 -5
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +16 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +37 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +10 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +109 -108
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +1027 -1038
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +53 -52
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +56 -55
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +42 -41
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +24 -23
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +29 -28
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +30 -29
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +83 -82
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +20 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +9 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +111 -103
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1405 -240
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +25 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +56 -40
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +212 -34
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +35 -11
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +187 -54
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +71 -29
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +8 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +14 -12
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +5 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +15 -10
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +12 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +269 -110
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +2 -8
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +97 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +14 -5
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +125 -183
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +51 -9
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +394 -80
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +616 -239
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +741 -571
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -24
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +99 -166
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +94 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +697 -1098
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +6 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +104 -62
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +132 -201
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +39 -38
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +12 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +767 -292
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +58 -7
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +23 -3
- package/cpp/llama.cpp/ggml/src/ggml.c +449 -72
- package/cpp/llama.cpp/ggml/src/gguf.cpp +13 -2
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +285 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +27 -0
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +137 -21
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +109 -7
- package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
- package/cpp/llama.cpp/include/llama.h +8 -43
- package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +265 -3
- package/cpp/llama.cpp/src/llama-arch.h +36 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +596 -359
- package/cpp/llama.cpp/src/llama-batch.h +105 -70
- package/cpp/llama.cpp/src/llama-chat.cpp +26 -6
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-context.cpp +101 -107
- package/cpp/llama.cpp/src/llama-context.h +13 -13
- package/cpp/llama.cpp/src/llama-graph.cpp +286 -404
- package/cpp/llama.cpp/src/llama-graph.h +78 -79
- package/cpp/llama.cpp/src/llama-hparams.cpp +11 -1
- package/cpp/llama.cpp/src/llama-hparams.h +11 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +74 -66
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +23 -26
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +312 -157
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +79 -46
- package/cpp/llama.cpp/src/llama-kv-cells.h +97 -21
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +73 -69
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +19 -22
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +88 -77
- package/cpp/llama.cpp/src/llama-memory-recurrent.h +15 -20
- package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
- package/cpp/llama.cpp/src/llama-memory.h +21 -22
- package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/cpp/llama.cpp/src/llama-model.cpp +5301 -2922
- package/cpp/llama.cpp/src/llama-model.h +40 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +88 -5
- package/cpp/llama.cpp/src/llama-vocab.cpp +37 -3
- package/cpp/llama.cpp/src/llama-vocab.h +42 -0
- package/cpp/rn-utils.h +3 -0
- package/ios/include/chat.h +2 -0
- package/ios/include/common.h +5 -0
- package/ios/include/llama.h +8 -43
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3891 -3744
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5095 -4900
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5066 -4871
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3919 -3773
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
- package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
package/cpp/llama.cpp/ggml/src/gguf.cpp:

@@ -335,7 +335,11 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
 
    for (uint32_t i = 0; i < magic.size(); i++) {
        if (magic[i] != GGUF_MAGIC[i]) {
-           GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
+           char c0 = isprint(magic[0]) ? magic[0] : '?';
+           char c1 = isprint(magic[1]) ? magic[1] : '?';
+           char c2 = isprint(magic[2]) ? magic[2] : '?';
+           char c3 = isprint(magic[3]) ? magic[3] : '?';
+           GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, c0, c1, c2, c3);
            gguf_free(ctx);
            return nullptr;
        }
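The hunk above only changes how a bad magic is reported: the four magic bytes are masked with isprint() before they are logged, so a corrupt or non-GGUF file can no longer dump raw control characters into the log. A rough Python rendering of the same check, for reference (the helper below is illustrative, not part of this package):

```python
def check_gguf_magic(path: str) -> bool:
    """Sketch of the sanitized magic check; GGUF files begin with b"GGUF"."""
    with open(path, "rb") as f:
        magic = f.read(4)
    if magic != b"GGUF":
        # Mirror the C-side isprint() guard: non-printable bytes become '?'.
        shown = "".join(chr(b) if 0x20 <= b <= 0x7E else "?" for b in magic)
        print(f"invalid magic characters: '{shown}', expected 'GGUF'")
        return False
    return True
```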
@@ -627,7 +631,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
            gguf_free(ctx);
            return nullptr;
        }
-       ctx->size += GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment);
+       size_t padded_size = GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment);
+       if (SIZE_MAX - ctx->size < padded_size) {
+           GGML_LOG_ERROR("%s: tensor '%s' size overflow, cannot accumulate size %zu + %zu\n",
+                   __func__, ti.t.name, ctx->size, padded_size);
+           gguf_free(ctx);
+           return nullptr;
+       }
+       ctx->size += padded_size;
    }
 }
 
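This hunk hardens the accumulation of padded tensor sizes against size_t wrap-around, so a crafted header can no longer overflow ctx->size and slip past later bounds checks. A minimal sketch of the same arithmetic, assuming a 64-bit size_t and ggml's power-of-two alignment (the function names here are illustrative):

```python
SIZE_MAX = 2**64 - 1  # size_t on a 64-bit target

def ggml_pad(x: int, align: int) -> int:
    # GGML_PAD(x, n): round x up to the next multiple of n (a power of two).
    return (x + align - 1) & ~(align - 1)

def accumulate_tensor_size(total: int, nbytes: int, alignment: int) -> int:
    padded = ggml_pad(nbytes, alignment)
    if SIZE_MAX - total < padded:
        # The C code logs an error and returns nullptr at this point.
        raise OverflowError(f"cannot accumulate size {total} + {padded}")
    return total + padded
```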
package/cpp/llama.cpp/gguf-py/gguf/constants.py:

@@ -118,6 +118,10 @@ class Keys:
         EMBEDDING_SCALE           = "{arch}.embedding_scale"
         TOKEN_SHIFT_COUNT         = "{arch}.token_shift_count"
         INTERLEAVE_MOE_LAYER_STEP = "{arch}.interleave_moe_layer_step"
+        ACTIVATION_SPARSITY_SCALE = "{arch}.activation_sparsity_scale"
+        ALTUP_ACTIVE_IDX          = "{arch}.altup.active_idx"
+        ALTUP_NUM_INPUTS          = "{arch}.altup.num_inputs"
+        EMBD_LENGTH_PER_LAYER_INP = "{arch}.embedding_length_per_layer_input"
 
     class Attention:
         HEAD_COUNT = "{arch}.attention.head_count"
@@ -142,6 +146,8 @@
         SCALE                  = "{arch}.attention.scale"
         KEY_LENGTH_MLA         = "{arch}.attention.key_length_mla"
         VALUE_LENGTH_MLA       = "{arch}.attention.value_length_mla"
+        SHARED_KV_LAYERS       = "{arch}.attention.shared_kv_layers"
+        SLIDING_WINDOW_PATTERN = "{arch}.attention.sliding_window_pattern"
 
     class Rope:
         DIMENSION_COUNT = "{arch}.rope.dimension_count"
@@ -164,6 +170,7 @@
         INNER_SIZE     = "{arch}.ssm.inner_size"
         STATE_SIZE     = "{arch}.ssm.state_size"
         TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
+        GROUP_COUNT    = "{arch}.ssm.group_count"
         DT_B_C_RMS     = "{arch}.ssm.dt_b_c_rms"
 
     class WKV:
@@ -180,6 +187,9 @@
     class Classifier:
         OUTPUT_LABELS = "{arch}.classifier.output_labels"
 
+    class ShortConv:
+        L_CACHE = "{arch}.shortconv.l_cache"
+
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
         PRE   = "tokenizer.ggml.pre"
@@ -198,6 +208,7 @@
         MASK_ID    = "tokenizer.ggml.mask_token_id"
         ADD_BOS    = "tokenizer.ggml.add_bos_token"
         ADD_EOS    = "tokenizer.ggml.add_eos_token"
+        ADD_SEP    = "tokenizer.ggml.add_sep_token"
         ADD_PREFIX = "tokenizer.ggml.add_space_prefix"
         REMOVE_EXTRA_WS      = "tokenizer.ggml.remove_extra_whitespaces"
         PRECOMPILED_CHARSMAP = "tokenizer.ggml.precompiled_charsmap"
@@ -280,6 +291,7 @@ class MODEL_ARCH(IntEnum):
     LLAMA4    = auto()
     DECI      = auto()
     FALCON    = auto()
+    FALCON_H1 = auto()
     BAICHUAN  = auto()
     GROK      = auto()
     GPT2      = auto()
@@ -313,12 +325,15 @@
     GEMMA      = auto()
     GEMMA2     = auto()
     GEMMA3     = auto()
+    GEMMA3N    = auto()
     STARCODER2 = auto()
     RWKV6      = auto()
     RWKV6QWEN2 = auto()
     RWKV7      = auto()
     ARWKV7     = auto()
     MAMBA      = auto()
+    MAMBA2     = auto()
+    JAMBA      = auto()
     XVERSE     = auto()
     COMMAND_R  = auto()
     COHERE2    = auto()
@@ -340,12 +355,17 @@
     EXAONE           = auto()
     GRANITE          = auto()
     GRANITE_MOE      = auto()
+    GRANITE_HYBRID   = auto()
     CHAMELEON        = auto()
     WAVTOKENIZER_DEC = auto()
     PLM              = auto()
     BAILINGMOE       = auto()
     DOTS1            = auto()
     ARCEE            = auto()
+    ERNIE4_5         = auto()
+    HUNYUAN_MOE      = auto()
+    SMOLLM3          = auto()
+    LFM2             = auto()
 
 
 class VISION_PROJECTOR_TYPE(IntEnum):
@@ -398,12 +418,32 @@ class MODEL_TENSOR(IntEnum):
     ATTN_Q_NORM    = auto()
     ATTN_K_NORM    = auto()
     LAYER_OUT_NORM = auto()
+    PER_LAYER_TOKEN_EMBD = auto()  # gemma3n
+    PER_LAYER_MODEL_PROJ = auto()  # gemma3n
+    PER_LAYER_INP_GATE   = auto()  # gemma3n
+    PER_LAYER_PROJ       = auto()  # gemma3n
+    PER_LAYER_PROJ_NORM  = auto()  # gemma3n
+    PER_LAYER_POST_NORM  = auto()  # gemma3n
+    ALTUP_PROJ           = auto()  # gemma3n
+    ALTUP_UNEMBD_PROJ    = auto()  # gemma3n
+    ALTUP_CORRECT_COEF   = auto()  # gemma3n
+    ALTUP_CORRECT_SCALE  = auto()  # gemma3n
+    ALTUP_PREDICT_COEF   = auto()  # gemma3n
+    ALTUP_ROUTER         = auto()  # gemma3n
+    ALTUP_ROUTER_NORM    = auto()  # gemma3n
+    LAUREL_L             = auto()  # gemma3n
+    LAUREL_R             = auto()  # gemma3n
+    LAUREL_POST_NORM     = auto()  # gemma3n
     SSM_IN      = auto()
     SSM_CONV1D  = auto()
     SSM_X       = auto()
     SSM_DT      = auto()
+    SSM_DT_NORM = auto()
     SSM_A       = auto()
+    SSM_B_NORM  = auto()
+    SSM_C_NORM  = auto()
     SSM_D       = auto()
+    SSM_NORM    = auto()
     SSM_OUT     = auto()
     TIME_MIX_W0 = auto()
     TIME_MIX_W1 = auto()
@@ -497,6 +537,9 @@
     POSNET_ATTN_K     = auto()
     POSNET_ATTN_V     = auto()
     POSNET_ATTN_OUT   = auto()
+    SHORTCONV_CONV    = auto()
+    SHORTCONV_INPROJ  = auto()
+    SHORTCONV_OUTPROJ = auto()
     # vision
     V_MMPROJ    = auto()
     V_MMPROJ_FC = auto()
@@ -596,12 +639,15 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.GEMMA:      "gemma",
     MODEL_ARCH.GEMMA2:     "gemma2",
     MODEL_ARCH.GEMMA3:     "gemma3",
+    MODEL_ARCH.GEMMA3N:    "gemma3n",
     MODEL_ARCH.STARCODER2: "starcoder2",
     MODEL_ARCH.RWKV6:      "rwkv6",
     MODEL_ARCH.RWKV6QWEN2: "rwkv6qwen2",
     MODEL_ARCH.RWKV7:      "rwkv7",
     MODEL_ARCH.ARWKV7:     "arwkv7",
     MODEL_ARCH.MAMBA:      "mamba",
+    MODEL_ARCH.MAMBA2:     "mamba2",
+    MODEL_ARCH.JAMBA:      "jamba",
     MODEL_ARCH.XVERSE:     "xverse",
     MODEL_ARCH.COMMAND_R:  "command-r",
     MODEL_ARCH.COHERE2:    "cohere2",
@@ -623,12 +669,18 @@
     MODEL_ARCH.EXAONE:           "exaone",
     MODEL_ARCH.GRANITE:          "granite",
     MODEL_ARCH.GRANITE_MOE:      "granitemoe",
+    MODEL_ARCH.GRANITE_HYBRID:   "granitehybrid",
     MODEL_ARCH.CHAMELEON:        "chameleon",
     MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
     MODEL_ARCH.PLM:              "plm",
     MODEL_ARCH.BAILINGMOE:       "bailingmoe",
     MODEL_ARCH.DOTS1:            "dots1",
     MODEL_ARCH.ARCEE:            "arcee",
+    MODEL_ARCH.ERNIE4_5:         "ernie4_5",
+    MODEL_ARCH.FALCON_H1:        "falcon-h1",
+    MODEL_ARCH.HUNYUAN_MOE:      "hunyuan-moe",
+    MODEL_ARCH.SMOLLM3:          "smollm3",
+    MODEL_ARCH.LFM2:             "lfm2",
 }
 
 VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
@@ -681,12 +733,32 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.FFN_UP_EXP:      "blk.{bid}.ffn_up_exps",
     MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
     MODEL_TENSOR.LAYER_OUT_NORM:  "blk.{bid}.layer_output_norm",
+    MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: "per_layer_token_embd",           # gemma3n
+    MODEL_TENSOR.PER_LAYER_MODEL_PROJ: "per_layer_model_proj",           # gemma3n
+    MODEL_TENSOR.PER_LAYER_PROJ_NORM:  "per_layer_proj_norm",            # gemma3n
+    MODEL_TENSOR.ALTUP_UNEMBD_PROJ:    "altup_unembd_proj",              # gemma3n
+    MODEL_TENSOR.ALTUP_PROJ:           "altup_proj",                     # gemma3n
+    MODEL_TENSOR.PER_LAYER_INP_GATE:   "blk.{bid}.inp_gate",             # gemma3n
+    MODEL_TENSOR.PER_LAYER_PROJ:       "blk.{bid}.proj",                 # gemma3n
+    MODEL_TENSOR.PER_LAYER_POST_NORM:  "blk.{bid}.post_norm",            # gemma3n
+    MODEL_TENSOR.ALTUP_CORRECT_COEF:   "blk.{bid}.altup_correct_coef",   # gemma3n
+    MODEL_TENSOR.ALTUP_CORRECT_SCALE:  "blk.{bid}.altup_correct_scale",  # gemma3n
+    MODEL_TENSOR.ALTUP_PREDICT_COEF:   "blk.{bid}.altup_predict_coef",   # gemma3n
+    MODEL_TENSOR.ALTUP_ROUTER:         "blk.{bid}.altup_router",         # gemma3n
+    MODEL_TENSOR.ALTUP_ROUTER_NORM:    "blk.{bid}.altup_router_norm",    # gemma3n
+    MODEL_TENSOR.LAUREL_L:             "blk.{bid}.laurel_l",             # gemma3n
+    MODEL_TENSOR.LAUREL_R:             "blk.{bid}.laurel_r",             # gemma3n
+    MODEL_TENSOR.LAUREL_POST_NORM:     "blk.{bid}.laurel_post_norm",     # gemma3n
     MODEL_TENSOR.SSM_IN:      "blk.{bid}.ssm_in",
     MODEL_TENSOR.SSM_CONV1D:  "blk.{bid}.ssm_conv1d",
     MODEL_TENSOR.SSM_X:       "blk.{bid}.ssm_x",
     MODEL_TENSOR.SSM_DT:      "blk.{bid}.ssm_dt",
+    MODEL_TENSOR.SSM_DT_NORM: "blk.{bid}.ssm_dt_norm",
     MODEL_TENSOR.SSM_A:       "blk.{bid}.ssm_a",
+    MODEL_TENSOR.SSM_B_NORM:  "blk.{bid}.ssm_b_norm",
+    MODEL_TENSOR.SSM_C_NORM:  "blk.{bid}.ssm_c_norm",
     MODEL_TENSOR.SSM_D:       "blk.{bid}.ssm_d",
+    MODEL_TENSOR.SSM_NORM:    "blk.{bid}.ssm_norm",
     MODEL_TENSOR.SSM_OUT:     "blk.{bid}.ssm_out",
     MODEL_TENSOR.TIME_MIX_W0: "blk.{bid}.time_mix_w0",
     MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",
@@ -780,6 +852,9 @@
     MODEL_TENSOR.POSNET_ATTN_K:     "posnet.{bid}.attn_k",
     MODEL_TENSOR.POSNET_ATTN_V:     "posnet.{bid}.attn_v",
     MODEL_TENSOR.POSNET_ATTN_OUT:   "posnet.{bid}.attn_output",
+    MODEL_TENSOR.SHORTCONV_CONV:    "blk.{bid}.shortconv.conv",
+    MODEL_TENSOR.SHORTCONV_INPROJ:  "blk.{bid}.shortconv.in_proj",
+    MODEL_TENSOR.SHORTCONV_OUTPROJ: "blk.{bid}.shortconv.out_proj",
     # vision
     MODEL_TENSOR.V_MMPROJ:    "mm.{bid}",
     MODEL_TENSOR.V_MMPROJ_FC: "mm.model.fc",
@@ -1485,6 +1560,41 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_PRE_NORM,
         MODEL_TENSOR.FFN_POST_NORM,
     ],
+    MODEL_ARCH.GEMMA3N: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_POST_NORM,
+        MODEL_TENSOR.FFN_PRE_NORM,
+        MODEL_TENSOR.FFN_POST_NORM,
+        # altup / laurel
+        MODEL_TENSOR.PER_LAYER_TOKEN_EMBD,
+        MODEL_TENSOR.PER_LAYER_MODEL_PROJ,
+        MODEL_TENSOR.PER_LAYER_INP_GATE,
+        MODEL_TENSOR.PER_LAYER_PROJ,
+        MODEL_TENSOR.PER_LAYER_PROJ_NORM,
+        MODEL_TENSOR.PER_LAYER_POST_NORM,
+        MODEL_TENSOR.ALTUP_PROJ,
+        MODEL_TENSOR.ALTUP_UNEMBD_PROJ,
+        MODEL_TENSOR.ALTUP_CORRECT_COEF,
+        MODEL_TENSOR.ALTUP_CORRECT_SCALE,
+        MODEL_TENSOR.ALTUP_PREDICT_COEF,
+        MODEL_TENSOR.ALTUP_ROUTER,
+        MODEL_TENSOR.ALTUP_ROUTER_NORM,
+        MODEL_TENSOR.LAUREL_L,
+        MODEL_TENSOR.LAUREL_R,
+        MODEL_TENSOR.LAUREL_POST_NORM,
+    ],
     MODEL_ARCH.STARCODER2: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -1636,6 +1746,47 @@
         MODEL_TENSOR.SSM_D,
         MODEL_TENSOR.SSM_OUT,
     ],
+    MODEL_ARCH.MAMBA2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.SSM_IN,
+        MODEL_TENSOR.SSM_CONV1D,
+        MODEL_TENSOR.SSM_DT,
+        MODEL_TENSOR.SSM_A,
+        MODEL_TENSOR.SSM_D,
+        MODEL_TENSOR.SSM_NORM,
+        MODEL_TENSOR.SSM_OUT,
+    ],
+    MODEL_ARCH.JAMBA: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.SSM_IN,
+        MODEL_TENSOR.SSM_CONV1D,
+        MODEL_TENSOR.SSM_X,
+        MODEL_TENSOR.SSM_DT,
+        MODEL_TENSOR.SSM_DT_NORM,
+        MODEL_TENSOR.SSM_A,
+        MODEL_TENSOR.SSM_B_NORM,
+        MODEL_TENSOR.SSM_C_NORM,
+        MODEL_TENSOR.SSM_D,
+        MODEL_TENSOR.SSM_OUT,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
     MODEL_ARCH.XVERSE: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -2005,6 +2156,36 @@
         MODEL_TENSOR.FFN_UP_SHEXP,
         MODEL_TENSOR.FFN_DOWN_SHEXP,
     ],
+    MODEL_ARCH.GRANITE_HYBRID: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.SSM_IN,
+        MODEL_TENSOR.SSM_CONV1D,
+        MODEL_TENSOR.SSM_DT,
+        MODEL_TENSOR.SSM_A,
+        MODEL_TENSOR.SSM_D,
+        MODEL_TENSOR.SSM_NORM,
+        MODEL_TENSOR.SSM_OUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        # MoE
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        # Dense
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.CHAMELEON: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -2101,6 +2282,109 @@
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.ERNIE4_5: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.FALCON_H1: [
+        # Token embedding
+        MODEL_TENSOR.TOKEN_EMBD,
+
+        # Input layernorm
+        MODEL_TENSOR.ATTN_NORM,
+
+        # Attention components
+        MODEL_TENSOR.ATTN_Q,    # Query projection
+        MODEL_TENSOR.ATTN_K,    # Key projection
+        MODEL_TENSOR.ATTN_V,    # Value projection
+        MODEL_TENSOR.ATTN_OUT,  # Output projection
+
+        # SSM components (Mamba2 specific)
+        MODEL_TENSOR.SSM_IN,      # Input projection for SSM
+        MODEL_TENSOR.SSM_CONV1D,  # Convolution layer
+        MODEL_TENSOR.SSM_DT,      # Delta time projection
+        MODEL_TENSOR.SSM_A,       # A parameter (log form)
+        MODEL_TENSOR.SSM_D,       # D parameter
+        MODEL_TENSOR.SSM_NORM,    # Normalization in SSM
+        MODEL_TENSOR.SSM_OUT,     # Output projection
+
+        # Pre-feedforward layernorm
+        MODEL_TENSOR.FFN_PRE_NORM,
+
+        # Feed-forward network components
+        MODEL_TENSOR.FFN_GATE,  # Gate projection (SwiGLU)
+        MODEL_TENSOR.FFN_DOWN,  # Down projection
+        MODEL_TENSOR.FFN_UP,    # Up projection
+
+        # Post-feedforward layernorm
+        MODEL_TENSOR.OUTPUT_NORM,  # Final layer norm
+        MODEL_TENSOR.OUTPUT,       # Output projection (lm_head)
+    ],
+    MODEL_ARCH.HUNYUAN_MOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+    ],
+    MODEL_ARCH.SMOLLM3: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.LFM2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.SHORTCONV_CONV,
+        MODEL_TENSOR.SHORTCONV_INPROJ,
+        MODEL_TENSOR.SHORTCONV_OUTPROJ,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.ATTN_NORM,  # operator_norm
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+    ],
     # TODO
 }
 
@@ -2405,6 +2689,7 @@ KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
KEY_SSM_INNER_SIZE     = Keys.SSM.INNER_SIZE
KEY_SSM_STATE_SIZE     = Keys.SSM.STATE_SIZE
KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
+KEY_SSM_GROUP_COUNT    = Keys.SSM.GROUP_COUNT
KEY_SSM_DT_B_C_RMS     = Keys.SSM.DT_B_C_RMS
 
 # tokenization
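Everything added to constants.py follows the file's existing pattern: new MODEL_ARCH and MODEL_TENSOR enum members, name format strings in TENSOR_NAMES, and per-architecture tensor lists in MODEL_TENSORS. Concrete tensor names come from substituting the block index into the format string; a quick sketch with the new LFM2 shortconv entries (assumes the bundled gguf-py is importable):

```python
from gguf.constants import MODEL_TENSOR, TENSOR_NAMES

for t in (MODEL_TENSOR.SHORTCONV_CONV,
          MODEL_TENSOR.SHORTCONV_INPROJ,
          MODEL_TENSOR.SHORTCONV_OUTPROJ):
    # "blk.{bid}.shortconv.conv" -> "blk.0.shortconv.conv", and so on.
    print(TENSOR_NAMES[t].format(bid=0))
```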
package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py:

@@ -648,6 +648,9 @@ class GGUFWriter:
     def add_convnext_block_count(self, length: int) -> None:
         self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)
 
+    def add_shortconv_l_cache(self, length: int) -> None:
+        self.add_uint32(Keys.ShortConv.L_CACHE.format(arch=self.arch), length)
+
     def add_block_count(self, length: int) -> None:
         self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
 
@@ -672,6 +675,18 @@
     def add_decoder_start_token_id(self, id: int) -> None:
         self.add_uint32(Keys.LLM.DECODER_START_TOKEN_ID.format(arch=self.arch), id)
 
+    def add_embedding_length_per_layer_input(self, value: int) -> None:
+        self.add_uint32(Keys.LLM.EMBD_LENGTH_PER_LAYER_INP.format(arch=self.arch), value)
+
+    def add_altup_active_idx(self, val: int) -> None:
+        self.add_uint32(Keys.LLM.ALTUP_ACTIVE_IDX.format(arch=self.arch), val)
+
+    def add_altup_num_inputs(self, val: int) -> None:
+        self.add_uint32(Keys.LLM.ALTUP_NUM_INPUTS.format(arch=self.arch), val)
+
+    def add_activation_sparsity_scale(self, values: Sequence[float]) -> None:
+        self.add_array(Keys.LLM.ACTIVATION_SPARSITY_SCALE.format(arch=self.arch), values)
+
     def add_head_count(self, count: int | Sequence[int]) -> None:
         if isinstance(count, int):
             self.add_uint32(Keys.Attention.HEAD_COUNT.format(arch=self.arch), count)
@@ -702,6 +717,12 @@
     def add_clamp_kqv(self, value: float) -> None:
         self.add_float32(Keys.Attention.CLAMP_KQV.format(arch=self.arch), value)
 
+    def add_shared_kv_layers(self, value: int) -> None:
+        self.add_uint32(Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch), value)
+
+    def add_sliding_window_pattern(self, value: Sequence[bool]) -> None:
+        self.add_array(Keys.Attention.SLIDING_WINDOW_PATTERN.format(arch=self.arch), value)
+
     def add_logit_scale(self, value: float) -> None:
         self.add_float32(Keys.LLM.LOGIT_SCALE.format(arch=self.arch), value)
 
@@ -843,6 +864,9 @@
     def add_ssm_time_step_rank(self, value: int) -> None:
         self.add_uint32(Keys.SSM.TIME_STEP_RANK.format(arch=self.arch), value)
 
+    def add_ssm_group_count(self, value: int) -> None:
+        self.add_uint32(Keys.SSM.GROUP_COUNT.format(arch=self.arch), value)
+
     def add_ssm_dt_b_c_rms(self, value: bool) -> None:
         self.add_bool(Keys.SSM.DT_B_C_RMS.format(arch=self.arch), value)
 
@@ -891,6 +915,9 @@
     def add_add_eos_token(self, value: bool) -> None:
         self.add_bool(Keys.Tokenizer.ADD_EOS, value)
 
+    def add_add_sep_token(self, value: bool) -> None:
+        self.add_bool(Keys.Tokenizer.ADD_SEP, value)
+
     def add_add_space_prefix(self, value: bool) -> None:
         self.add_bool(Keys.Tokenizer.ADD_PREFIX, value)
 
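The GGUFWriter changes are thin setters over the new keys defined in constants.py. A hypothetical conversion snippet exercising a few of them (the values are made up for illustration):

```python
from gguf import GGUFWriter

writer = GGUFWriter("model.gguf", arch="gemma3n")
writer.add_altup_num_inputs(4)                          # {arch}.altup.num_inputs
writer.add_altup_active_idx(0)                          # {arch}.altup.active_idx
writer.add_shared_kv_layers(2)                          # {arch}.attention.shared_kv_layers
writer.add_sliding_window_pattern([True, True, False])  # {arch}.attention.sliding_window_pattern
writer.add_add_sep_token(False)                         # tokenizer.ggml.add_sep_token
```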