@novastera-oss/llamarn 0.2.9 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +0 -1
- package/cpp/llama.cpp/README.md +4 -5
- package/cpp/llama.cpp/build-xcframework.sh +1 -1
- package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
- package/cpp/llama.cpp/common/arg.cpp +17 -0
- package/cpp/llama.cpp/common/chat.cpp +37 -20
- package/cpp/llama.cpp/common/chat.h +2 -0
- package/cpp/llama.cpp/common/common.h +4 -0
- package/cpp/llama.cpp/convert_hf_to_gguf.py +745 -6
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +9 -0
- package/cpp/llama.cpp/ggml/CMakeLists.txt +7 -2
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml.h +173 -10
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +36 -18
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +68 -5
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +16 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +6 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +28 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1203 -163
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +33 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +142 -9
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +8 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +14 -12
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +5 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +15 -10
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +185 -79
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +2 -8
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +97 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +14 -5
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +64 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +35 -9
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +167 -39
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +254 -57
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +505 -40
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +693 -1034
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +60 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +15 -18
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +711 -292
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +58 -7
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +23 -3
- package/cpp/llama.cpp/ggml/src/ggml.c +382 -61
- package/cpp/llama.cpp/ggml/src/gguf.cpp +8 -1
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +209 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +8 -2
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +73 -21
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +12 -3
- package/cpp/llama.cpp/include/llama.h +0 -40
- package/cpp/llama.cpp/src/llama-arch.cpp +210 -3
- package/cpp/llama.cpp/src/llama-arch.h +18 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +27 -1
- package/cpp/llama.cpp/src/llama-batch.h +8 -1
- package/cpp/llama.cpp/src/llama-chat.cpp +15 -0
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-graph.cpp +119 -184
- package/cpp/llama.cpp/src/llama-graph.h +47 -60
- package/cpp/llama.cpp/src/llama-hparams.cpp +7 -1
- package/cpp/llama.cpp/src/llama-hparams.h +3 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +28 -18
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +4 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +214 -65
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +62 -24
- package/cpp/llama.cpp/src/llama-kv-cells.h +62 -10
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +9 -4
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +3 -1
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +20 -10
- package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
- package/cpp/llama.cpp/src/llama-memory.h +3 -0
- package/cpp/llama.cpp/src/llama-model.cpp +2530 -685
- package/cpp/llama.cpp/src/llama-model.h +18 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +1 -0
- package/cpp/llama.cpp/src/llama-vocab.cpp +13 -2
- package/cpp/llama.cpp/src/llama-vocab.h +41 -0
- package/ios/include/chat.h +2 -0
- package/ios/include/common.h +4 -0
- package/ios/include/llama.h +0 -40
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5055 -4886
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4861
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3764
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4861
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3891 -3766
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4890
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4861
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3764
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5091 -4922
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5066 -4897
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3919 -3794
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
- package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
package/cpp/llama.cpp/ggml/src/gguf.cpp

@@ -631,7 +631,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
             gguf_free(ctx);
             return nullptr;
         }
-
+        size_t padded_size = GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment);
+        if (SIZE_MAX - ctx->size < padded_size) {
+            GGML_LOG_ERROR("%s: tensor '%s' size overflow, cannot accumulate size %zu + %zu\n",
+                __func__, ti.t.name, ctx->size, padded_size);
+            gguf_free(ctx);
+            return nullptr;
+        }
+        ctx->size += padded_size;
     }
 }
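The gguf.cpp hunk above adds an overflow guard when gguf_init_from_file_impl accumulates the padded sizes of all tensors. The padding itself is the usual round-up-to-alignment computation; the sketch below reproduces it in Python purely for illustration (the helper name and sample byte counts are made up, and Python integers cannot overflow, which is why the C code needs the SIZE_MAX check and this sketch does not):

def pad_to_alignment(nbytes: int, alignment: int) -> int:
    # Same idea as GGML_PAD: round nbytes up to the next multiple of alignment.
    return ((nbytes + alignment - 1) // alignment) * alignment

# Accumulate padded tensor sizes, as the patched loop does for each tensor info entry.
total = 0
for nbytes in (4096 * 4096 * 2, 4096 * 4, 1024):  # hypothetical tensor byte counts
    total += pad_to_alignment(nbytes, 32)          # GGUF default alignment is 32 bytes
print(total)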
package/cpp/llama.cpp/gguf-py/gguf/constants.py

@@ -170,6 +170,7 @@ class Keys:
         INNER_SIZE     = "{arch}.ssm.inner_size"
         STATE_SIZE     = "{arch}.ssm.state_size"
         TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
+        GROUP_COUNT    = "{arch}.ssm.group_count"
         DT_B_C_RMS     = "{arch}.ssm.dt_b_c_rms"

     class WKV:

@@ -186,6 +187,9 @@ class Keys:
     class Classifier:
         OUTPUT_LABELS = "{arch}.classifier.output_labels"

+    class ShortConv:
+        L_CACHE = "{arch}.shortconv.l_cache"
+
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
         PRE   = "tokenizer.ggml.pre"

@@ -287,6 +291,7 @@ class MODEL_ARCH(IntEnum):
     LLAMA4    = auto()
     DECI      = auto()
     FALCON    = auto()
+    FALCON_H1 = auto()
     BAICHUAN  = auto()
     GROK      = auto()
     GPT2      = auto()

@@ -327,6 +332,8 @@ class MODEL_ARCH(IntEnum):
     RWKV7     = auto()
     ARWKV7    = auto()
     MAMBA     = auto()
+    MAMBA2    = auto()
+    JAMBA     = auto()
     XVERSE    = auto()
     COMMAND_R = auto()
     COHERE2   = auto()

@@ -348,12 +355,17 @@ class MODEL_ARCH(IntEnum):
     EXAONE           = auto()
     GRANITE          = auto()
     GRANITE_MOE      = auto()
+    GRANITE_HYBRID   = auto()
     CHAMELEON        = auto()
     WAVTOKENIZER_DEC = auto()
     PLM              = auto()
     BAILINGMOE       = auto()
     DOTS1            = auto()
     ARCEE            = auto()
+    ERNIE4_5         = auto()
+    HUNYUAN_MOE      = auto()
+    SMOLLM3          = auto()
+    LFM2             = auto()


 class VISION_PROJECTOR_TYPE(IntEnum):

@@ -426,8 +438,12 @@ class MODEL_TENSOR(IntEnum):
     SSM_CONV1D        = auto()
     SSM_X             = auto()
     SSM_DT            = auto()
+    SSM_DT_NORM       = auto()
     SSM_A             = auto()
+    SSM_B_NORM        = auto()
+    SSM_C_NORM        = auto()
     SSM_D             = auto()
+    SSM_NORM          = auto()
     SSM_OUT           = auto()
     TIME_MIX_W0       = auto()
     TIME_MIX_W1       = auto()

@@ -521,6 +537,9 @@ class MODEL_TENSOR(IntEnum):
     POSNET_ATTN_K     = auto()
     POSNET_ATTN_V     = auto()
     POSNET_ATTN_OUT   = auto()
+    SHORTCONV_CONV    = auto()
+    SHORTCONV_INPROJ  = auto()
+    SHORTCONV_OUTPROJ = auto()
     # vision
     V_MMPROJ          = auto()
     V_MMPROJ_FC       = auto()

@@ -627,6 +646,8 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.RWKV7:            "rwkv7",
     MODEL_ARCH.ARWKV7:           "arwkv7",
     MODEL_ARCH.MAMBA:            "mamba",
+    MODEL_ARCH.MAMBA2:           "mamba2",
+    MODEL_ARCH.JAMBA:            "jamba",
     MODEL_ARCH.XVERSE:           "xverse",
     MODEL_ARCH.COMMAND_R:        "command-r",
     MODEL_ARCH.COHERE2:          "cohere2",

@@ -648,12 +669,18 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.EXAONE:           "exaone",
     MODEL_ARCH.GRANITE:          "granite",
     MODEL_ARCH.GRANITE_MOE:      "granitemoe",
+    MODEL_ARCH.GRANITE_HYBRID:   "granitehybrid",
     MODEL_ARCH.CHAMELEON:        "chameleon",
     MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
     MODEL_ARCH.PLM:              "plm",
     MODEL_ARCH.BAILINGMOE:       "bailingmoe",
     MODEL_ARCH.DOTS1:            "dots1",
     MODEL_ARCH.ARCEE:            "arcee",
+    MODEL_ARCH.ERNIE4_5:         "ernie4_5",
+    MODEL_ARCH.FALCON_H1:        "falcon-h1",
+    MODEL_ARCH.HUNYUAN_MOE:      "hunyuan-moe",
+    MODEL_ARCH.SMOLLM3:          "smollm3",
+    MODEL_ARCH.LFM2:             "lfm2",
 }

 VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {

@@ -726,8 +753,12 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.SSM_CONV1D:  "blk.{bid}.ssm_conv1d",
     MODEL_TENSOR.SSM_X:       "blk.{bid}.ssm_x",
     MODEL_TENSOR.SSM_DT:      "blk.{bid}.ssm_dt",
+    MODEL_TENSOR.SSM_DT_NORM: "blk.{bid}.ssm_dt_norm",
     MODEL_TENSOR.SSM_A:       "blk.{bid}.ssm_a",
+    MODEL_TENSOR.SSM_B_NORM:  "blk.{bid}.ssm_b_norm",
+    MODEL_TENSOR.SSM_C_NORM:  "blk.{bid}.ssm_c_norm",
     MODEL_TENSOR.SSM_D:       "blk.{bid}.ssm_d",
+    MODEL_TENSOR.SSM_NORM:    "blk.{bid}.ssm_norm",
     MODEL_TENSOR.SSM_OUT:     "blk.{bid}.ssm_out",
     MODEL_TENSOR.TIME_MIX_W0: "blk.{bid}.time_mix_w0",
     MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",

@@ -821,6 +852,9 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.POSNET_ATTN_K:     "posnet.{bid}.attn_k",
     MODEL_TENSOR.POSNET_ATTN_V:     "posnet.{bid}.attn_v",
     MODEL_TENSOR.POSNET_ATTN_OUT:   "posnet.{bid}.attn_output",
+    MODEL_TENSOR.SHORTCONV_CONV:    "blk.{bid}.shortconv.conv",
+    MODEL_TENSOR.SHORTCONV_INPROJ:  "blk.{bid}.shortconv.in_proj",
+    MODEL_TENSOR.SHORTCONV_OUTPROJ: "blk.{bid}.shortconv.out_proj",
     # vision
     MODEL_TENSOR.V_MMPROJ:          "mm.{bid}",
     MODEL_TENSOR.V_MMPROJ_FC:       "mm.model.fc",

@@ -1712,6 +1746,47 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.SSM_D,
         MODEL_TENSOR.SSM_OUT,
     ],
+    MODEL_ARCH.MAMBA2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.SSM_IN,
+        MODEL_TENSOR.SSM_CONV1D,
+        MODEL_TENSOR.SSM_DT,
+        MODEL_TENSOR.SSM_A,
+        MODEL_TENSOR.SSM_D,
+        MODEL_TENSOR.SSM_NORM,
+        MODEL_TENSOR.SSM_OUT,
+    ],
+    MODEL_ARCH.JAMBA: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.SSM_IN,
+        MODEL_TENSOR.SSM_CONV1D,
+        MODEL_TENSOR.SSM_X,
+        MODEL_TENSOR.SSM_DT,
+        MODEL_TENSOR.SSM_DT_NORM,
+        MODEL_TENSOR.SSM_A,
+        MODEL_TENSOR.SSM_B_NORM,
+        MODEL_TENSOR.SSM_C_NORM,
+        MODEL_TENSOR.SSM_D,
+        MODEL_TENSOR.SSM_OUT,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
     MODEL_ARCH.XVERSE: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,

@@ -2081,6 +2156,36 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_UP_SHEXP,
         MODEL_TENSOR.FFN_DOWN_SHEXP,
     ],
+    MODEL_ARCH.GRANITE_HYBRID: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.SSM_IN,
+        MODEL_TENSOR.SSM_CONV1D,
+        MODEL_TENSOR.SSM_DT,
+        MODEL_TENSOR.SSM_A,
+        MODEL_TENSOR.SSM_D,
+        MODEL_TENSOR.SSM_NORM,
+        MODEL_TENSOR.SSM_OUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        # MoE
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        # Dense
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.CHAMELEON: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,

@@ -2177,6 +2282,109 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.ERNIE4_5: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.FALCON_H1: [
+        # Token embedding
+        MODEL_TENSOR.TOKEN_EMBD,
+
+        # Input layernorm
+        MODEL_TENSOR.ATTN_NORM,
+
+        # Attention components
+        MODEL_TENSOR.ATTN_Q,        # Query projection
+        MODEL_TENSOR.ATTN_K,        # Key projection
+        MODEL_TENSOR.ATTN_V,        # Value projection
+        MODEL_TENSOR.ATTN_OUT,      # Output projection
+
+        # SSM components (Mamba2 specific)
+        MODEL_TENSOR.SSM_IN,        # Input projection for SSM
+        MODEL_TENSOR.SSM_CONV1D,    # Convolution layer
+        MODEL_TENSOR.SSM_DT,        # Delta time projection
+        MODEL_TENSOR.SSM_A,         # A parameter (log form)
+        MODEL_TENSOR.SSM_D,         # D parameter
+        MODEL_TENSOR.SSM_NORM,      # Normalization in SSM
+        MODEL_TENSOR.SSM_OUT,       # Output projection
+
+        # Pre-feedforward layernorm
+        MODEL_TENSOR.FFN_PRE_NORM,
+
+        # Feed-forward network components
+        MODEL_TENSOR.FFN_GATE,      # Gate projection (SwiGLU)
+        MODEL_TENSOR.FFN_DOWN,      # Down projection
+        MODEL_TENSOR.FFN_UP,        # Up projection
+
+        # Post-feedforward layernorm
+        MODEL_TENSOR.OUTPUT_NORM,   # Final layer norm
+        MODEL_TENSOR.OUTPUT,        # Output projection (lm_head)
+    ],
+    MODEL_ARCH.HUNYUAN_MOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+    ],
+    MODEL_ARCH.SMOLLM3: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.LFM2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.SHORTCONV_CONV,
+        MODEL_TENSOR.SHORTCONV_INPROJ,
+        MODEL_TENSOR.SHORTCONV_OUTPROJ,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.ATTN_NORM,     # operator_norm
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+    ],
     # TODO
 }

@@ -2481,6 +2689,7 @@ KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
 KEY_SSM_INNER_SIZE     = Keys.SSM.INNER_SIZE
 KEY_SSM_STATE_SIZE     = Keys.SSM.STATE_SIZE
 KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
+KEY_SSM_GROUP_COUNT    = Keys.SSM.GROUP_COUNT
 KEY_SSM_DT_B_C_RMS     = Keys.SSM.DT_B_C_RMS

 # tokenization
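The constants.py hunks register the new architectures (MAMBA2, JAMBA, GRANITE_HYBRID, ERNIE4_5, FALCON_H1, HUNYUAN_MOE, SMOLLM3, LFM2) and the new SSM / short-convolution tensor types. A minimal sketch of how these tables are typically consumed, assuming the bundled gguf-py package is importable as gguf (the loop and the choice of block index are illustrative, not code from this package):

from gguf.constants import MODEL_ARCH, MODEL_TENSORS, TENSOR_NAMES

# Print the GGUF tensor names that block 0 of an LFM2 checkpoint is expected to use,
# including the new shortconv tensors added in this release.
for tensor in MODEL_TENSORS[MODEL_ARCH.LFM2]:
    print(TENSOR_NAMES[tensor].format(bid=0))  # e.g. "blk.0.shortconv.conv"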
package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py

@@ -648,6 +648,9 @@ class GGUFWriter:
     def add_convnext_block_count(self, length: int) -> None:
         self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)

+    def add_shortconv_l_cache(self, length: int) -> None:
+        self.add_uint32(Keys.ShortConv.L_CACHE.format(arch=self.arch), length)
+
     def add_block_count(self, length: int) -> None:
         self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)

@@ -714,8 +717,8 @@ class GGUFWriter:
     def add_clamp_kqv(self, value: float) -> None:
         self.add_float32(Keys.Attention.CLAMP_KQV.format(arch=self.arch), value)

-    def add_shared_kv_layers(self, value: float) -> None:
-        self.add_float32(Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch), value)
+    def add_shared_kv_layers(self, value: int) -> None:
+        self.add_uint32(Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch), value)

     def add_sliding_window_pattern(self, value: Sequence[bool]) -> None:
         self.add_array(Keys.Attention.SLIDING_WINDOW_PATTERN.format(arch=self.arch), value)

@@ -861,6 +864,9 @@ class GGUFWriter:
     def add_ssm_time_step_rank(self, value: int) -> None:
         self.add_uint32(Keys.SSM.TIME_STEP_RANK.format(arch=self.arch), value)

+    def add_ssm_group_count(self, value: int) -> None:
+        self.add_uint32(Keys.SSM.GROUP_COUNT.format(arch=self.arch), value)
+
     def add_ssm_dt_b_c_rms(self, value: bool) -> None:
         self.add_bool(Keys.SSM.DT_B_C_RMS.format(arch=self.arch), value)
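These writer additions expose the new metadata keys ({arch}.shortconv.l_cache and {arch}.ssm.group_count) and change add_shared_kv_layers to write an integer. A minimal usage sketch, assuming gguf-py's GGUFWriter is constructed with an output path and an architecture name (the file name and values are illustrative):

from gguf import GGUFWriter

writer = GGUFWriter("model.gguf", arch="lfm2")  # hypothetical output file
writer.add_shortconv_l_cache(3)                 # "{arch}.shortconv.l_cache", stored as uint32
writer.add_ssm_group_count(1)                   # "{arch}.ssm.group_count", stored as uint32
writer.add_shared_kv_layers(2)                  # now takes an int and is stored as uint32
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()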
package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py

@@ -13,7 +13,7 @@ class TensorNameMap:
             "transformer.wte",                           # gpt2 gpt-j mpt refact qwen dbrx jais exaone
             "transformer.word_embeddings",               # falcon
             "word_embeddings",                           # bloom
-            "model.embed_tokens",                        # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414
+            "model.embed_tokens",                        # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 granite-hybrid
             "tok_embeddings",                            # llama-pth
             "embeddings.word_embeddings",                # bert nomic-bert
             "language_model.embedding.word_embeddings",  # persimmon

@@ -50,6 +50,7 @@ class TensorNameMap:
             "model.pre_ln",             # rwkv7
             "model.layers.0.pre_norm",  # rwkv7
             "backbone.norm",            # wavtokenizer
+            "model.embedding_norm",     # lfm2
         ),

         # Position embeddings

@@ -118,7 +119,7 @@ class TensorNameMap:
             "transformer.h.{bid}.input_layernorm",                  # falcon7b
             "h.{bid}.input_layernorm",                              # bloom
             "transformer.h.{bid}.ln_mlp",                           # falcon40b
-            "model.layers.{bid}.input_layernorm",                   # llama-hf nemotron olmoe phimoe
+            "model.layers.{bid}.input_layernorm",                   # llama-hf nemotron olmoe phimoe granite-hybrid
             "layers.{bid}.attention_norm",                          # llama-pth
             "language_model.encoder.layers.{bid}.input_layernorm",  # persimmon
             "model.layers.{bid}.ln1",                               # yi

@@ -136,6 +137,7 @@ class TensorNameMap:
             "model.layers.{bid}.ln1",                    # rwkv7
             "model.layers.{bid}.input_layernorm",        # llama4
             "transformer_encoder.{bid}.attention_norm",  # neobert
+            "model.layers.{bid}.operator_norm",          # lfm2
         ),

         # Attention norm 2

@@ -220,6 +222,7 @@ class TensorNameMap:
             "transformer.h.{bid}.self_attention.dense",    # falcon
             "h.{bid}.self_attention.dense",                # bloom
             "model.layers.{bid}.self_attn.o_proj",         # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.out_proj",       # lfm2
             "model.layers.{bid}.self_attn.linear_attn",    # deci
             "layers.{bid}.attention.wo",                   # llama-pth
             "encoder.layer.{bid}.attention.output.dense",  # bert

@@ -279,6 +282,8 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.rms_norm_2",     # Grok
             "encoder.layers.{bid}.post_attention_layernorm",  # chatglm
             "transformer.layers.{bid}.ffn_norm",              # openelm
+            "model.layers.{bid}.pre_ff_layernorm",            # jamba granite-hybrid
+            "model.layers.{bid}.pre_moe_layernorm",           # mini-jamba
             "model.layers.{bid}.post_attention_layernorm",    # llama4
             "transformer_encoder.{bid}.ffn_norm",             # neobert
         ),

@@ -286,12 +291,14 @@ class TensorNameMap:
         # Post feed-forward norm
         MODEL_TENSOR.FFN_PRE_NORM: (
             "model.layers.{bid}.pre_feedforward_layernorm",  # gemma2
+            "model.layers.{bid}.pre_ff_layernorm.weight",
         ),

         # Post feed-forward norm
         MODEL_TENSOR.FFN_POST_NORM: (
             "model.layers.{bid}.post_feedforward_layernorm",  # gemma2 olmo2
             "model.layers.{bid}.post_mlp_layernorm",          # glm-4-0414
+            "model.layers.{bid}.feed_forward.up_proj",
         ),

         MODEL_TENSOR.FFN_GATE_INP: (

@@ -301,8 +308,9 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.router",            # Grok
             "transformer.blocks.{bid}.ffn.router.layer",         # dbrx
             "model.layers.{bid}.block_sparse_moe.router.layer",  # granitemoe
-            "model.layers.{bid}.feed_forward.router",            # llama4
+            "model.layers.{bid}.feed_forward.router",            # llama4 jamba
             "encoder.layers.{bid}.mlp.router.layer",             # nomic-bert-moe
+            "model.layers.{bid}.mlp.gate.wg",                    # hunyuan
         ),

         MODEL_TENSOR.FFN_GATE_INP_SHEXP: (

@@ -344,7 +352,7 @@ class TensorNameMap:
             "model.layers.{bid}.residual_mlp.w3",      # arctic
             "encoder.layers.{bid}.mlp.dense_h_to_4h",  # chatglm
             "transformer.h.{bid}.mlp.c_fc_1",          # exaone
-            "model.layers.{bid}.feed_forward.up_proj",  # llama4
+            "model.layers.{bid}.feed_forward.up_proj",  # llama4 jamba granite-hybrid
             "transformer_encoder.{bid}.ffn.w12",       # neobert
         ),

@@ -362,6 +370,8 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.shared_expert.up_proj",           # qwen2moe
             "model.layers.{bid}.mlp.shared_experts.up_proj",          # deepseek deepseek2
             "model.layers.{bid}.feed_forward.shared_expert.up_proj",  # llama4
+            "model.layers.{bid}.feed_forward.down_proj",
+            "model.layers.{bid}.mlp.shared_mlp.up_proj",              # hunyuan
         ),

         # AWQ-activation gate

@@ -382,7 +392,7 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.linear_1",           # refact
             "model.layers.{bid}.residual_mlp.w1",         # arctic
             "transformer.h.{bid}.mlp.c_fc_0",             # exaone
-            "model.layers.{bid}.feed_forward.gate_proj",  # llama4
+            "model.layers.{bid}.feed_forward.gate_proj",  # llama4 jamba granite-hybrid
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (

@@ -398,6 +408,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.shared_expert.gate_proj",           # qwen2moe
             "model.layers.{bid}.mlp.shared_experts.gate_proj",          # deepseek deepseek2
             "model.layers.{bid}.feed_forward.shared_expert.gate_proj",  # llama4
+            "model.layers.{bid}.mlp.shared_mlp.gate_proj",              # hunyuan
         ),

         # Feed-forward down

@@ -427,7 +438,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.down_layer",         # jina-bert-v2
             "encoder.layers.{bid}.mlp.dense_4h_to_h",     # chatglm
             "model.layers.h.{bid}.mlp.c_proj",            # exaone
-            "model.layers.{bid}.feed_forward.down_proj",  # llama4
+            "model.layers.{bid}.feed_forward.down_proj",  # llama4 jamba granite-hybrid
             "transformer_encoder.{bid}.ffn.w3",           # neobert
         ),

@@ -447,11 +458,13 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.shared_experts.down_proj",          # deepseek deepseek2
             "model.layers.{bid}.feed_forward.shared_expert.down_proj",  # llama4
             "model.layers.{bid}.shared_mlp.output_linear",              # granitemoe
+            "model.layers.{bid}.mlp.shared_mlp.down_proj",              # hunyuan
         ),

         MODEL_TENSOR.ATTN_Q_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm",         # persimmon
+            "model.layers.{bid}.self_attn.query_layernorm",     # hunyuan
             "model.layers.{bid}.self_attn.q_norm",              # cohere olmoe chameleon olmo2
             "transformer.blocks.{bid}.attn.q_ln",               # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_q",  # jina-bert-v2

@@ -461,6 +474,7 @@ class TensorNameMap:
         MODEL_TENSOR.ATTN_K_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm",         # persimmon
+            "model.layers.{bid}.self_attn.key_layernorm",       # hunyuan
             "model.layers.{bid}.self_attn.k_norm",              # cohere olmoe chameleon olmo2
             "transformer.blocks.{bid}.attn.k_ln",               # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_k",  # jina-bert-v2

@@ -477,7 +491,7 @@ class TensorNameMap:
             "encoder.layers.{bid}.norm2",                  # nomic-bert
             "transformer.decoder_layer.{bid}.rms_norm_3",  # Grok
             "encoder.layer.{bid}.mlp.layernorm",           # jina-bert-v2
-            "encoder.layer.{bid}.layer_norm_2"
+            "encoder.layer.{bid}.layer_norm_2",            # jina-v2-code
         ),

         MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: (

@@ -545,38 +559,64 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.SSM_IN: (
-            "model.layers.{bid}.in_proj",
-            "backbone.layers.{bid}.mixer.in_proj",
+            "model.layers.{bid}.in_proj",           # mamba-hf
+            "backbone.layers.{bid}.mixer.in_proj",  # mamba
+            "model.layers.{bid}.mamba.in_proj",     # jamba falcon-h1 granite-hybrid
         ),

         MODEL_TENSOR.SSM_CONV1D: (
-            "model.layers.{bid}.conv1d",
-            "backbone.layers.{bid}.mixer.conv1d",
+            "model.layers.{bid}.conv1d",            # mamba-hf
+            "backbone.layers.{bid}.mixer.conv1d",   # mamba
+            "model.layers.{bid}.mamba.conv1d",      # jamba falcon-h1 granite-hybrid
         ),

         MODEL_TENSOR.SSM_X: (
-            "model.layers.{bid}.x_proj",
-            "backbone.layers.{bid}.mixer.x_proj",
+            "model.layers.{bid}.x_proj",            # mamba-hf
+            "backbone.layers.{bid}.mixer.x_proj",   # mamba
+            "model.layers.{bid}.mamba.x_proj",      # jamba
         ),

         MODEL_TENSOR.SSM_DT: (
-            "model.layers.{bid}.dt_proj",
-            "backbone.layers.{bid}.mixer.dt_proj",
+            "model.layers.{bid}.dt_proj",           # mamba-hf
+            "backbone.layers.{bid}.mixer.dt_proj",  # mamba
+            "model.layers.{bid}.mamba.dt_proj",     # jamba falcon-h1 granite-hybrid
+        ),
+
+        MODEL_TENSOR.SSM_DT_NORM: (
+            "model.layers.{bid}.mamba.dt_layernorm",  # jamba
         ),

         MODEL_TENSOR.SSM_A: (
-            "model.layers.{bid}.A_log",
-            "backbone.layers.{bid}.mixer.A_log",
+            "model.layers.{bid}.A_log",             # mamba-hf
+            "backbone.layers.{bid}.mixer.A_log",    # mamba
+            "model.layers.{bid}.mamba.A_log",       # jamba falcon-h1 granite-hybrid
+        ),
+
+        MODEL_TENSOR.SSM_B_NORM: (
+            "model.layers.{bid}.mamba.b_layernorm",  # jamba
+            "model.layers.{bid}.mamba.B_layernorm",  # mini-jamba
+        ),
+
+        MODEL_TENSOR.SSM_C_NORM: (
+            "model.layers.{bid}.mamba.c_layernorm",  # jamba
+            "model.layers.{bid}.mamba.C_layernorm",  # mini-jamba
         ),

         MODEL_TENSOR.SSM_D: (
-            "model.layers.{bid}.D",
-            "backbone.layers.{bid}.mixer.D",
+            "model.layers.{bid}.D",                 # mamba-hf
+            "backbone.layers.{bid}.mixer.D",        # mamba
+            "model.layers.{bid}.mamba.D",           # jamba falcon-h1 granite-hybrid
+        ),
+
+        MODEL_TENSOR.SSM_NORM: (
+            "model.layers.{bid}.mamba.norm",        # falcon-h1 granite-hybrid
+            "backbone.layers.{bid}.mixer.norm",     # mamba2
         ),

         MODEL_TENSOR.SSM_OUT: (
-            "model.layers.{bid}.out_proj",
-            "backbone.layers.{bid}.mixer.out_proj",
+            "model.layers.{bid}.out_proj",           # mamba-hf
+            "backbone.layers.{bid}.mixer.out_proj",  # mamba
+            "model.layers.{bid}.mamba.out_proj",     # jamba falcon-h1 granite-hybrid
         ),

         MODEL_TENSOR.TIME_MIX_W0: (

@@ -978,6 +1018,18 @@ class TensorNameMap:
             "backbone.posnet.{bid}.proj_out",  # wavtokenizer
         ),

+        MODEL_TENSOR.SHORTCONV_CONV: (
+            "model.layers.{bid}.conv.conv",
+        ),
+
+        MODEL_TENSOR.SHORTCONV_INPROJ: (
+            "model.layers.{bid}.conv.in_proj",
+        ),
+
+        MODEL_TENSOR.SHORTCONV_OUTPROJ: (
+            "model.layers.{bid}.conv.out_proj",
+        ),
+
         #############################################################################
         ## Vision encoder
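The tensor_mapping.py hunks teach the converter how to map Hugging Face tensor names of the new hybrid architectures (jamba, falcon-h1, granite-hybrid, hunyuan, lfm2) onto GGUF names. A minimal sketch of how such a mapping is typically resolved, assuming gguf-py's get_tensor_name_map and TensorNameMap.get_name helpers behave as in upstream llama.cpp (the block count and tensor name below are illustrative):

from gguf import MODEL_ARCH, get_tensor_name_map

tmap = get_tensor_name_map(MODEL_ARCH.JAMBA, n_blocks=32)  # hypothetical block count
name = tmap.get_name("model.layers.0.mamba.in_proj.weight",
                     try_suffixes=(".weight", ".bias"))
print(name)  # expected to resolve via the new SSM_IN entry, i.e. "blk.0.ssm_in.weight"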
package/cpp/llama.cpp/gguf-py/gguf/vocab.py

@@ -245,9 +245,18 @@ class SpecialVocab:
         if not tokenizer_config:
             return True
         chat_template_alt = None
-        chat_template_file = path / 'chat_template.json'
-        if chat_template_file.is_file():
-            with open(chat_template_file, encoding = 'utf-8') as f:
+        chat_template_json = path / 'chat_template.json'
+        chat_template_jinja = path / 'chat_template.jinja'
+        if chat_template_jinja.is_file():
+            with open(chat_template_jinja, encoding = 'utf-8') as f:
+                chat_template_alt = f.read()
+            if additional_templates := list((path / 'additional_chat_templates').glob('*.jinja')):
+                chat_template_alt = [{'name': 'default', 'template': chat_template_alt}]
+                for template_path in additional_templates:
+                    with open(template_path, encoding = 'utf-8') as fp:
+                        chat_template_alt.append({'name': template_path.stem, 'template': fp.read()})
+        elif chat_template_json.is_file():
+            with open(chat_template_json, encoding = 'utf-8') as f:
                 chat_template_alt = json.load(f).get('chat_template')
         chat_template = tokenizer_config.get('chat_template', chat_template_alt)
         if chat_template is None or isinstance(chat_template, (str, list)):
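The vocab.py hunk makes SpecialVocab prefer a plain chat_template.jinja file over the legacy chat_template.json, and collect any extra templates from additional_chat_templates/*.jinja as named entries alongside the default one. A standalone sketch of that lookup order, for illustration only (the helper name and the use of sorted() are not from the package):

import json
from pathlib import Path

def load_chat_templates(model_dir: Path):
    # Mirrors the lookup order added above: .jinja first, then the legacy .json file.
    jinja = model_dir / "chat_template.jinja"
    legacy = model_dir / "chat_template.json"
    if jinja.is_file():
        template = jinja.read_text(encoding="utf-8")
        extra = sorted((model_dir / "additional_chat_templates").glob("*.jinja"))
        if not extra:
            return template
        named = [{"name": "default", "template": template}]
        named += [{"name": p.stem, "template": p.read_text(encoding="utf-8")} for p in extra]
        return named
    if legacy.is_file():
        return json.loads(legacy.read_text(encoding="utf-8")).get("chat_template")
    return None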