@novastera-oss/llamarn 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/build.gradle +2 -1
- package/android/proguard-rules.pro +12 -0
- package/android/src/main/cpp/include/llama.h +15 -47
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakePresets.json +11 -0
- package/cpp/llama.cpp/CODEOWNERS +1 -0
- package/cpp/llama.cpp/README.md +4 -3
- package/cpp/llama.cpp/common/arg.cpp +45 -1
- package/cpp/llama.cpp/common/common.cpp +22 -6
- package/cpp/llama.cpp/common/common.h +18 -4
- package/cpp/llama.cpp/convert_hf_to_gguf.py +500 -32
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +12 -13
- package/cpp/llama.cpp/ggml/CMakeLists.txt +6 -1
- package/cpp/llama.cpp/ggml/cmake/ggml-config.cmake.in +85 -47
- package/cpp/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-alloc.c +0 -15
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +8 -20
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +58 -3
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +130 -22
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +122 -16
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +5 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +109 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +88 -10
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +343 -1094
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +14 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +64 -17
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh +225 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +41 -301
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +85 -67
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +45 -62
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +28 -43
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +41 -56
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +36 -47
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +31 -43
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +22 -37
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +3 -13
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +73 -23
- package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +111 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +1152 -689
- package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +92 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cu +275 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +13 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +16 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +13 -3
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +407 -69
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +380 -83
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +18 -4
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +295 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
- package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +4 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +14 -26
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +131 -46
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +8 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +43 -43
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +2 -6
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +287 -22
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +265 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +1 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +8 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +71 -16
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +907 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +35 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +56 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +4 -6
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +98 -0
- package/cpp/llama.cpp/gguf-py/gguf/metadata.py +4 -0
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_dump.py +24 -1
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +75 -52
- package/cpp/llama.cpp/include/llama.h +15 -7
- package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja +34 -0
- package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja +43 -0
- package/cpp/llama.cpp/requirements/requirements-all.txt +1 -0
- package/cpp/llama.cpp/requirements/requirements-server-bench.txt +5 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +106 -0
- package/cpp/llama.cpp/src/llama-arch.h +5 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +76 -70
- package/cpp/llama.cpp/src/llama-batch.h +24 -18
- package/cpp/llama.cpp/src/llama-chat.cpp +43 -1
- package/cpp/llama.cpp/src/llama-chat.h +2 -0
- package/cpp/llama.cpp/src/llama-context.cpp +180 -106
- package/cpp/llama.cpp/src/llama-context.h +26 -16
- package/cpp/llama.cpp/src/llama-cparams.h +3 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +203 -39
- package/cpp/llama.cpp/src/llama-graph.h +147 -72
- package/cpp/llama.cpp/src/llama-hparams.cpp +40 -0
- package/cpp/llama.cpp/src/llama-hparams.h +10 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +11 -5
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +3 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +698 -302
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +89 -31
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +1 -0
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +16 -1
- package/cpp/llama.cpp/src/llama-model.cpp +1293 -312
- package/cpp/llama.cpp/src/llama-model.h +3 -4
- package/cpp/llama.cpp/src/llama-quant.cpp +1 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +363 -8
- package/cpp/llama.cpp/src/llama-vocab.h +2 -0
- package/cpp/llama.cpp/src/unicode.cpp +207 -0
- package/cpp/llama.cpp/src/unicode.h +2 -0
- package/ios/include/common.h +18 -4
- package/ios/include/llama.h +15 -7
- package/ios/libs/llama.xcframework/Info.plist +15 -15
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -5059
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +15 -7
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -5030
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3889
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -7
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -5030
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4016 -3891
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +15 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +15 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +15 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -5059
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +15 -7
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -5030
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3889
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -7
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5303 -5095
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +15 -7
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5274 -5066
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4044 -3919
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -7
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +4 -4
|
@@ -669,6 +669,36 @@ class TextModel(ModelBase):
|
|
|
669
669
|
# NOTE: if you get an error here, you need to update the convert_hf_to_gguf_update.py script
|
|
670
670
|
# or pull the latest version of the model from Huggingface
|
|
671
671
|
# don't edit the hashes manually!
|
|
672
|
+
if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b":
|
|
673
|
+
# ref: https://huggingface.co/THUDM/glm-4-9b-chat
|
|
674
|
+
res = "chatglm-bpe"
|
|
675
|
+
if chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516":
|
|
676
|
+
# ref: https://huggingface.co/THUDM/glm-4-9b-chat
|
|
677
|
+
res = "chatglm-bpe"
|
|
678
|
+
if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
|
|
679
|
+
# ref: https://huggingface.co/THUDM/glm-4-9b-hf
|
|
680
|
+
res = "glm4"
|
|
681
|
+
if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
|
|
682
|
+
# ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
|
|
683
|
+
res = "minerva-7b"
|
|
684
|
+
if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664":
|
|
685
|
+
# ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct
|
|
686
|
+
res = "hunyuan"
|
|
687
|
+
if chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6":
|
|
688
|
+
# ref: https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base
|
|
689
|
+
res = "falcon-h1"
|
|
690
|
+
if chkhsh == "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86":
|
|
691
|
+
# ref: https://huggingface.co/tiiuae/Falcon-H1-1B-Base
|
|
692
|
+
res = "falcon-h1"
|
|
693
|
+
if chkhsh == "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896":
|
|
694
|
+
# ref: https://huggingface.co/tiiuae/Falcon-H1-7B-Base
|
|
695
|
+
res = "falcon-h1"
|
|
696
|
+
if chkhsh == "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b":
|
|
697
|
+
# ref: https://huggingface.co/tiiuae/Falcon-H1-34B-Base
|
|
698
|
+
res = "falcon-h1"
|
|
699
|
+
if chkhsh == "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890":
|
|
700
|
+
# ref: https://huggingface.co/moonshotai/Kimi-K2-Base
|
|
701
|
+
res = "kimi-k2"
|
|
672
702
|
if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
|
|
673
703
|
# ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
|
|
674
704
|
res = "llama-bpe"
|
|
@@ -804,42 +834,18 @@ class TextModel(ModelBase):
|
|
|
804
834
|
if chkhsh == "d5f1dd6f980fec569fb218a81a7658ac45fc56b38c5a0adeb1c232fbe04ef5ec":
|
|
805
835
|
# ref: https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base
|
|
806
836
|
res = "seed-coder"
|
|
807
|
-
if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b":
|
|
808
|
-
# ref: https://huggingface.co/THUDM/glm-4-9b-chat
|
|
809
|
-
res = "chatglm-bpe"
|
|
810
|
-
if chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516":
|
|
811
|
-
# ref: https://huggingface.co/THUDM/glm-4-9b-chat
|
|
812
|
-
res = "chatglm-bpe"
|
|
813
|
-
if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
|
|
814
|
-
# ref: https://huggingface.co/THUDM/glm-4-9b-hf
|
|
815
|
-
res = "glm4"
|
|
816
|
-
if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
|
|
817
|
-
# ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
|
|
818
|
-
res = "minerva-7b"
|
|
819
|
-
if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664":
|
|
820
|
-
# ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct
|
|
821
|
-
res = "hunyuan"
|
|
822
837
|
if chkhsh == "b0a6b1c0bd5998ebd9df08611efde34a4ff03faed45ae09c43e6b31ebd4b94cf":
|
|
823
838
|
# ref: https://huggingface.co/skt/A.X-4.0
|
|
824
839
|
res = "a.x-4.0"
|
|
825
|
-
if chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6":
|
|
826
|
-
# ref: https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base
|
|
827
|
-
res = "falcon-h1"
|
|
828
|
-
if chkhsh == "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86":
|
|
829
|
-
# ref: https://huggingface.co/tiiuae/Falcon-H1-1B-Base
|
|
830
|
-
res = "falcon-h1"
|
|
831
|
-
if chkhsh == "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896":
|
|
832
|
-
# ref: https://huggingface.co/tiiuae/Falcon-H1-7B-Base
|
|
833
|
-
res = "falcon-h1"
|
|
834
|
-
if chkhsh == "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b":
|
|
835
|
-
# ref: https://huggingface.co/tiiuae/Falcon-H1-34B-Base
|
|
836
|
-
res = "falcon-h1"
|
|
837
840
|
if chkhsh == "f6791d196f87ce6b56a7d234be618e0d58f8cda3549416635b2bebcd22cd95c4":
|
|
838
841
|
# ref: https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct
|
|
839
842
|
res = "midm-2.0"
|
|
840
843
|
if chkhsh == "169bf0296a13c4d9b7672313f749eb36501d931022de052aad6e36f2bf34dd51":
|
|
841
844
|
# ref: https://huggingface.co/LiquidAI/LFM2-Tokenizer
|
|
842
845
|
res = "lfm2"
|
|
846
|
+
if chkhsh == "2085e1638f6c377a0aa4ead21b27bb4cb941bf800df86ed391011769c1758dfb":
|
|
847
|
+
# ref: https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B
|
|
848
|
+
res = "exaone4"
|
|
843
849
|
|
|
844
850
|
if res is None:
|
|
845
851
|
logger.warning("\n")
|
|
@@ -1082,7 +1088,14 @@ class TextModel(ModelBase):
|
|
|
1082
1088
|
self.gguf_writer.add_token_list(tokens)
|
|
1083
1089
|
self.gguf_writer.add_token_types(toktypes)
|
|
1084
1090
|
special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
|
|
1085
|
-
special_vocab.chat_template = "rwkv-world"
|
|
1091
|
+
if special_vocab.chat_template is None:
|
|
1092
|
+
template_path = Path(__file__).parent / "models" / "templates" / "llama-cpp-rwkv-world.jinja"
|
|
1093
|
+
if template_path.is_file():
|
|
1094
|
+
with open(template_path, "r", encoding="utf-8") as f:
|
|
1095
|
+
template = f.read()
|
|
1096
|
+
else:
|
|
1097
|
+
template = "rwkv-world"
|
|
1098
|
+
special_vocab.chat_template = template
|
|
1086
1099
|
# hack: Add '\n\n' as the EOT token to make it chat normally
|
|
1087
1100
|
special_vocab._set_special_token("eot", 261)
|
|
1088
1101
|
# hack: Override these as they have already been set (incorrectly)
|
|
@@ -2768,6 +2781,76 @@ class Qwen2Model(TextModel):
|
|
|
2768
2781
|
yield from super().modify_tensors(data_torch, name, bid)
|
|
2769
2782
|
|
|
2770
2783
|
|
|
2784
|
+
@ModelBase.register("DreamModel")
|
|
2785
|
+
class DreamModel(TextModel):
|
|
2786
|
+
model_arch = gguf.MODEL_ARCH.DREAM
|
|
2787
|
+
|
|
2788
|
+
def get_vocab_base(self) -> tuple[list[str], list[int], str]:
|
|
2789
|
+
tokens: list[str] = []
|
|
2790
|
+
toktypes: list[int] = []
|
|
2791
|
+
|
|
2792
|
+
from transformers import AutoTokenizer
|
|
2793
|
+
tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
|
|
2794
|
+
|
|
2795
|
+
vocab_dict = tokenizer.get_vocab()
|
|
2796
|
+
vocab_size = self.hparams.get("vocab_size", len(vocab_dict))
|
|
2797
|
+
assert max(vocab_dict.values()) < vocab_size
|
|
2798
|
+
|
|
2799
|
+
tokpre = self.get_vocab_base_pre(tokenizer)
|
|
2800
|
+
|
|
2801
|
+
reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in vocab_dict.items()}
|
|
2802
|
+
added_vocab = tokenizer.get_added_vocab()
|
|
2803
|
+
|
|
2804
|
+
for i in range(vocab_size):
|
|
2805
|
+
if i not in reverse_vocab:
|
|
2806
|
+
tokens.append(f"[PAD{i}]")
|
|
2807
|
+
toktypes.append(gguf.TokenType.UNUSED)
|
|
2808
|
+
elif reverse_vocab[i] in added_vocab:
|
|
2809
|
+
tokens.append(reverse_vocab[i])
|
|
2810
|
+
# Check if it's a special token - treat special tokens as CONTROL tokens
|
|
2811
|
+
if hasattr(tokenizer, 'added_tokens_decoder') and i in tokenizer.added_tokens_decoder:
|
|
2812
|
+
if tokenizer.added_tokens_decoder[i].special:
|
|
2813
|
+
toktypes.append(gguf.TokenType.CONTROL)
|
|
2814
|
+
else:
|
|
2815
|
+
toktypes.append(gguf.TokenType.USER_DEFINED)
|
|
2816
|
+
else:
|
|
2817
|
+
# Fallback: treat all added vocab as control tokens for special tokens like <|im_start|>
|
|
2818
|
+
toktypes.append(gguf.TokenType.CONTROL)
|
|
2819
|
+
else:
|
|
2820
|
+
tokens.append(reverse_vocab[i])
|
|
2821
|
+
toktypes.append(gguf.TokenType.NORMAL)
|
|
2822
|
+
|
|
2823
|
+
return tokens, toktypes, tokpre
|
|
2824
|
+
|
|
2825
|
+
def set_vocab(self):
|
|
2826
|
+
try:
|
|
2827
|
+
self._set_vocab_sentencepiece()
|
|
2828
|
+
except FileNotFoundError:
|
|
2829
|
+
self._set_vocab_gpt2()
|
|
2830
|
+
|
|
2831
|
+
def set_gguf_parameters(self):
|
|
2832
|
+
super().set_gguf_parameters()
|
|
2833
|
+
self._try_set_pooling_type()
|
|
2834
|
+
|
|
2835
|
+
# Dream models use non-causal attention for diffusion
|
|
2836
|
+
self.gguf_writer.add_causal_attention(False)
|
|
2837
|
+
# Handle RoPE scaling similar to Qwen2
|
|
2838
|
+
rope_scaling = self.hparams.get("rope_scaling") or {}
|
|
2839
|
+
if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
|
|
2840
|
+
self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
|
|
2841
|
+
self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
|
|
2842
|
+
self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
|
|
2843
|
+
|
|
2844
|
+
# Add Dream-specific parameters
|
|
2845
|
+
mask_token_id = self.hparams.get("mask_token_id")
|
|
2846
|
+
if mask_token_id is not None:
|
|
2847
|
+
self.gguf_writer.add_mask_token_id(mask_token_id)
|
|
2848
|
+
|
|
2849
|
+
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
|
2850
|
+
# Dream model tensors should be mapped directly since it's the base model
|
|
2851
|
+
yield from super().modify_tensors(data_torch, name, bid)
|
|
2852
|
+
|
|
2853
|
+
|
|
2771
2854
|
@ModelBase.register("Ernie4_5_ForCausalLM")
|
|
2772
2855
|
class Ernie4_5Model(TextModel):
|
|
2773
2856
|
model_arch = gguf.MODEL_ARCH.ERNIE4_5
|
|
@@ -2781,7 +2864,8 @@ class Ernie4_5Model(TextModel):
|
|
|
2781
2864
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
|
2782
2865
|
num_heads = self.hparams["num_attention_heads"]
|
|
2783
2866
|
num_kv_heads = self.hparams["num_key_value_heads"]
|
|
2784
|
-
head_dim = self.hparams["head_dim"]
|
|
2867
|
+
if (head_dim := self.hparams.get("head_dim")) is None:
|
|
2868
|
+
head_dim = self.hparams["hidden_size"] // num_heads
|
|
2785
2869
|
|
|
2786
2870
|
if "ernie." in name:
|
|
2787
2871
|
name = name.replace("ernie.", "model.")
|
|
@@ -2814,6 +2898,93 @@ class Ernie4_5Model(TextModel):
|
|
|
2814
2898
|
return [(self.map_tensor_name(name), data_torch)]
|
|
2815
2899
|
|
|
2816
2900
|
|
|
2901
|
+
@ModelBase.register("Ernie4_5_MoeForCausalLM")
|
|
2902
|
+
class Ernie4_5MoeModel(Ernie4_5Model):
|
|
2903
|
+
model_arch = gguf.MODEL_ARCH.ERNIE4_5_MOE
|
|
2904
|
+
_experts: list[dict[str, Tensor]] | None = None
|
|
2905
|
+
|
|
2906
|
+
def __init__(self, *args, **kwargs):
|
|
2907
|
+
super().__init__(*args, **kwargs)
|
|
2908
|
+
self._experts = [{} for _ in range(self.block_count)]
|
|
2909
|
+
|
|
2910
|
+
def set_gguf_parameters(self):
|
|
2911
|
+
super().set_gguf_parameters()
|
|
2912
|
+
self.gguf_writer.add_expert_count(self.hparams["moe_num_experts"])
|
|
2913
|
+
self.gguf_writer.add_expert_used_count(self.hparams["moe_k"])
|
|
2914
|
+
self.gguf_writer.add_interleave_moe_layer_step(self.hparams["moe_layer_interval"])
|
|
2915
|
+
self.gguf_writer.add_leading_dense_block_count(self.hparams["moe_layer_start_index"])
|
|
2916
|
+
if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
|
|
2917
|
+
self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
|
|
2918
|
+
if (shared_expert_count := self.hparams.get('moe_num_shared_experts')) is not None:
|
|
2919
|
+
self.gguf_writer.add_expert_shared_count(shared_expert_count)
|
|
2920
|
+
if shared_expert_count > 0 and (shared_expert_intermediate_size := self.hparams.get('intermediate_size')) is not None and (num_key_value_heads := self.hparams.get('num_key_value_heads')) is not None:
|
|
2921
|
+
self.gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size // num_key_value_heads)
|
|
2922
|
+
|
|
2923
|
+
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
|
2924
|
+
# Modify correction bias name as in DeepseekV2
|
|
2925
|
+
if name.endswith("e_score_correction_bias"):
|
|
2926
|
+
name = name.replace("e_score_correction_bias", "e_score_correction.bias")
|
|
2927
|
+
|
|
2928
|
+
# skip Multi-Token Prediction (MTP) layers (again, same as DeepseekV2)
|
|
2929
|
+
match = re.match(r"model.mtp_block.(\d+)", name)
|
|
2930
|
+
if match:
|
|
2931
|
+
return []
|
|
2932
|
+
|
|
2933
|
+
# skip all other MTP tensors for now
|
|
2934
|
+
match = re.match(r"model.mtp_emb_norm.(\d+)", name)
|
|
2935
|
+
if match:
|
|
2936
|
+
return []
|
|
2937
|
+
|
|
2938
|
+
match = re.match(r"model.mtp_hidden_norm.(\d+)", name)
|
|
2939
|
+
if match:
|
|
2940
|
+
return []
|
|
2941
|
+
|
|
2942
|
+
match = re.match(r"model.mtp_linear_proj.(\d+)", name)
|
|
2943
|
+
if match:
|
|
2944
|
+
return []
|
|
2945
|
+
|
|
2946
|
+
# process the experts separately
|
|
2947
|
+
if name.find("mlp.experts") != -1:
|
|
2948
|
+
n_experts = self.hparams["moe_num_experts"]
|
|
2949
|
+
assert bid is not None
|
|
2950
|
+
|
|
2951
|
+
if self._experts is None:
|
|
2952
|
+
self._experts = [{} for _ in range(self.block_count)]
|
|
2953
|
+
|
|
2954
|
+
self._experts[bid][name] = data_torch
|
|
2955
|
+
|
|
2956
|
+
if len(self._experts[bid]) >= n_experts * 3:
|
|
2957
|
+
tensors: list[tuple[str, Tensor]] = []
|
|
2958
|
+
|
|
2959
|
+
# merge the experts into a single 3d tensor
|
|
2960
|
+
for w_name in ["gate_proj", "up_proj", "down_proj"]:
|
|
2961
|
+
datas: list[Tensor] = []
|
|
2962
|
+
|
|
2963
|
+
for xid in range(n_experts):
|
|
2964
|
+
ename_to_retrieve = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
|
|
2965
|
+
datas.append(self._experts[bid][ename_to_retrieve])
|
|
2966
|
+
del self._experts[bid][ename_to_retrieve]
|
|
2967
|
+
|
|
2968
|
+
data_torch = torch.stack(datas, dim=0)
|
|
2969
|
+
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
|
|
2970
|
+
new_name = self.map_tensor_name(merged_name)
|
|
2971
|
+
tensors.append((new_name, data_torch))
|
|
2972
|
+
|
|
2973
|
+
return tensors
|
|
2974
|
+
else:
|
|
2975
|
+
return []
|
|
2976
|
+
return [(self.map_tensor_name(name), data_torch)]
|
|
2977
|
+
|
|
2978
|
+
def prepare_tensors(self):
|
|
2979
|
+
super().prepare_tensors()
|
|
2980
|
+
|
|
2981
|
+
if self._experts is not None:
|
|
2982
|
+
# flatten `list[dict[str, Tensor]]` into `list[str]`
|
|
2983
|
+
experts = [k for d in self._experts for k in d.keys()]
|
|
2984
|
+
if len(experts) > 0:
|
|
2985
|
+
raise ValueError(f"Unprocessed experts: {experts}")
|
|
2986
|
+
|
|
2987
|
+
|
|
2817
2988
|
@ModelBase.register(
|
|
2818
2989
|
"Qwen2VLModel",
|
|
2819
2990
|
"Qwen2VLForConditionalGeneration",
|
|
@@ -3501,6 +3672,175 @@ class PlamoModel(TextModel):
|
|
|
3501
3672
|
return [(new_name, data_torch)]
|
|
3502
3673
|
|
|
3503
3674
|
|
|
3675
|
+
@ModelBase.register("Plamo2ForCausalLM", "PLaMo2ForCausalLM")
|
|
3676
|
+
class Plamo2Model(TextModel):
|
|
3677
|
+
model_arch = gguf.MODEL_ARCH.PLAMO2
|
|
3678
|
+
|
|
3679
|
+
def set_vocab(self):
|
|
3680
|
+
# PLaMo 2 uses a custom tokenizer with a .jsonl file
|
|
3681
|
+
# We need to handle this specially
|
|
3682
|
+
tokenizer_jsonl_path = self.dir_model / "tokenizer.jsonl"
|
|
3683
|
+
tokenizer_config_path = self.dir_model / "tokenizer_config.json"
|
|
3684
|
+
|
|
3685
|
+
if not tokenizer_jsonl_path.is_file():
|
|
3686
|
+
raise FileNotFoundError(f"PLaMo 2 tokenizer file not found: {tokenizer_jsonl_path}")
|
|
3687
|
+
|
|
3688
|
+
# Load tokenizer config
|
|
3689
|
+
with open(tokenizer_config_path, 'r', encoding='utf-8') as f:
|
|
3690
|
+
tokenizer_config = json.load(f)
|
|
3691
|
+
|
|
3692
|
+
# Load tokens from JSONL file (actually a list format)
|
|
3693
|
+
tokens = []
|
|
3694
|
+
scores = []
|
|
3695
|
+
toktypes = []
|
|
3696
|
+
|
|
3697
|
+
with open(tokenizer_jsonl_path, 'r', encoding='utf-8') as f:
|
|
3698
|
+
for line_num, line in enumerate(f):
|
|
3699
|
+
if line.strip():
|
|
3700
|
+
token_data = json.loads(line)
|
|
3701
|
+
# Format: [token, score, type, ?, ?, ?, ?]
|
|
3702
|
+
token = token_data[0].encode("utf-8")
|
|
3703
|
+
score = float(token_data[1])
|
|
3704
|
+
token_type_str = token_data[2] if len(token_data) > 2 else "NORMAL"
|
|
3705
|
+
|
|
3706
|
+
tokens.append(token)
|
|
3707
|
+
scores.append(score)
|
|
3708
|
+
|
|
3709
|
+
# Map token type strings to GGUF token types
|
|
3710
|
+
if token_type_str == "UNKNOWN":
|
|
3711
|
+
toktypes.append(gguf.TokenType.UNKNOWN)
|
|
3712
|
+
elif token_type_str == "CONTROL":
|
|
3713
|
+
toktypes.append(gguf.TokenType.CONTROL)
|
|
3714
|
+
elif token_type_str == "BYTE":
|
|
3715
|
+
toktypes.append(gguf.TokenType.BYTE)
|
|
3716
|
+
else:
|
|
3717
|
+
# Check for PLaMo-2 special tokens
|
|
3718
|
+
token_str = token_data[0]
|
|
3719
|
+
if token_str.startswith("<|plamo:") and token_str.endswith("|>"):
|
|
3720
|
+
toktypes.append(gguf.TokenType.CONTROL)
|
|
3721
|
+
else:
|
|
3722
|
+
toktypes.append(gguf.TokenType.NORMAL)
|
|
3723
|
+
|
|
3724
|
+
vocab_size = self.hparams["vocab_size"]
|
|
3725
|
+
if vocab_size > len(tokens):
|
|
3726
|
+
pad_count = vocab_size - len(tokens)
|
|
3727
|
+
logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
|
|
3728
|
+
for i in range(1, pad_count + 1):
|
|
3729
|
+
tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
|
|
3730
|
+
scores.append(-1000.0)
|
|
3731
|
+
toktypes.append(gguf.TokenType.UNUSED)
|
|
3732
|
+
|
|
3733
|
+
# Use "plamo2" tokenizer type for PLaMo-2's custom Aho-Corasick tokenizer
|
|
3734
|
+
self.gguf_writer.add_tokenizer_model("plamo2")
|
|
3735
|
+
self.gguf_writer.add_tokenizer_pre("default")
|
|
3736
|
+
self.gguf_writer.add_token_list(tokens)
|
|
3737
|
+
self.gguf_writer.add_token_scores(scores)
|
|
3738
|
+
self.gguf_writer.add_token_types(toktypes)
|
|
3739
|
+
|
|
3740
|
+
# Add special tokens from config
|
|
3741
|
+
if "bos_token" in tokenizer_config and tokenizer_config["bos_token"] is not None:
|
|
3742
|
+
token_id = tokens.index(tokenizer_config["bos_token"].encode("utf-8"))
|
|
3743
|
+
self.gguf_writer.add_bos_token_id(token_id)
|
|
3744
|
+
if "eos_token" in tokenizer_config and tokenizer_config["eos_token"] is not None:
|
|
3745
|
+
token_id = tokens.index(tokenizer_config["eos_token"].encode("utf-8"))
|
|
3746
|
+
self.gguf_writer.add_eos_token_id(token_id)
|
|
3747
|
+
if "pad_token" in tokenizer_config and tokenizer_config["pad_token"] is not None:
|
|
3748
|
+
token_id = tokens.index(tokenizer_config["pad_token"].encode("utf-8"))
|
|
3749
|
+
self.gguf_writer.add_pad_token_id(token_id)
|
|
3750
|
+
if "sep_token" in tokenizer_config and tokenizer_config["sep_token"] is not None:
|
|
3751
|
+
token_id = tokens.index(tokenizer_config["sep_token"].encode("utf-8"))
|
|
3752
|
+
self.gguf_writer.add_sep_token_id(token_id)
|
|
3753
|
+
if "unk_token" in tokenizer_config and tokenizer_config["unk_token"] is not None:
|
|
3754
|
+
token_id = tokens.index(tokenizer_config["unk_token"].encode("utf-8"))
|
|
3755
|
+
self.gguf_writer.add_unk_token_id(token_id)
|
|
3756
|
+
|
|
3757
|
+
# Add <|plamo:op|> as EOT to ensure appropriate end of generation
|
|
3758
|
+
self.gguf_writer.add_eot_token_id(4)
|
|
3759
|
+
|
|
3760
|
+
self.gguf_writer.add_add_space_prefix(False)
|
|
3761
|
+
|
|
3762
|
+
def set_gguf_parameters(self):
    """Write the PLaMo-2 hyperparameters to ``self.gguf_writer``.

    Values are read from ``self.hparams``. Emitted in order: vocabulary
    size, the per-layer KV head counts (``0`` marks a Mamba layer, any other
    value an attention layer), the common transformer settings, the
    Mamba/SSM settings, the feed-forward length and finally the file type
    taken from ``self.ftype``.
    """
    hparams = self.hparams
    block_count = hparams["num_hidden_layers"]
    self.gguf_writer.add_vocab_size(hparams["vocab_size"])

    # Which layers are Mamba layers
    # PLaMo 2 uses mamba_step to indicate the pattern (e.g., 2 means every other layer)
    # This logic matches modeling_plamo.py's is_mamba function
    mamba_step = hparams.get("mamba_step", 2)
    mamba_enabled = hparams.get("mamba_enabled", True)
    n_head_kv = hparams.get("num_key_value_heads", 4)
    attention_slot = mamba_step // 2
    # With too few layers for the step pattern, only the last layer uses attention.
    # This test does not depend on the layer index, so evaluate it once.
    short_stack = block_count <= attention_slot

    def layer_is_mamba(i):
        # NOTE(review): with mamba disabled every layer is treated as an
        # attention layer so that the KV head count is still recorded;
        # previously nothing was written in that case. Confirm against
        # modeling_plamo.py.
        if not mamba_enabled:
            return False
        if short_stack:
            return i != block_count - 1
        return (i % mamba_step) != attention_slot

    head_count_kv = [0 if layer_is_mamba(i) else n_head_kv for i in range(block_count)]
    if head_count_kv:
        self.gguf_writer.add_head_count_kv(head_count_kv)

    self.gguf_writer.add_context_length(hparams.get("max_position_embeddings", 2048))
    self.gguf_writer.add_embedding_length(hparams.get("hidden_size", 4096))
    self.gguf_writer.add_block_count(block_count)
    self.gguf_writer.add_head_count(hparams.get("num_attention_heads", 32))
    self.gguf_writer.add_layer_norm_rms_eps(hparams.get("rms_norm_eps", 1e-06))
    self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 10000))

    # Mamba parameters
    mamba_num_heads = hparams.get("mamba_num_heads", 64)
    self.gguf_writer.add_ssm_state_size(hparams.get("mamba_d_state", 64))
    self.gguf_writer.add_ssm_conv_kernel(hparams.get("mamba_d_conv", 4))
    self.gguf_writer.add_ssm_time_step_rank(mamba_num_heads)
    # inner size = number of Mamba heads * width of one head
    self.gguf_writer.add_ssm_inner_size(mamba_num_heads * hparams.get("hidden_size_per_head", 128))
    self.gguf_writer.add_ssm_group_count(0)

    # MLP feed forward parameters (for attention layers)
    self.gguf_writer.add_feed_forward_length(hparams.get("intermediate_size", 13312))
    self.gguf_writer.add_file_type(self.ftype)
|
|
3807
|
+
|
|
3808
|
+
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
|
3809
|
+
del bid # unused
|
|
3810
|
+
|
|
3811
|
+
if name.endswith(".A_log"):
|
|
3812
|
+
data_torch = -torch.exp(data_torch)
|
|
3813
|
+
elif name.endswith(".dt_bias"):
|
|
3814
|
+
name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
|
|
3815
|
+
elif name.endswith(".dt_norm_weight"):
|
|
3816
|
+
name = name.rpartition(".dt_norm_weight")[0] + ".dt_norm.weight"
|
|
3817
|
+
elif name.endswith(".B_norm_weight"):
|
|
3818
|
+
name = name.rpartition(".B_norm_weight")[0] + ".B_norm.weight"
|
|
3819
|
+
elif name.endswith(".C_norm_weight"):
|
|
3820
|
+
name = name.rpartition(".C_norm_weight")[0] + ".C_norm.weight"
|
|
3821
|
+
elif name.endswith(".k_weight"):
|
|
3822
|
+
name = name.rpartition(".k_weight")[0] + ".k.weight"
|
|
3823
|
+
elif name.endswith(".q_weight"):
|
|
3824
|
+
name = name.rpartition(".q_weight")[0] + ".q.weight"
|
|
3825
|
+
elif name.endswith(".conv1d.weight"):
|
|
3826
|
+
data_torch = torch.squeeze(data_torch) # remove (, 1, )
|
|
3827
|
+
assert data_torch.ndim == 2
|
|
3828
|
+
elif name.endswith(".pre_mixer_norm.weight"):
|
|
3829
|
+
data_torch += 1.0
|
|
3830
|
+
elif name.endswith(".post_mixer_norm.weight"):
|
|
3831
|
+
data_torch += 1.0 / 5
|
|
3832
|
+
elif name.endswith(".pre_mlp_norm.weight"):
|
|
3833
|
+
data_torch += 1.0
|
|
3834
|
+
elif name.endswith(".post_mlp_norm.weight"):
|
|
3835
|
+
data_torch += 1.0 / (5**1.5)
|
|
3836
|
+
elif name.endswith(".norm.weight"):
|
|
3837
|
+
data_torch += 1.0
|
|
3838
|
+
|
|
3839
|
+
new_name = self.map_tensor_name(name)
|
|
3840
|
+
|
|
3841
|
+
return [(new_name, data_torch)]
|
|
3842
|
+
|
|
3843
|
+
|
|
3504
3844
|
@ModelBase.register("CodeShellForCausalLM")
|
|
3505
3845
|
class CodeShellModel(TextModel):
|
|
3506
3846
|
model_arch = gguf.MODEL_ARCH.CODESHELL
|
|
@@ -5563,7 +5903,58 @@ class DeepseekV2Model(TextModel):
|
|
|
5563
5903
|
model_arch = gguf.MODEL_ARCH.DEEPSEEK2
|
|
5564
5904
|
|
|
5565
5905
|
def set_vocab(self):
    """Write the tokenizer vocabulary to ``self.gguf_writer``.

    The regular GPT-2 style path (``self._set_vocab_gpt2``) is tried first.
    If it raises, the tokenizer is loaded through ``transformers`` and, when
    its pre-tokenizer is ``"kimi-k2"``, the merges and the token list are
    rebuilt from the tokenizer's mergeable ranks.

    Raises:
        NotImplementedError: the fallback tokenizer's pre-tokenizer is not
            ``"kimi-k2"``.
    """
    try:
        self._set_vocab_gpt2()
        return
    except Exception:
        # Deliberate best effort: any failure of the standard path falls
        # through to the tokenizer-specific handling below.
        pass

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
    tokpre = self.get_vocab_base_pre(tokenizer)

    if tokpre != "kimi-k2":
        raise NotImplementedError(f"Deepseek pre-tokenizer {tokpre!r} is not supported yet!")

    # Build merges list using the approach similar to HunYuanMoE
    merges = []
    vocab = {}
    mergeable_ranks = tokenizer.model._mergeable_ranks
    for token, rank in mergeable_ranks.items():
        vocab[QwenModel.token_bytes_to_string(token)] = rank
        if len(token) == 1:
            continue
        merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
        # only tokens that split into exactly two parts yield a merge rule
        if len(merged) == 2:
            merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))

    # Build token list
    vocab_size = self.hparams["vocab_size"]
    special_tokens = tokenizer.special_tokens
    # Built once: testing against special_tokens.values() inside the loop
    # would rescan every special token for each vocabulary id.
    special_ids = set(special_tokens.values())
    reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
    tokens: list[str] = []
    toktypes: list[int] = []

    for i in range(vocab_size):
        if i not in reverse_vocab:
            # ids without a token get a placeholder entry
            tokens.append(f"[PAD{i}]")
            toktypes.append(gguf.TokenType.UNUSED)
            continue
        tokens.append(reverse_vocab[i])
        if i in special_ids:
            toktypes.append(gguf.TokenType.CONTROL)
        else:
            toktypes.append(gguf.TokenType.NORMAL)

    self.gguf_writer.add_tokenizer_model("gpt2")
    self.gguf_writer.add_tokenizer_pre(tokpre)
    self.gguf_writer.add_token_list(tokens)
    self.gguf_writer.add_token_types(toktypes)
    self.gguf_writer.add_token_merges(merges)

    # merges were written above, so they are not loaded again here
    special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
    special_vocab.add_to_gguf(self.gguf_writer)
|
|
5567
5958
|
|
|
5568
5959
|
def set_gguf_parameters(self):
|
|
5569
5960
|
|
|
@@ -6095,7 +6486,7 @@ class JaisModel(TextModel):
|
|
|
6095
6486
|
self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)
|
|
6096
6487
|
|
|
6097
6488
|
|
|
6098
|
-
@ModelBase.register("Glm4ForCausalLM")
|
|
6489
|
+
@ModelBase.register("Glm4ForCausalLM", "Glm4vForConditionalGeneration")
|
|
6099
6490
|
class Glm4Model(TextModel):
|
|
6100
6491
|
model_arch = gguf.MODEL_ARCH.GLM4
|
|
6101
6492
|
|
|
@@ -6117,7 +6508,8 @@ class Glm4Model(TextModel):
|
|
|
6117
6508
|
|
|
6118
6509
|
def set_gguf_parameters(self):
|
|
6119
6510
|
super().set_gguf_parameters()
|
|
6120
|
-
rope_dim
|
|
6511
|
+
if (rope_dim := self.hparams.get("head_dim")) is None:
|
|
6512
|
+
rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
|
|
6121
6513
|
self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5)))
|
|
6122
6514
|
rope_scaling = self.hparams.get("rope_scaling") or {}
|
|
6123
6515
|
if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
|
|
@@ -6125,6 +6517,13 @@ class Glm4Model(TextModel):
|
|
|
6125
6517
|
self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
|
|
6126
6518
|
self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
|
|
6127
6519
|
|
|
6520
|
+
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
|
6521
|
+
if name.startswith("model.visual."): # ignore visual part of Glm4v
|
|
6522
|
+
return []
|
|
6523
|
+
elif name.startswith("model.language_model."):
|
|
6524
|
+
name = name.replace("language_model.", "") # for Glm4v
|
|
6525
|
+
return super().modify_tensors(data_torch, name, bid)
|
|
6526
|
+
|
|
6128
6527
|
|
|
6129
6528
|
@ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
|
|
6130
6529
|
class ChatGLMModel(TextModel):
|
|
@@ -6392,6 +6791,75 @@ class ExaoneModel(TextModel):
|
|
|
6392
6791
|
yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
|
|
6393
6792
|
|
|
6394
6793
|
|
|
6794
|
+
@ModelBase.register("Exaone4ForCausalLM")
class Exaone4Model(TextModel):
    """Converter for ``Exaone4ForCausalLM`` checkpoints."""
    model_arch = gguf.MODEL_ARCH.EXAONE4

    def set_vocab(self):
        """Write the tokenizer (model "gpt2") and its special tokens, merges included."""
        tokens, toktypes, tokpre = self.get_vocab_base()
        self.gguf_writer.add_tokenizer_model("gpt2")
        self.gguf_writer.add_tokenizer_pre(tokpre)
        self.gguf_writer.add_token_list(tokens)
        self.gguf_writer.add_token_types(toktypes)

        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
        special_vocab.add_to_gguf(self.gguf_writer)

    def set_gguf_parameters(self):
        """Write hyperparameters: base set, vocab size, sliding window and linear RoPE scaling.

        The per-layer sliding-window pattern is taken from ``layer_types``
        when present, otherwise expanded from ``sliding_window_pattern``
        (a repeating string such as ``"LLLG"`` or an integer period). It is
        only written when it covers every hidden layer.
        """
        super().set_gguf_parameters()
        hparams = self.hparams
        self.gguf_writer.add_vocab_size(hparams["vocab_size"])

        if hparams.get("sliding_window") is not None:
            self.gguf_writer.add_sliding_window(hparams["sliding_window"])
            if "layer_types" in hparams:
                self.gguf_writer.add_sliding_window_pattern([t == "sliding_attention" for t in hparams["layer_types"]])
            elif "sliding_window_pattern" in hparams:
                pattern = hparams["sliding_window_pattern"]
                n_layers = hparams["num_hidden_layers"]
                sliding_window_pattern = []
                # The truthiness checks skip an empty string / zero period,
                # which would otherwise divide by zero in the modulo below.
                if isinstance(pattern, str) and pattern:  # e.g. LLLG
                    sliding_window_pattern = [pattern[i % len(pattern)] == "L" for i in range(n_layers)]
                elif isinstance(pattern, int) and pattern:  # e.g. 4
                    sliding_window_pattern = [(i + 1) % pattern != 0 for i in range(n_layers)]
                if len(sliding_window_pattern) == n_layers:
                    self.gguf_writer.add_sliding_window_pattern(sliding_window_pattern)

        rope_scaling = self.hparams.get("rope_scaling") or {}
        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])

    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
        """Yield the per-frequency RoPE factor tensor for "llama3" rope scaling.

        Nothing is yielded when there is no ``rope_scaling`` entry or its
        type is not "llama3". Otherwise one ``ROPE_FREQS`` tensor (float32)
        is yielded with one factor per rotary frequency: 1 for short
        wavelengths, ``factor`` for long ones and a smooth blend in between.
        """
        rope_scaling = self.find_hparam(["rope_scaling"], optional=True)
        if not rope_scaling:
            return

        # Accept the legacy "type" key as well, like set_gguf_parameters does.
        rope_type = rope_scaling.get("rope_type", rope_scaling.get("type")) or ""
        if rope_type.lower() != "llama3":
            return

        base = self.hparams.get("rope_theta", 10_000.0)
        if (dim := self.hparams.get("head_dim")) is None:
            dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
        freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))

        factor = rope_scaling.get("factor", 16.0)
        low_freq_factor = rope_scaling.get("low_freq_factor", 1.0)
        high_freq_factor = rope_scaling.get("high_freq_factor", 4.0)
        # The original context length is part of the rope_scaling entry;
        # the top-level hparams lookup is kept as a fallback.
        old_context_len = rope_scaling.get("original_max_position_embeddings")
        if old_context_len is None:
            old_context_len = self.hparams.get("original_max_position_embeddings", 8192)

        low_freq_wavelen = old_context_len / low_freq_factor
        high_freq_wavelen = old_context_len / high_freq_factor

        rope_factors = []
        for freq in freqs:
            wavelen = 2 * math.pi / freq
            if wavelen < high_freq_wavelen:
                rope_factors.append(1)
            elif wavelen > low_freq_wavelen:
                rope_factors.append(factor)
            else:
                # interpolate between unscaled and fully scaled
                smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
                rope_factors.append(1 / ((1 - smooth) / factor + smooth))

        yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
|
|
6861
|
+
|
|
6862
|
+
|
|
6395
6863
|
@ModelBase.register("GraniteForCausalLM")
|
|
6396
6864
|
class GraniteModel(LlamaModel):
|
|
6397
6865
|
"""Conversion for IBM's GraniteForCausalLM"""
|