@novastera-oss/llamarn 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -14
- package/RNLlamaCpp.podspec +10 -3
- package/android/CMakeLists.txt +8 -0
- package/android/src/main/cpp/include/llama.h +62 -125
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/README.md +11 -3
- package/cpp/llama.cpp/build-xcframework.sh +1 -0
- package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/common/arg.cpp +153 -113
- package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
- package/cpp/llama.cpp/common/chat-parser.h +117 -0
- package/cpp/llama.cpp/common/chat.cpp +847 -699
- package/cpp/llama.cpp/common/chat.h +73 -6
- package/cpp/llama.cpp/common/common.cpp +50 -82
- package/cpp/llama.cpp/common/common.h +21 -17
- package/cpp/llama.cpp/common/json-partial.cpp +255 -0
- package/cpp/llama.cpp/common/json-partial.h +37 -0
- package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
- package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
- package/cpp/llama.cpp/common/regex-partial.h +56 -0
- package/cpp/llama.cpp/common/sampling.cpp +7 -8
- package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
- package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
- package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
- package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
- package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
- package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
- package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
- package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
- package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
- package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
- package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
- package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
- package/cpp/llama.cpp/include/llama.h +62 -125
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
- package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
- package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
- package/cpp/llama.cpp/models/templates/README.md +2 -0
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
- package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
- package/cpp/llama.cpp/src/llama-arch.h +2 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
- package/cpp/llama.cpp/src/llama-context.cpp +340 -123
- package/cpp/llama.cpp/src/llama-context.h +30 -0
- package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
- package/cpp/llama.cpp/src/llama-cparams.h +2 -0
- package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
- package/cpp/llama.cpp/src/llama-graph.h +52 -7
- package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
- package/cpp/llama.cpp/src/llama-hparams.h +37 -5
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
- package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
- package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
- package/cpp/llama.cpp/src/llama-memory.h +4 -3
- package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
- package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
- package/cpp/llama.cpp/src/llama-model.cpp +529 -172
- package/cpp/llama.cpp/src/llama-model.h +6 -1
- package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
- package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
- package/cpp/llama.cpp/src/llama-vocab.h +6 -0
- package/cpp/llama.cpp/src/llama.cpp +14 -0
- package/cpp/rn-completion.cpp +4 -2
- package/ios/include/chat.h +73 -6
- package/ios/include/common/minja/chat-template.hpp +9 -5
- package/ios/include/common/minja/minja.hpp +69 -36
- package/ios/include/common.h +21 -17
- package/ios/include/llama.h +62 -125
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/common/stb_image.h +0 -7988
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
|
@@ -45,7 +45,7 @@ class SentencePieceTokenTypes(IntEnum):
|
|
|
45
45
|
|
|
46
46
|
class ModelType(IntEnum):
|
|
47
47
|
TEXT = 1
|
|
48
|
-
|
|
48
|
+
MMPROJ = 2
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
AnyModel = TypeVar("AnyModel", bound="type[ModelBase]")
|
|
@@ -54,7 +54,7 @@ AnyModel = TypeVar("AnyModel", bound="type[ModelBase]")
|
|
|
54
54
|
class ModelBase:
|
|
55
55
|
_model_classes: dict[ModelType, dict[str, type[ModelBase]]] = {
|
|
56
56
|
ModelType.TEXT: {},
|
|
57
|
-
ModelType.
|
|
57
|
+
ModelType.MMPROJ: {},
|
|
58
58
|
}
|
|
59
59
|
|
|
60
60
|
dir_model: Path
|
|
@@ -88,7 +88,7 @@ class ModelBase:
|
|
|
88
88
|
small_first_shard: bool = False, hparams: dict[str, Any] | None = None, remote_hf_model_id: str | None = None):
|
|
89
89
|
if type(self) is ModelBase or \
|
|
90
90
|
type(self) is TextModel or \
|
|
91
|
-
type(self) is
|
|
91
|
+
type(self) is MmprojModel:
|
|
92
92
|
raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
|
|
93
93
|
|
|
94
94
|
self.dir_model = dir_model
|
|
@@ -308,6 +308,8 @@ class ModelBase:
|
|
|
308
308
|
gguf.MODEL_TENSOR.TIME_MIX_LERP_FUSED,
|
|
309
309
|
gguf.MODEL_TENSOR.POSNET_NORM1,
|
|
310
310
|
gguf.MODEL_TENSOR.POSNET_NORM2,
|
|
311
|
+
gguf.MODEL_TENSOR.V_ENC_EMBD_POS,
|
|
312
|
+
gguf.MODEL_TENSOR.A_ENC_EMBD_POS,
|
|
311
313
|
)
|
|
312
314
|
)
|
|
313
315
|
or not new_name.endswith(".weight")
|
|
@@ -421,23 +423,26 @@ class ModelBase:
|
|
|
421
423
|
try:
|
|
422
424
|
# for security reason, we don't allow loading remote code by default
|
|
423
425
|
# if a model need remote code, we will fallback to config.json
|
|
424
|
-
|
|
426
|
+
config = AutoConfig.from_pretrained(dir_model, trust_remote_code=False).to_dict()
|
|
425
427
|
except Exception as e:
|
|
426
428
|
logger.warning(f"Failed to load model config from {dir_model}: {e}")
|
|
427
429
|
logger.warning("Trying to load config.json instead")
|
|
428
430
|
with open(dir_model / "config.json", "r", encoding="utf-8") as f:
|
|
429
431
|
config = json.load(f)
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
432
|
+
if "llm_config" in config:
|
|
433
|
+
# rename for InternVL
|
|
434
|
+
config["text_config"] = config["llm_config"]
|
|
435
|
+
if "thinker_config" in config:
|
|
436
|
+
# rename for Qwen2.5-Omni
|
|
437
|
+
config["text_config"] = config["thinker_config"]["text_config"]
|
|
438
|
+
return config
|
|
434
439
|
|
|
435
440
|
@classmethod
|
|
436
441
|
def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
|
|
437
442
|
assert names
|
|
438
443
|
|
|
439
444
|
def func(modelcls: AnyModel) -> AnyModel:
|
|
440
|
-
model_type = ModelType.
|
|
445
|
+
model_type = ModelType.MMPROJ if modelcls.model_arch == gguf.MODEL_ARCH.MMPROJ else ModelType.TEXT
|
|
441
446
|
for name in names:
|
|
442
447
|
cls._model_classes[model_type][name] = modelcls
|
|
443
448
|
return modelcls
|
|
@@ -518,15 +523,15 @@ class TextModel(ModelBase):
|
|
|
518
523
|
self.gguf_writer.add_context_length(n_ctx)
|
|
519
524
|
logger.info(f"gguf: context length = {n_ctx}")
|
|
520
525
|
|
|
521
|
-
if (n_embd := self.find_hparam(["hidden_size", "n_embd"], optional=True)) is not None:
|
|
526
|
+
if (n_embd := self.find_hparam(["hidden_size", "n_embd", "dim"], optional=True)) is not None:
|
|
522
527
|
self.gguf_writer.add_embedding_length(n_embd)
|
|
523
528
|
logger.info(f"gguf: embedding length = {n_embd}")
|
|
524
529
|
|
|
525
|
-
if (n_ff := self.find_hparam(["intermediate_size", "n_inner"], optional=True)) is not None:
|
|
530
|
+
if (n_ff := self.find_hparam(["intermediate_size", "n_inner", "hidden_dim"], optional=True)) is not None:
|
|
526
531
|
self.gguf_writer.add_feed_forward_length(n_ff)
|
|
527
532
|
logger.info(f"gguf: feed forward length = {n_ff}")
|
|
528
533
|
|
|
529
|
-
if (n_head := self.find_hparam(["num_attention_heads", "n_head"], optional=True)) is not None:
|
|
534
|
+
if (n_head := self.find_hparam(["num_attention_heads", "n_head", "n_heads"], optional=True)) is not None:
|
|
530
535
|
self.gguf_writer.add_head_count(n_head)
|
|
531
536
|
logger.info(f"gguf: head count = {n_head}")
|
|
532
537
|
|
|
@@ -669,12 +674,12 @@ class TextModel(ModelBase):
|
|
|
669
674
|
if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed":
|
|
670
675
|
# ref: https://huggingface.co/tiiuae/falcon-7b
|
|
671
676
|
res = "falcon"
|
|
672
|
-
if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
|
|
673
|
-
# ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
|
|
674
|
-
res = "falcon3"
|
|
675
677
|
if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
|
|
676
678
|
# ref: https://huggingface.co/BAAI/bge-small-en-v1.5
|
|
677
679
|
res = "bert-bge"
|
|
680
|
+
if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
|
|
681
|
+
# ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
|
|
682
|
+
res = "falcon3"
|
|
678
683
|
if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7":
|
|
679
684
|
# ref: https://huggingface.co/BAAI/bge-large-zh-v1.5
|
|
680
685
|
res = "bert-bge-large"
|
|
@@ -726,9 +731,6 @@ class TextModel(ModelBase):
|
|
|
726
731
|
if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a":
|
|
727
732
|
# ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
|
|
728
733
|
res = "jina-v2-code"
|
|
729
|
-
if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b" or chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516":
|
|
730
|
-
# ref: https://huggingface.co/THUDM/glm-4-9b-chat
|
|
731
|
-
res = "chatglm-bpe"
|
|
732
734
|
if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee":
|
|
733
735
|
# ref: https://huggingface.co/LumiOpen/Viking-7B
|
|
734
736
|
res = "viking"
|
|
@@ -759,9 +761,6 @@ class TextModel(ModelBase):
|
|
|
759
761
|
if chkhsh == "60824e3c0d9401f89943cbb2fff727f0e2d4c545ba4df2d6e4f09a6db0f5b450":
|
|
760
762
|
# ref: https://huggingface.co/facebook/chameleon-7b
|
|
761
763
|
res = "chameleon"
|
|
762
|
-
if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
|
|
763
|
-
# ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
|
|
764
|
-
res = "minerva-7b"
|
|
765
764
|
if chkhsh == "8b5a93ed704057481f240da0be7e7dca721d7f8f4755263b6807227a2cbeae65":
|
|
766
765
|
# ref: https://huggingface.co/sentence-transformers/stsb-roberta-base
|
|
767
766
|
res = "roberta-bpe"
|
|
@@ -792,15 +791,24 @@ class TextModel(ModelBase):
|
|
|
792
791
|
if chkhsh == "d353350c764d8c3b39c763113960e4fb4919bea5fbf208a0e3b22e8469dc7406":
|
|
793
792
|
# ref: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
|
|
794
793
|
res = "llama4"
|
|
795
|
-
if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
|
|
796
|
-
# ref: https://huggingface.co/THUDM/glm-4-9b-hf
|
|
797
|
-
res = "glm4"
|
|
798
794
|
if chkhsh == "0e9433cbbb161f89e264eb32e8e64bfe69e834973ffca5d41d3948a604a3e2a3":
|
|
799
795
|
# ref: https://huggingface.co/mistral-community/pixtral-12b
|
|
800
796
|
res = "pixtral"
|
|
801
797
|
if chkhsh == "d5f1dd6f980fec569fb218a81a7658ac45fc56b38c5a0adeb1c232fbe04ef5ec":
|
|
802
798
|
# ref: https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base
|
|
803
799
|
res = "seed-coder"
|
|
800
|
+
if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b":
|
|
801
|
+
# ref: https://huggingface.co/THUDM/glm-4-9b-chat
|
|
802
|
+
res = "chatglm-bpe"
|
|
803
|
+
if chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516":
|
|
804
|
+
# ref: https://huggingface.co/THUDM/glm-4-9b-chat
|
|
805
|
+
res = "chatglm-bpe"
|
|
806
|
+
if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
|
|
807
|
+
# ref: https://huggingface.co/THUDM/glm-4-9b-hf
|
|
808
|
+
res = "glm4"
|
|
809
|
+
if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
|
|
810
|
+
# ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
|
|
811
|
+
res = "minerva-7b"
|
|
804
812
|
|
|
805
813
|
if res is None:
|
|
806
814
|
logger.warning("\n")
|
|
@@ -1113,60 +1121,116 @@ class TextModel(ModelBase):
|
|
|
1113
1121
|
self.gguf_writer.add_pooling_type(pooling_type)
|
|
1114
1122
|
|
|
1115
1123
|
|
|
1116
|
-
class
|
|
1117
|
-
model_type = ModelType.
|
|
1118
|
-
model_arch = gguf.MODEL_ARCH.
|
|
1124
|
+
class MmprojModel(ModelBase):
|
|
1125
|
+
model_type = ModelType.MMPROJ
|
|
1126
|
+
model_arch = gguf.MODEL_ARCH.MMPROJ
|
|
1119
1127
|
preprocessor_config: dict[str, Any]
|
|
1120
1128
|
global_config: dict[str, Any]
|
|
1121
1129
|
|
|
1130
|
+
n_block_keys = ["n_layers", "num_hidden_layers", "n_layer", "num_layers", "depth"]
|
|
1131
|
+
|
|
1132
|
+
has_vision_encoder: bool = True # by default
|
|
1133
|
+
has_audio_encoder: bool = False
|
|
1134
|
+
|
|
1135
|
+
# for models having multiple encoders, we need to separate their hparams
|
|
1136
|
+
hparams_vision: dict[str, Any] | None = None
|
|
1137
|
+
hparams_audio: dict[str, Any] | None = None
|
|
1138
|
+
|
|
1122
1139
|
def __init__(self, *args, **kwargs):
|
|
1123
1140
|
super().__init__(*args, **kwargs)
|
|
1124
1141
|
|
|
1125
|
-
if self.model_arch != gguf.MODEL_ARCH.
|
|
1126
|
-
raise TypeError("
|
|
1142
|
+
if self.model_arch != gguf.MODEL_ARCH.MMPROJ:
|
|
1143
|
+
raise TypeError("MmprojModel must be subclassed with model_arch = gguf.MODEL_ARCH.MMPROJ")
|
|
1127
1144
|
|
|
1128
1145
|
# get n_embd of the text model
|
|
1129
1146
|
if "text_config" not in self.hparams:
|
|
1130
1147
|
self.hparams["text_config"] = {}
|
|
1148
|
+
if "audio_config" not in self.hparams:
|
|
1149
|
+
self.hparams["audio_config"] = {}
|
|
1131
1150
|
text_config = {**self.hparams, **self.hparams["text_config"]}
|
|
1132
1151
|
self.n_embd_text = text_config.get("hidden_size", text_config.get("n_embd", 0))
|
|
1133
1152
|
assert self.n_embd_text > 0, "n_embd not found in hparams"
|
|
1134
1153
|
|
|
1135
|
-
if "vision_config" not in self.hparams:
|
|
1136
|
-
raise ValueError("vision_config not found in hparams")
|
|
1137
1154
|
# move vision config to the top level, while preserving the original hparams in global_config
|
|
1138
|
-
|
|
1139
|
-
self.
|
|
1155
|
+
import copy
|
|
1156
|
+
self.global_config = copy.deepcopy(self.hparams)
|
|
1157
|
+
self.hparams_vision = self.get_vision_config()
|
|
1158
|
+
self.hparams_audio = self.get_audio_config()
|
|
1159
|
+
|
|
1160
|
+
if self.hparams_vision is None and self.hparams_audio is None:
|
|
1161
|
+
raise ValueError("vision_config / audio_config not found in hparams")
|
|
1162
|
+
|
|
1163
|
+
# for compat with vision-only models
|
|
1164
|
+
self.hparams = self.hparams_vision or self.hparams_audio or self.hparams
|
|
1140
1165
|
|
|
1141
|
-
|
|
1142
|
-
|
|
1166
|
+
# TODO @ngxson : this is a hack to support both vision and audio encoders
|
|
1167
|
+
have_multiple_encoders = self.has_audio_encoder and self.has_vision_encoder
|
|
1168
|
+
self.block_count = 128 if have_multiple_encoders else self.find_hparam(self.n_block_keys, True)
|
|
1169
|
+
self.tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.MMPROJ, self.block_count)
|
|
1143
1170
|
|
|
1144
1171
|
# load preprocessor config
|
|
1145
1172
|
with open(self.dir_model / "preprocessor_config.json", "r", encoding="utf-8") as f:
|
|
1146
1173
|
self.preprocessor_config = json.load(f)
|
|
1147
1174
|
|
|
1175
|
+
def get_vision_config(self) -> dict[str, Any] | None:
|
|
1176
|
+
return self.global_config.get("vision_config")
|
|
1177
|
+
|
|
1178
|
+
def get_audio_config(self) -> dict[str, Any] | None:
|
|
1179
|
+
return self.global_config.get("audio_config")
|
|
1180
|
+
|
|
1148
1181
|
def set_type(self):
|
|
1149
|
-
self.gguf_writer.add_type(gguf.GGUFType.
|
|
1182
|
+
self.gguf_writer.add_type(gguf.GGUFType.MMPROJ)
|
|
1150
1183
|
|
|
1151
1184
|
def set_gguf_parameters(self):
|
|
1152
1185
|
self.gguf_writer.add_file_type(self.ftype)
|
|
1153
|
-
self.gguf_writer.add_vision_projection_dim(self.n_embd_text)
|
|
1154
|
-
self.gguf_writer.add_vision_has_vision_encoder(True)
|
|
1155
1186
|
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1187
|
+
if self.has_vision_encoder:
|
|
1188
|
+
self.gguf_writer.add_clip_has_vision_encoder(True)
|
|
1189
|
+
self.gguf_writer.add_vision_projection_dim(self.n_embd_text)
|
|
1190
|
+
|
|
1191
|
+
# vision config
|
|
1192
|
+
self.gguf_writer.add_vision_image_size(self.find_vparam(["image_size"]))
|
|
1193
|
+
self.gguf_writer.add_vision_patch_size(self.find_vparam(["patch_size"]))
|
|
1194
|
+
self.gguf_writer.add_vision_embedding_length(self.find_vparam(["hidden_size"]))
|
|
1195
|
+
self.gguf_writer.add_vision_feed_forward_length(self.find_vparam(["intermediate_size"]))
|
|
1196
|
+
self.gguf_writer.add_vision_block_count(self.find_vparam(self.n_block_keys))
|
|
1197
|
+
self.gguf_writer.add_vision_head_count(self.find_vparam(["num_attention_heads"]))
|
|
1198
|
+
|
|
1199
|
+
# preprocessor config
|
|
1200
|
+
self.gguf_writer.add_vision_image_mean(self.preprocessor_config["image_mean"])
|
|
1201
|
+
self.gguf_writer.add_vision_image_std(self.preprocessor_config["image_std"])
|
|
1202
|
+
|
|
1203
|
+
if self.has_audio_encoder:
|
|
1204
|
+
self.gguf_writer.add_clip_has_audio_encoder(True)
|
|
1205
|
+
self.gguf_writer.add_audio_projection_dim(self.n_embd_text)
|
|
1206
|
+
|
|
1207
|
+
# audio config
|
|
1208
|
+
self.gguf_writer.add_audio_embedding_length(self.find_aparam(["hidden_size"]))
|
|
1209
|
+
self.gguf_writer.add_audio_feed_forward_length(self.find_aparam(["intermediate_size"]))
|
|
1210
|
+
self.gguf_writer.add_audio_block_count(self.find_aparam(self.n_block_keys))
|
|
1211
|
+
self.gguf_writer.add_audio_head_count(self.find_aparam(["num_attention_heads"]))
|
|
1163
1212
|
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
self.gguf_writer.add_vision_image_std(self.preprocessor_config["image_std"])
|
|
1213
|
+
if not self.has_vision_encoder and not self.has_audio_encoder:
|
|
1214
|
+
raise ValueError("MmprojModel must have either vision or audio encoder")
|
|
1167
1215
|
|
|
1168
1216
|
def write_vocab(self):
|
|
1169
|
-
raise ValueError("
|
|
1217
|
+
raise ValueError("MmprojModel does not support vocab writing")
|
|
1218
|
+
|
|
1219
|
+
def find_vparam(self, keys: Iterable[str], optional: bool = False) -> Any:
|
|
1220
|
+
assert self.hparams_vision is not None
|
|
1221
|
+
return self._find_param(self.hparams_vision, keys, optional)
|
|
1222
|
+
|
|
1223
|
+
def find_aparam(self, keys: Iterable[str], optional: bool = False) -> Any:
|
|
1224
|
+
assert self.hparams_audio is not None
|
|
1225
|
+
return self._find_param(self.hparams_audio, keys, optional)
|
|
1226
|
+
|
|
1227
|
+
def _find_param(self, obj: dict[str, Any], keys: Iterable[str], optional: bool = False) -> Any:
|
|
1228
|
+
key = next((k for k in keys if k in obj), None)
|
|
1229
|
+
if key is not None:
|
|
1230
|
+
return obj[key]
|
|
1231
|
+
if optional:
|
|
1232
|
+
return None
|
|
1233
|
+
raise KeyError(f"could not find any of: {keys}")
|
|
1170
1234
|
|
|
1171
1235
|
|
|
1172
1236
|
@ModelBase.register("GPTNeoXForCausalLM")
|
|
@@ -1780,7 +1844,8 @@ class StableLMModel(TextModel):
|
|
|
1780
1844
|
"MistralForCausalLM",
|
|
1781
1845
|
"MixtralForCausalLM",
|
|
1782
1846
|
"VLlama3ForCausalLM",
|
|
1783
|
-
"LlavaForConditionalGeneration"
|
|
1847
|
+
"LlavaForConditionalGeneration",
|
|
1848
|
+
"LlamaModel")
|
|
1784
1849
|
class LlamaModel(TextModel):
|
|
1785
1850
|
model_arch = gguf.MODEL_ARCH.LLAMA
|
|
1786
1851
|
undo_permute = True
|
|
@@ -1860,6 +1925,8 @@ class LlamaModel(TextModel):
|
|
|
1860
1925
|
|
|
1861
1926
|
if is_vision_tensor:
|
|
1862
1927
|
return [] # skip vision tensors
|
|
1928
|
+
elif self.hf_arch == "LlamaModel":
|
|
1929
|
+
name = "model." + name
|
|
1863
1930
|
elif name.startswith("model.text_model"):
|
|
1864
1931
|
name = name.replace("text_model.", "") # for SmolVLM
|
|
1865
1932
|
elif name.startswith("language_model."):
|
|
@@ -1950,7 +2017,7 @@ class LlamaModel(TextModel):
|
|
|
1950
2017
|
"LlavaForConditionalGeneration", # pixtral
|
|
1951
2018
|
"Mistral3ForConditionalGeneration", # mistral small 3.1
|
|
1952
2019
|
)
|
|
1953
|
-
class LlavaVisionModel(
|
|
2020
|
+
class LlavaVisionModel(MmprojModel):
|
|
1954
2021
|
img_break_tok_id = -1
|
|
1955
2022
|
|
|
1956
2023
|
def __init__(self, *args, **kwargs):
|
|
@@ -1976,7 +2043,7 @@ class LlavaVisionModel(VisionModel):
|
|
|
1976
2043
|
super().set_gguf_parameters()
|
|
1977
2044
|
hparams = self.hparams
|
|
1978
2045
|
if hparams["model_type"] == "pixtral":
|
|
1979
|
-
self.gguf_writer.
|
|
2046
|
+
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.PIXTRAL)
|
|
1980
2047
|
self.gguf_writer.add_vision_attention_layernorm_eps(hparams["layer_norm_eps"])
|
|
1981
2048
|
|
|
1982
2049
|
# hidden_act
|
|
@@ -2015,7 +2082,7 @@ class LlavaVisionModel(VisionModel):
|
|
|
2015
2082
|
|
|
2016
2083
|
|
|
2017
2084
|
@ModelBase.register("Idefics3ForConditionalGeneration", "SmolVLMForConditionalGeneration")
|
|
2018
|
-
class SmolVLMModel(
|
|
2085
|
+
class SmolVLMModel(MmprojModel):
|
|
2019
2086
|
def __init__(self, *args, **kwargs):
|
|
2020
2087
|
super().__init__(*args, **kwargs)
|
|
2021
2088
|
if self.hparams["model_type"] == "smolvlm_vision":
|
|
@@ -2027,7 +2094,7 @@ class SmolVLMModel(VisionModel):
|
|
|
2027
2094
|
|
|
2028
2095
|
def set_gguf_parameters(self):
|
|
2029
2096
|
super().set_gguf_parameters()
|
|
2030
|
-
self.gguf_writer.
|
|
2097
|
+
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.IDEFICS3)
|
|
2031
2098
|
self.gguf_writer.add_vision_attention_layernorm_eps(self.hparams.get("layer_norm_eps", 1e-5))
|
|
2032
2099
|
self.gguf_writer.add_vision_projector_scale_factor(self.global_config.get("scale_factor", 2))
|
|
2033
2100
|
self.gguf_writer.add_vision_use_gelu(True)
|
|
@@ -2069,6 +2136,9 @@ class Llama4Model(LlamaModel):
|
|
|
2069
2136
|
self.gguf_writer.add_expert_feed_forward_length(self.hparams["intermediate_size_moe"])
|
|
2070
2137
|
|
|
2071
2138
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
|
|
2139
|
+
if name.startswith("language_model."):
|
|
2140
|
+
name = name.replace("language_model.", "")
|
|
2141
|
+
|
|
2072
2142
|
# split the gate_up into gate and up
|
|
2073
2143
|
if "gate_up_proj" in name:
|
|
2074
2144
|
name_up = name.replace("gate_up_proj", "up_proj.weight")
|
|
@@ -2089,6 +2159,29 @@ class Llama4Model(LlamaModel):
|
|
|
2089
2159
|
return super().modify_tensors(data_torch, name, bid)
|
|
2090
2160
|
|
|
2091
2161
|
|
|
2162
|
+
@ModelBase.register("Llama4ForConditionalGeneration")
|
|
2163
|
+
class Llama4VisionModel(MmprojModel):
|
|
2164
|
+
def set_gguf_parameters(self):
|
|
2165
|
+
super().set_gguf_parameters()
|
|
2166
|
+
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.LLAMA4)
|
|
2167
|
+
self.gguf_writer.add_vision_attention_layernorm_eps(self.hparams["norm_eps"])
|
|
2168
|
+
self.gguf_writer.add_vision_projector_scale_factor(int(1.0 / self.hparams["pixel_shuffle_ratio"]))
|
|
2169
|
+
assert self.hparams["hidden_act"] == "gelu"
|
|
2170
|
+
self.gguf_writer.add_vision_use_gelu(True)
|
|
2171
|
+
|
|
2172
|
+
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
|
2173
|
+
del bid # unused
|
|
2174
|
+
if "multi_modal_projector" in name or "vision_model" in name:
|
|
2175
|
+
# process vision tensors
|
|
2176
|
+
if "positional_embedding_vlm" in name and ".weight" not in name:
|
|
2177
|
+
name += ".weight"
|
|
2178
|
+
if "multi_modal_projector.linear_1" in name:
|
|
2179
|
+
# despite the name with number postfix, this is a single fully connected layer
|
|
2180
|
+
return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC], data_torch)]
|
|
2181
|
+
return [(self.map_tensor_name(name), data_torch)]
|
|
2182
|
+
return []
|
|
2183
|
+
|
|
2184
|
+
|
|
2092
2185
|
@ModelBase.register("Mistral3ForConditionalGeneration")
|
|
2093
2186
|
class Mistral3Model(LlamaModel):
|
|
2094
2187
|
model_arch = gguf.MODEL_ARCH.LLAMA
|
|
@@ -2591,7 +2684,7 @@ class QwenModel(TextModel):
|
|
|
2591
2684
|
self.gguf_writer.add_file_type(self.ftype)
|
|
2592
2685
|
|
|
2593
2686
|
|
|
2594
|
-
@ModelBase.register("Qwen2Model", "Qwen2ForCausalLM")
|
|
2687
|
+
@ModelBase.register("Qwen2Model", "Qwen2ForCausalLM", "Qwen2AudioForConditionalGeneration")
|
|
2595
2688
|
class Qwen2Model(TextModel):
|
|
2596
2689
|
model_arch = gguf.MODEL_ARCH.QWEN2
|
|
2597
2690
|
|
|
@@ -2615,13 +2708,19 @@ class Qwen2Model(TextModel):
|
|
|
2615
2708
|
name = f"model.{name}" # map to Qwen2ForCausalLM tensors
|
|
2616
2709
|
if "language_model." in name:
|
|
2617
2710
|
name = name.replace("language_model.", "") # for InternVL
|
|
2618
|
-
if name.startswith("mlp") or name.startswith("
|
|
2619
|
-
|
|
2711
|
+
if name.startswith("mlp") or name.startswith("multi_modal_projector") \
|
|
2712
|
+
or name.startswith("vision_model") or name.startswith("audio_tower"):
|
|
2713
|
+
# skip vision and audio tensors
|
|
2620
2714
|
return []
|
|
2621
2715
|
yield from super().modify_tensors(data_torch, name, bid)
|
|
2622
2716
|
|
|
2623
2717
|
|
|
2624
|
-
@ModelBase.register(
|
|
2718
|
+
@ModelBase.register(
|
|
2719
|
+
"Qwen2VLModel",
|
|
2720
|
+
"Qwen2VLForConditionalGeneration",
|
|
2721
|
+
"Qwen2_5_VLForConditionalGeneration",
|
|
2722
|
+
"Qwen2_5OmniModel",
|
|
2723
|
+
)
|
|
2625
2724
|
class Qwen2VLModel(TextModel):
|
|
2626
2725
|
model_arch = gguf.MODEL_ARCH.QWEN2VL
|
|
2627
2726
|
|
|
@@ -2639,31 +2738,40 @@ class Qwen2VLModel(TextModel):
|
|
|
2639
2738
|
|
|
2640
2739
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
|
2641
2740
|
del bid # unused
|
|
2642
|
-
if name.startswith("
|
|
2643
|
-
|
|
2741
|
+
if name.startswith("thinker."):
|
|
2742
|
+
name = name.replace("thinker.", "")
|
|
2743
|
+
if name.startswith("visual") or name.startswith("audio") or \
|
|
2744
|
+
name.startswith("talker") or name.startswith("token2wav"):
|
|
2745
|
+
# skip multimodal tensors
|
|
2644
2746
|
return []
|
|
2645
2747
|
return [(self.map_tensor_name(name), data_torch)]
|
|
2646
2748
|
|
|
2647
2749
|
|
|
2648
|
-
@ModelBase.register("Qwen2VLForConditionalGeneration", "Qwen2_5_VLForConditionalGeneration")
|
|
2649
|
-
class Qwen2VLVisionModel(
|
|
2750
|
+
@ModelBase.register("Qwen2VLModel", "Qwen2VLForConditionalGeneration", "Qwen2_5_VLForConditionalGeneration")
|
|
2751
|
+
class Qwen2VLVisionModel(MmprojModel):
|
|
2650
2752
|
def __init__(self, *args, **kwargs):
|
|
2651
2753
|
super().__init__(*args, **kwargs)
|
|
2652
|
-
self.
|
|
2754
|
+
assert self.hparams_vision is not None
|
|
2755
|
+
self.hparams_vision["image_size"] = self.hparams_vision.get("image_size", 560)
|
|
2653
2756
|
# rename config.json values
|
|
2654
|
-
self.
|
|
2655
|
-
self.
|
|
2656
|
-
if "embed_dim" in self.
|
|
2657
|
-
self.
|
|
2658
|
-
self.
|
|
2757
|
+
self.hparams_vision["num_attention_heads"] = self.hparams_vision.get("num_heads")
|
|
2758
|
+
self.hparams_vision["num_hidden_layers"] = self.hparams_vision.get("depth")
|
|
2759
|
+
if "embed_dim" in self.hparams_vision: # qwen2vl
|
|
2760
|
+
self.hparams_vision["intermediate_size"] = self.hparams_vision.get("hidden_size")
|
|
2761
|
+
self.hparams_vision["hidden_size"] = self.hparams_vision.get("embed_dim")
|
|
2659
2762
|
|
|
2660
2763
|
def set_gguf_parameters(self):
|
|
2661
2764
|
super().set_gguf_parameters()
|
|
2662
|
-
|
|
2663
|
-
|
|
2664
|
-
|
|
2665
|
-
|
|
2666
|
-
self.gguf_writer.
|
|
2765
|
+
assert self.hparams_vision is not None
|
|
2766
|
+
hparams = self.hparams_vision
|
|
2767
|
+
model_type = self.global_config['model_type']
|
|
2768
|
+
if model_type == 'qwen2_vl':
|
|
2769
|
+
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.QWEN2VL)
|
|
2770
|
+
elif model_type == 'qwen2_5_vl' or model_type == 'qwen2_5_omni':
|
|
2771
|
+
if model_type == 'qwen2_5_omni':
|
|
2772
|
+
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.QWEN25O)
|
|
2773
|
+
else:
|
|
2774
|
+
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.QWEN25VL)
|
|
2667
2775
|
self.gguf_writer.add_vision_use_silu(True)
|
|
2668
2776
|
# find n_wa_pattern (window attention pattern)
|
|
2669
2777
|
fullatt_block_indexes = hparams.get("fullatt_block_indexes")
|
|
@@ -2721,12 +2829,72 @@ class Qwen2VLVisionModel(VisionModel):
|
|
|
2721
2829
|
return [] # skip other tensors
|
|
2722
2830
|
|
|
2723
2831
|
|
|
2832
|
+
@ModelBase.register("Qwen2_5OmniModel")
|
|
2833
|
+
class Qwen25OmniModel(Qwen2VLVisionModel):
|
|
2834
|
+
has_vision_encoder = True
|
|
2835
|
+
has_audio_encoder = True
|
|
2836
|
+
|
|
2837
|
+
def __init__(self, *args, **kwargs):
|
|
2838
|
+
super().__init__(*args, **kwargs)
|
|
2839
|
+
assert self.hparams_audio is not None
|
|
2840
|
+
self.hparams_audio["hidden_size"] = self.hparams_audio["d_model"]
|
|
2841
|
+
self.hparams_audio["intermediate_size"] = self.hparams_audio["encoder_ffn_dim"]
|
|
2842
|
+
self.hparams_audio["num_attention_heads"] = self.hparams_audio["encoder_attention_heads"]
|
|
2843
|
+
|
|
2844
|
+
def set_gguf_parameters(self):
|
|
2845
|
+
super().set_gguf_parameters()
|
|
2846
|
+
assert self.hparams_audio is not None
|
|
2847
|
+
self.gguf_writer.add_audio_num_mel_bins(self.hparams_audio["num_mel_bins"])
|
|
2848
|
+
self.gguf_writer.add_audio_attention_layernorm_eps(self.hparams_audio.get("layer_norm_eps", 1e-5))
|
|
2849
|
+
|
|
2850
|
+
def get_vision_config(self) -> dict[str, Any] | None:
|
|
2851
|
+
return self.global_config["thinker_config"].get("vision_config")
|
|
2852
|
+
|
|
2853
|
+
def get_audio_config(self) -> dict[str, Any] | None:
|
|
2854
|
+
return self.global_config["thinker_config"].get("audio_config")
|
|
2855
|
+
|
|
2856
|
+
def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
|
|
2857
|
+
# SinusoidsPositionEmbedding
|
|
2858
|
+
assert self.hparams_audio is not None
|
|
2859
|
+
max_timescale = 10000
|
|
2860
|
+
length = 1500
|
|
2861
|
+
channels = self.hparams_audio["hidden_size"]
|
|
2862
|
+
log_timescale_increment = np.log(max_timescale) / (channels // 2 - 1)
|
|
2863
|
+
inv_timescales = torch.exp(-log_timescale_increment * torch.arange(channels // 2).float())
|
|
2864
|
+
scaled_time = torch.arange(length)[:, np.newaxis] * inv_timescales[np.newaxis, :]
|
|
2865
|
+
pos_embd = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=1).to(dtype=torch.float32)
|
|
2866
|
+
yield ("audio_tower.embed_positions.weight", pos_embd)
|
|
2867
|
+
|
|
2868
|
+
def tensor_force_quant(self, name, new_name, bid, n_dims):
|
|
2869
|
+
del bid, new_name, n_dims # unused
|
|
2870
|
+
if ".conv" in name and ".weight" in name:
|
|
2871
|
+
return gguf.GGMLQuantizationType.F16
|
|
2872
|
+
return False
|
|
2873
|
+
|
|
2874
|
+
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
|
2875
|
+
if name.startswith("thinker."):
|
|
2876
|
+
name = name.replace("thinker.", "")
|
|
2877
|
+
|
|
2878
|
+
if name.startswith("audio_tower"):
|
|
2879
|
+
# process audio tensors
|
|
2880
|
+
if "conv1.bias" in name or "conv2.bias" in name:
|
|
2881
|
+
# transpose conv1 and conv2 bias
|
|
2882
|
+
data_torch = data_torch.unsqueeze(-1)
|
|
2883
|
+
if "audio_bos_eos_token" in name:
|
|
2884
|
+
# this tensor is left unused in transformers code
|
|
2885
|
+
# https://github.com/huggingface/transformers/blob/6e3063422c4b1c014aa60c32b9254fd2902f0f28/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py#L1809
|
|
2886
|
+
return []
|
|
2887
|
+
return [(self.map_tensor_name(name), data_torch)]
|
|
2888
|
+
|
|
2889
|
+
return super().modify_tensors(data_torch, name, bid)
|
|
2890
|
+
|
|
2891
|
+
|
|
2724
2892
|
@ModelBase.register("InternVisionModel")
|
|
2725
|
-
class InternVisionModel(
|
|
2893
|
+
class InternVisionModel(MmprojModel):
|
|
2726
2894
|
def set_gguf_parameters(self):
|
|
2727
2895
|
super().set_gguf_parameters()
|
|
2728
2896
|
hparams = self.hparams
|
|
2729
|
-
self.gguf_writer.
|
|
2897
|
+
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.INTERNVL)
|
|
2730
2898
|
self.gguf_writer.add_vision_attention_layernorm_eps(hparams["layer_norm_eps"])
|
|
2731
2899
|
# hidden_act
|
|
2732
2900
|
if hparams["hidden_act"] == "silu":
|
|
@@ -3517,7 +3685,7 @@ class InternLM3Model(TextModel):
|
|
|
3517
3685
|
return [(self.map_tensor_name(name), data_torch)]
|
|
3518
3686
|
|
|
3519
3687
|
|
|
3520
|
-
@ModelBase.register("BertModel", "BertForMaskedLM", "CamembertModel")
|
|
3688
|
+
@ModelBase.register("BertModel", "BertForMaskedLM", "CamembertModel", "BertForSequenceClassification")
|
|
3521
3689
|
class BertModel(TextModel):
|
|
3522
3690
|
model_arch = gguf.MODEL_ARCH.BERT
|
|
3523
3691
|
|
|
@@ -3525,11 +3693,21 @@ class BertModel(TextModel):
|
|
|
3525
3693
|
super().__init__(*args, **kwargs)
|
|
3526
3694
|
self.vocab_size = None
|
|
3527
3695
|
|
|
3696
|
+
if cls_out_labels := self.hparams.get("id2label"):
|
|
3697
|
+
if len(cls_out_labels) == 2 and cls_out_labels[0] == "LABEL_0":
|
|
3698
|
+
# Remove dummy labels added by AutoConfig
|
|
3699
|
+
cls_out_labels = None
|
|
3700
|
+
self.cls_out_labels = cls_out_labels
|
|
3701
|
+
|
|
3528
3702
|
def set_gguf_parameters(self):
|
|
3529
3703
|
super().set_gguf_parameters()
|
|
3530
3704
|
self.gguf_writer.add_causal_attention(False)
|
|
3531
3705
|
self._try_set_pooling_type()
|
|
3532
3706
|
|
|
3707
|
+
if self.cls_out_labels:
|
|
3708
|
+
key_name = gguf.Keys.Classifier.OUTPUT_LABELS.format(arch = gguf.MODEL_ARCH_NAMES[self.model_arch])
|
|
3709
|
+
self.gguf_writer.add_array(key_name, [v for k, v in sorted(self.cls_out_labels.items())])
|
|
3710
|
+
|
|
3533
3711
|
def set_vocab(self):
|
|
3534
3712
|
tokens, toktypes, tokpre = self.get_vocab_base()
|
|
3535
3713
|
self.vocab_size = len(tokens)
|
|
@@ -3580,6 +3758,14 @@ class BertModel(TextModel):
|
|
|
3580
3758
|
if name.startswith("cls.seq_relationship"):
|
|
3581
3759
|
return []
|
|
3582
3760
|
|
|
3761
|
+
if self.cls_out_labels:
|
|
3762
|
+
# For BertForSequenceClassification (direct projection layer)
|
|
3763
|
+
if name == "classifier.weight":
|
|
3764
|
+
name = "classifier.out_proj.weight"
|
|
3765
|
+
|
|
3766
|
+
if name == "classifier.bias":
|
|
3767
|
+
name = "classifier.out_proj.bias"
|
|
3768
|
+
|
|
3583
3769
|
return [(self.map_tensor_name(name), data_torch)]
|
|
3584
3770
|
|
|
3585
3771
|
def _xlmroberta_tokenizer_init(self) -> None:
|
|
@@ -3599,44 +3785,93 @@ class BertModel(TextModel):
|
|
|
3599
3785
|
from sentencepiece import sentencepiece_model_pb2 as model
|
|
3600
3786
|
|
|
3601
3787
|
tokenizer_path = self.dir_model / 'sentencepiece.bpe.model'
|
|
3788
|
+
|
|
3789
|
+
tokenizer_json = {}
|
|
3790
|
+
tokenizer_config_json = {}
|
|
3602
3791
|
if not tokenizer_path.is_file():
|
|
3603
|
-
|
|
3792
|
+
tokenizer_path = self.dir_model / 'tokenizer.json'
|
|
3793
|
+
tokenizer_config_path = self.dir_model / 'tokenizer_config.json'
|
|
3604
3794
|
|
|
3605
|
-
|
|
3606
|
-
|
|
3607
|
-
assert sentencepiece_model.trainer_spec.model_type == 1 # UNIGRAM
|
|
3795
|
+
if not tokenizer_path.is_file():
|
|
3796
|
+
raise FileNotFoundError(f"File not found: {tokenizer_path}")
|
|
3608
3797
|
|
|
3609
|
-
|
|
3610
|
-
|
|
3611
|
-
|
|
3798
|
+
from base64 import b64decode
|
|
3799
|
+
from transformers import AutoTokenizer
|
|
3800
|
+
tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
|
|
3612
3801
|
|
|
3613
|
-
|
|
3614
|
-
|
|
3802
|
+
with open(tokenizer_path, "r", encoding="utf-8") as fp:
|
|
3803
|
+
tokenizer_json = json.load(fp)
|
|
3615
3804
|
|
|
3616
|
-
|
|
3805
|
+
if tokenizer_config_path.is_file():
|
|
3806
|
+
with open(tokenizer_config_path, "r", encoding="utf-8") as fp:
|
|
3807
|
+
tokenizer_config_json = json.load(fp)
|
|
3808
|
+
|
|
3809
|
+
add_prefix = tokenizer.add_prefix_space
|
|
3810
|
+
remove_whitespaces = tokenizer.clean_up_tokenization_spaces
|
|
3811
|
+
precompiled_charsmap = b64decode(tokenizer_json["normalizer"]["precompiled_charsmap"])
|
|
3812
|
+
|
|
3813
|
+
vocab_size = self.hparams.get("vocab_size", tokenizer.vocab_size)
|
|
3814
|
+
else:
|
|
3815
|
+
sentencepiece_model = model.ModelProto() # pyright: ignore[reportAttributeAccessIssue]
|
|
3816
|
+
sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
|
|
3817
|
+
assert sentencepiece_model.trainer_spec.model_type == 1 # UNIGRAM
|
|
3818
|
+
|
|
3819
|
+
add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix
|
|
3820
|
+
remove_whitespaces = sentencepiece_model.normalizer_spec.remove_extra_whitespaces
|
|
3821
|
+
precompiled_charsmap = sentencepiece_model.normalizer_spec.precompiled_charsmap
|
|
3822
|
+
|
|
3823
|
+
tokenizer = SentencePieceProcessor()
|
|
3824
|
+
tokenizer.LoadFromFile(str(tokenizer_path))
|
|
3825
|
+
|
|
3826
|
+
vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
|
|
3617
3827
|
|
|
3618
3828
|
tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
|
|
3619
3829
|
scores: list[float] = [-10000.0] * vocab_size
|
|
3620
3830
|
toktypes: list[int] = [SentencePieceTokenTypes.UNUSED] * vocab_size
|
|
3621
3831
|
|
|
3622
|
-
|
|
3623
|
-
|
|
3624
|
-
|
|
3625
|
-
|
|
3832
|
+
if isinstance(tokenizer, SentencePieceProcessor):
|
|
3833
|
+
for token_id in range(tokenizer.vocab_size()):
|
|
3834
|
+
piece = tokenizer.IdToPiece(token_id)
|
|
3835
|
+
text = piece.encode("utf-8")
|
|
3836
|
+
score = tokenizer.GetScore(token_id)
|
|
3626
3837
|
|
|
3627
|
-
|
|
3628
|
-
|
|
3629
|
-
|
|
3630
|
-
|
|
3631
|
-
|
|
3632
|
-
|
|
3633
|
-
|
|
3634
|
-
|
|
3635
|
-
|
|
3838
|
+
toktype = SentencePieceTokenTypes.NORMAL
|
|
3839
|
+
if tokenizer.IsUnknown(token_id):
|
|
3840
|
+
toktype = SentencePieceTokenTypes.UNKNOWN
|
|
3841
|
+
elif tokenizer.IsControl(token_id):
|
|
3842
|
+
toktype = SentencePieceTokenTypes.CONTROL
|
|
3843
|
+
elif tokenizer.IsUnused(token_id):
|
|
3844
|
+
toktype = SentencePieceTokenTypes.UNUSED
|
|
3845
|
+
elif tokenizer.IsByte(token_id):
|
|
3846
|
+
toktype = SentencePieceTokenTypes.BYTE
|
|
3636
3847
|
|
|
3637
|
-
|
|
3638
|
-
|
|
3639
|
-
|
|
3848
|
+
tokens[token_id] = text
|
|
3849
|
+
scores[token_id] = score
|
|
3850
|
+
toktypes[token_id] = toktype
|
|
3851
|
+
else:
|
|
3852
|
+
added_vocab = tokenizer.get_added_vocab()
|
|
3853
|
+
unk_token = tokenizer_config_json.get("unk_token")
|
|
3854
|
+
unk_token_id = added_vocab.get(unk_token, tokenizer_json["model"].get("unk_id", 3))
|
|
3855
|
+
|
|
3856
|
+
for token_id in range(vocab_size):
|
|
3857
|
+
piece = tokenizer._convert_id_to_token(token_id)
|
|
3858
|
+
text = piece.encode("utf-8")
|
|
3859
|
+
score = tokenizer_json["model"]["vocab"][token_id][1]
|
|
3860
|
+
|
|
3861
|
+
toktype = SentencePieceTokenTypes.NORMAL
|
|
3862
|
+
if token_id == unk_token_id:
|
|
3863
|
+
toktype = SentencePieceTokenTypes.UNKNOWN
|
|
3864
|
+
elif token_id in tokenizer.all_special_ids:
|
|
3865
|
+
toktype = SentencePieceTokenTypes.CONTROL
|
|
3866
|
+
elif token_id in added_vocab.values():
|
|
3867
|
+
toktype = SentencePieceTokenTypes.USER_DEFINED
|
|
3868
|
+
# No reliable way to detect this, but jina doesn't have any
|
|
3869
|
+
# elif tokenizer.IsByte(token_id):
|
|
3870
|
+
# toktype = SentencePieceTokenTypes.BYTE
|
|
3871
|
+
|
|
3872
|
+
tokens[token_id] = text
|
|
3873
|
+
scores[token_id] = score
|
|
3874
|
+
toktypes[token_id] = toktype
|
|
3640
3875
|
|
|
3641
3876
|
if vocab_size > len(tokens):
|
|
3642
3877
|
pad_count = vocab_size - len(tokens)
|
|
@@ -3646,15 +3881,16 @@ class BertModel(TextModel):
|
|
|
3646
3881
|
scores.append(-1000.0)
|
|
3647
3882
|
toktypes.append(SentencePieceTokenTypes.UNUSED)
|
|
3648
3883
|
|
|
3649
|
-
|
|
3650
|
-
|
|
3651
|
-
|
|
3652
|
-
|
|
3653
|
-
|
|
3654
|
-
|
|
3655
|
-
|
|
3656
|
-
|
|
3657
|
-
|
|
3884
|
+
if isinstance(tokenizer, SentencePieceProcessor):
|
|
3885
|
+
# realign tokens (see HF tokenizer code)
|
|
3886
|
+
tokens = [b'<s>', b'<pad>', b'</s>', b'<unk>'] + tokens[3:-1]
|
|
3887
|
+
scores = [0.0, 0.0, 0.0, 0.0] + scores[3:-1]
|
|
3888
|
+
toktypes = [
|
|
3889
|
+
SentencePieceTokenTypes.CONTROL,
|
|
3890
|
+
SentencePieceTokenTypes.CONTROL,
|
|
3891
|
+
SentencePieceTokenTypes.CONTROL,
|
|
3892
|
+
SentencePieceTokenTypes.UNKNOWN,
|
|
3893
|
+
] + toktypes[3:-1]
|
|
3658
3894
|
|
|
3659
3895
|
self.gguf_writer.add_tokenizer_model("t5")
|
|
3660
3896
|
self.gguf_writer.add_tokenizer_pre("default")
|
|
@@ -3674,7 +3910,27 @@ class BertModel(TextModel):
         self.gguf_writer.add_add_eos_token(True)
 
 
-@ModelBase.register("RobertaModel", "RobertaForSequenceClassification")
+@ModelBase.register("DistilBertModel", "DistilBertForMaskedLM", "DistilBertForSequenceClassification")
+class DistilBertModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.BERT
+
+    def set_gguf_parameters(self):
+        self.gguf_writer.add_layer_norm_eps(1e-12)
+        logger.info("gguf: layer norm epsilon = 1e-12")
+        super().set_gguf_parameters()
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name.startswith("distilbert."):
+            name = name[11:]
+
+        # These layers act as MLM head, so we don't need them
+        if name.startswith("vocab_"):
+            return []
+
+        return super().modify_tensors(data_torch, name, bid)
+
+
+@ModelBase.register("RobertaModel", "RobertaForSequenceClassification")
 class RobertaModel(BertModel):
     model_arch = gguf.MODEL_ARCH.BERT
 
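
In DistilBertModel.modify_tensors, the magic number 11 is just len("distilbert."). An equivalent, more explicit strip (hypothetical tensor name, not taken from the diff):

    name = "distilbert.transformer.layer.0.attention.q_lin.weight"
    prefix = "distilbert."
    assert len(prefix) == 11              # the constant used in the diff
    if name.startswith(prefix):
        name = name[len(prefix):]
    # -> "transformer.layer.0.attention.q_lin.weight"
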
@@ -3984,11 +4240,11 @@ class Gemma3Model(TextModel):
 
 
 @ModelBase.register("Gemma3ForConditionalGeneration")
-class Gemma3VisionModel(VisionModel):
+class Gemma3VisionModel(MmprojModel):
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         hparams = self.hparams
-        self.gguf_writer.add_vision_projector_type(gguf.VisionProjectorType.GEMMA3)
+        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.GEMMA3)
         # default values below are taken from HF tranformers code
         self.gguf_writer.add_vision_attention_layernorm_eps(hparams.get("layer_norm_eps", 1e-6))
         self.gguf_writer.add_vision_use_gelu(True)
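
The renamed writer call stores the projector type as a string KV in the mmproj file (key name as used by llama.cpp's clip loader). A hedged way to inspect it with the gguf Python package, assuming a converted mmproj file exists locally (file name is a placeholder):

    from gguf import GGUFReader   # pip install gguf

    reader = GGUFReader("mmproj-model.gguf")           # hypothetical output file
    field = reader.get_field("clip.projector_type")
    print(bytes(field.parts[field.data[0]]).decode())  # e.g. "gemma3"
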
@@ -5746,11 +6002,20 @@ class GraniteModel(LlamaModel):
         logger.info("gguf: (granite) logits_scale = %s", logits_scale)
 
 
-@ModelBase.register("GraniteMoeForCausalLM")
+@ModelBase.register("GraniteMoeForCausalLM", "GraniteMoeSharedForCausalLM")
 class GraniteMoeModel(GraniteModel):
     """Conversion for IBM's GraniteMoeForCausalLM"""
     model_arch = gguf.MODEL_ARCH.GRANITE_MOE
 
+    def set_gguf_parameters(self):
+        """GraniteMoeShared uses GraniteMoe parameters plus the following:
+        - shared_intermediate_size
+        """
+        super().set_gguf_parameters()
+        if shared_feed_forward_length := self.hparams.get("shared_intermediate_size"):
+            self.gguf_writer.add_expert_shared_feed_forward_length(shared_feed_forward_length)
+            logger.info("gguf: (granitemoeshared) shared_feed_forward_length = %s", shared_feed_forward_length)
+
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         """In modeling_granitemoe, the JetMoe implementation of parallel experts
         is used. This essentially merges w1 and w3 into a single tensor with 2x
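
The walrus guard keeps plain GraniteMoe conversions untouched: the shared-expert KV is written only when the HF config actually defines shared_intermediate_size. The same pattern in isolation (toy config values):

    hparams = {"intermediate_size": 1024, "shared_intermediate_size": 512}
    if shared_ffn := hparams.get("shared_intermediate_size"):
        print("write expert_shared_feed_forward_length =", shared_ffn)  # GraniteMoeShared
    else:
        print("key absent: plain GraniteMoe, nothing written")
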
@@ -5761,12 +6026,21 @@ class GraniteMoeModel(GraniteModel):
         if name.endswith("block_sparse_moe.input_linear.weight"):
             ffn_dim = self.hparams["intermediate_size"]
             assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * intermediate_size"
-            gate, up = data_torch[..., :ffn_dim, :], data_torch[..., ffn_dim:, :]
+            gate, up = data_torch.split(ffn_dim, dim=-2)
             return [
                 (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_EXP, bid), gate),
                 (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_EXP, bid), up),
             ]
 
+        if name.endswith("shared_mlp.input_linear.weight"):
+            ffn_dim = self.hparams["shared_intermediate_size"]
+            assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * shared_intermediate_size"
+            gate, up = data_torch.split(ffn_dim, dim=-2)
+            return [
+                (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_SHEXP, bid), gate),
+                (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_SHEXP, bid), up),
+            ]
+
         return super().modify_tensors(data_torch, name, bid)
 
 
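
Tensor.split(ffn_dim, dim=-2) cuts the merged input_linear weight into equal gate and up halves along the row axis, which is exactly what the old manual slicing did. A self-contained shape check with toy dimensions (assumes PyTorch):

    import torch

    ffn_dim, hidden = 4, 3
    merged = torch.arange(2 * ffn_dim * hidden, dtype=torch.float32).reshape(2 * ffn_dim, hidden)
    gate, up = merged.split(ffn_dim, dim=-2)
    assert gate.shape == up.shape == (ffn_dim, hidden)
    assert torch.equal(torch.cat([gate, up], dim=-2), merged)  # split is lossless
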
@@ -5917,6 +6191,65 @@ class ChameleonModel(TextModel):
         return data_torch
 
 
+@ModelBase.register("UltravoxModel")
+class UltravoxModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.LLAMA  # dummy
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        raise NotImplementedError("Ultravox does not have text decoder. Instead, it uses Llama or other models for text. If you want to get the audio encoder, please use --mmproj argument")
+
+
+@ModelBase.register("Qwen2AudioForConditionalGeneration")
+class WhisperEncoderModel(MmprojModel):
+    has_vision_encoder = False  # no vision encoder
+    has_audio_encoder = True
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.hparams["hidden_size"] = self.hparams["d_model"]
+        self.hparams["intermediate_size"] = self.hparams["encoder_ffn_dim"]
+        self.hparams["num_attention_heads"] = self.hparams["encoder_attention_heads"]
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.QWEN2A)
+        self.gguf_writer.add_audio_num_mel_bins(self.hparams["num_mel_bins"])
+        self.gguf_writer.add_audio_attention_layernorm_eps(self.hparams.get("layer_norm_eps", 1e-5))
+
+    def tensor_force_quant(self, name, new_name, bid, n_dims):
+        del bid, new_name, n_dims  # unused
+        if ".conv" in name and ".weight" in name:
+            return gguf.GGMLQuantizationType.F16
+        return False
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        if name.startswith("language_model."):
+            # skip language model tensors
+            return []
+
+        # prevent clash naming with vision tensors
+        if name.startswith("multi_modal_projector"):
+            name = "audio." + name
+
+        if "conv1.bias" in name or "conv2.bias" in name:
+            # transpose conv1 and conv2 bias
+            data_torch = data_torch.unsqueeze(-1)
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+
+@ModelBase.register("UltravoxModel")
+class UltravoxWhisperEncoderModel(WhisperEncoderModel):
+    has_vision_encoder = False  # no vision encoder
+    has_audio_encoder = True
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_audio_stack_factor(self.global_config["stack_factor"])
+
 ###### CONVERSION LOGIC ######
 
 
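
The "transpose conv1 and conv2 bias" step appears to map Whisper's 1-D conv bias onto the 2-D layout the GGML side expects; unsqueeze(-1) just appends a trailing axis of size 1 (toy shape, assumes PyTorch):

    import torch

    bias = torch.zeros(1280)     # e.g. a Whisper conv1.bias of shape [n_embd]
    bias = bias.unsqueeze(-1)    # -> shape [1280, 1], as in modify_tensors above
    print(tuple(bias.shape))
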
@@ -6092,13 +6425,15 @@ def split_str_to_n_bytes(split_str: str) -> int:
 
 
 def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> str:
+    # TODO @ngxson : this won't work correctly if the model has both audio & vision encoders
+    # maybe we should fallback to text model's arch in that case, since not many models have both
     text_config = hparams.get("text_config", {})
     vision_config = hparams.get("vision_config", {})
     arch = hparams["architectures"][0]
     # if "architectures" is found in the sub-config, use that instead
     if model_type == ModelType.TEXT and text_config.get("architectures") is not None:
         arch = text_config["architectures"][0]
-    elif model_type == ModelType.VISION and vision_config.get("architectures") is not None:
+    elif model_type == ModelType.MMPROJ and vision_config.get("architectures") is not None:
         arch = vision_config["architectures"][0]
     return arch
 
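
For multimodal checkpoints the function now resolves the sub-config's architecture when converting the projector. A condensed restatement of the selection logic (not the converter's exact code; the vision_config architectures entry is hypothetical, since many HF configs omit it):

    def pick_arch(hparams: dict, want_mmproj: bool) -> str:
        sub = hparams.get("vision_config" if want_mmproj else "text_config") or {}
        return (sub.get("architectures") or hparams["architectures"])[0]

    hparams = {
        "architectures": ["Gemma3ForConditionalGeneration"],
        "vision_config": {"architectures": ["SiglipVisionModel"]},
    }
    print(pick_arch(hparams, want_mmproj=False))  # Gemma3ForConditionalGeneration
    print(pick_arch(hparams, want_mmproj=True))   # SiglipVisionModel
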
@@ -6161,7 +6496,7 @@ def main() -> None:
 
     with torch.inference_mode():
         output_type = ftype_map[args.outtype]
-        model_type = ModelType.VISION if args.mmproj else ModelType.TEXT
+        model_type = ModelType.MMPROJ if args.mmproj else ModelType.TEXT
         hparams = ModelBase.load_hparams(dir_model)
        model_architecture = get_model_architecture(hparams, model_type)
         logger.info(f"Model architecture: {model_architecture}")
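
Downstream, ModelType.MMPROJ is selected by the converter's --mmproj flag, so a typical audio-encoder export against upstream llama.cpp looks roughly like this (model path and output name are placeholders):

    python convert_hf_to_gguf.py /path/to/Qwen2-Audio-7B-Instruct --mmproj --outfile mmproj-qwen2audio.gguf
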