@novastera-oss/llamarn 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -14
- package/RNLlamaCpp.podspec +10 -3
- package/android/CMakeLists.txt +8 -0
- package/android/src/main/cpp/include/llama.h +62 -125
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/README.md +11 -3
- package/cpp/llama.cpp/build-xcframework.sh +1 -0
- package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/common/arg.cpp +153 -113
- package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
- package/cpp/llama.cpp/common/chat-parser.h +117 -0
- package/cpp/llama.cpp/common/chat.cpp +847 -699
- package/cpp/llama.cpp/common/chat.h +73 -6
- package/cpp/llama.cpp/common/common.cpp +50 -82
- package/cpp/llama.cpp/common/common.h +21 -17
- package/cpp/llama.cpp/common/json-partial.cpp +255 -0
- package/cpp/llama.cpp/common/json-partial.h +37 -0
- package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
- package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
- package/cpp/llama.cpp/common/regex-partial.h +56 -0
- package/cpp/llama.cpp/common/sampling.cpp +7 -8
- package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
- package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
- package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
- package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
- package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
- package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
- package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
- package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
- package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
- package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
- package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
- package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
- package/cpp/llama.cpp/include/llama.h +62 -125
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
- package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
- package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
- package/cpp/llama.cpp/models/templates/README.md +2 -0
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
- package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
- package/cpp/llama.cpp/src/llama-arch.h +2 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
- package/cpp/llama.cpp/src/llama-context.cpp +340 -123
- package/cpp/llama.cpp/src/llama-context.h +30 -0
- package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
- package/cpp/llama.cpp/src/llama-cparams.h +2 -0
- package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
- package/cpp/llama.cpp/src/llama-graph.h +52 -7
- package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
- package/cpp/llama.cpp/src/llama-hparams.h +37 -5
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
- package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
- package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
- package/cpp/llama.cpp/src/llama-memory.h +4 -3
- package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
- package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
- package/cpp/llama.cpp/src/llama-model.cpp +529 -172
- package/cpp/llama.cpp/src/llama-model.h +6 -1
- package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
- package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
- package/cpp/llama.cpp/src/llama-vocab.h +6 -0
- package/cpp/llama.cpp/src/llama.cpp +14 -0
- package/cpp/rn-completion.cpp +4 -2
- package/ios/include/chat.h +73 -6
- package/ios/include/common/minja/chat-template.hpp +9 -5
- package/ios/include/common/minja/minja.hpp +69 -36
- package/ios/include/common.h +21 -17
- package/ios/include/llama.h +62 -125
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/common/stb_image.h +0 -7988
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
|
@@ -177,6 +177,9 @@ class Keys:
|
|
|
177
177
|
EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
|
|
178
178
|
BLOCK_COUNT = "{arch}.convnext.block_count"
|
|
179
179
|
|
|
180
|
+
class Classifier:
|
|
181
|
+
OUTPUT_LABELS = "{arch}.classifier.output_labels"
|
|
182
|
+
|
|
180
183
|
class Tokenizer:
|
|
181
184
|
MODEL = "tokenizer.ggml.model"
|
|
182
185
|
PRE = "tokenizer.ggml.pre"
|
|
@@ -219,10 +222,13 @@ class Keys:
|
|
|
219
222
|
TYPE = "adapter.type"
|
|
220
223
|
LORA_ALPHA = "adapter.lora.alpha"
|
|
221
224
|
|
|
222
|
-
class
|
|
225
|
+
class Clip:
|
|
223
226
|
PROJECTOR_TYPE = "clip.projector_type"
|
|
224
227
|
HAS_VISION_ENCODER = "clip.has_vision_encoder"
|
|
228
|
+
HAS_AUDIO_ENCODER = "clip.has_audio_encoder"
|
|
225
229
|
HAS_LLAVA_PROJECTOR = "clip.has_llava_projector"
|
|
230
|
+
|
|
231
|
+
class ClipVision:
|
|
226
232
|
IMAGE_SIZE = "clip.vision.image_size"
|
|
227
233
|
PATCH_SIZE = "clip.vision.patch_size"
|
|
228
234
|
EMBEDDING_LENGTH = "clip.vision.embedding_length"
|
|
@@ -243,19 +249,33 @@ class Keys:
|
|
|
243
249
|
class Projector:
|
|
244
250
|
SCALE_FACTOR = "clip.vision.projector.scale_factor"
|
|
245
251
|
|
|
252
|
+
class ClipAudio:
|
|
253
|
+
NUM_MEL_BINS = "clip.audio.num_mel_bins"
|
|
254
|
+
EMBEDDING_LENGTH = "clip.audio.embedding_length"
|
|
255
|
+
FEED_FORWARD_LENGTH = "clip.audio.feed_forward_length"
|
|
256
|
+
PROJECTION_DIM = "clip.audio.projection_dim"
|
|
257
|
+
BLOCK_COUNT = "clip.audio.block_count"
|
|
258
|
+
|
|
259
|
+
class Attention:
|
|
260
|
+
HEAD_COUNT = "clip.audio.attention.head_count"
|
|
261
|
+
LAYERNORM_EPS = "clip.audio.attention.layer_norm_epsilon"
|
|
262
|
+
|
|
263
|
+
class Projector:
|
|
264
|
+
STACK_FACTOR = "clip.audio.projector.stack_factor"
|
|
265
|
+
|
|
246
266
|
#
|
|
247
267
|
# recommended mapping of model tensor names for storage in gguf
|
|
248
268
|
#
|
|
249
269
|
|
|
250
270
|
|
|
251
271
|
class GGUFType:
|
|
252
|
-
MODEL
|
|
253
|
-
ADAPTER
|
|
254
|
-
|
|
272
|
+
MODEL = "model"
|
|
273
|
+
ADAPTER = "adapter"
|
|
274
|
+
MMPROJ = "mmproj" # dummy, unused for now
|
|
255
275
|
|
|
256
276
|
|
|
257
277
|
class MODEL_ARCH(IntEnum):
|
|
258
|
-
|
|
278
|
+
MMPROJ = auto() # dummy arch for clip.cpp
|
|
259
279
|
LLAMA = auto()
|
|
260
280
|
LLAMA4 = auto()
|
|
261
281
|
DECI = auto()
|
|
@@ -482,14 +502,15 @@ class MODEL_TENSOR(IntEnum):
|
|
|
482
502
|
V_ENC_EMBD_CLS = auto()
|
|
483
503
|
V_ENC_EMBD_PATCH = auto()
|
|
484
504
|
V_ENC_EMBD_POS = auto()
|
|
505
|
+
V_ENC_INPUT_NORM = auto()
|
|
485
506
|
V_ENC_ATTN_Q = auto()
|
|
486
507
|
V_ENC_ATTN_Q_NORM = auto()
|
|
487
508
|
V_ENC_ATTN_K = auto()
|
|
488
509
|
V_ENC_ATTN_K_NORM = auto()
|
|
489
510
|
V_ENC_ATTN_V = auto()
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
511
|
+
V_ENC_ATTN_O = auto()
|
|
512
|
+
V_ENC_ATTN_O_NORM = auto()
|
|
513
|
+
V_ENC_POST_ATTN_NORM = auto()
|
|
493
514
|
V_ENC_FFN_UP = auto()
|
|
494
515
|
V_ENC_FFN_GATE = auto()
|
|
495
516
|
V_ENC_FFN_DOWN = auto()
|
|
@@ -513,10 +534,28 @@ class MODEL_TENSOR(IntEnum):
|
|
|
513
534
|
V_RESMPL_QUERY = auto() # minicpmv
|
|
514
535
|
V_TOK_EMBD_IMG_BREAK = auto() # pixtral
|
|
515
536
|
V_MM_PATCH_MERGER = auto() # mistral small 3.1
|
|
537
|
+
# audio (mtmd)
|
|
538
|
+
A_ENC_EMBD_POS = auto()
|
|
539
|
+
A_ENC_CONV1D = auto()
|
|
540
|
+
A_PRE_NORM = auto()
|
|
541
|
+
A_POST_NORM = auto()
|
|
542
|
+
A_ENC_ATTN_Q = auto()
|
|
543
|
+
A_ENC_ATTN_K = auto()
|
|
544
|
+
A_ENC_ATTN_V = auto()
|
|
545
|
+
A_ENC_INPUT_NORM = auto()
|
|
546
|
+
A_ENC_OUTPUT = auto()
|
|
547
|
+
A_ENC_OUTPUT_NORM = auto()
|
|
548
|
+
A_ENC_FFN_UP = auto()
|
|
549
|
+
A_ENC_FFN_GATE = auto()
|
|
550
|
+
A_ENC_FFN_DOWN = auto()
|
|
551
|
+
A_MMPROJ = auto()
|
|
552
|
+
A_MMPROJ_FC = auto()
|
|
553
|
+
A_MM_NORM_PRE = auto()
|
|
554
|
+
A_MM_NORM_MID = auto()
|
|
516
555
|
|
|
517
556
|
|
|
518
557
|
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
|
519
|
-
MODEL_ARCH.
|
|
558
|
+
MODEL_ARCH.MMPROJ: "clip", # dummy arch for clip.cpp
|
|
520
559
|
MODEL_ARCH.LLAMA: "llama",
|
|
521
560
|
MODEL_ARCH.LLAMA4: "llama4",
|
|
522
561
|
MODEL_ARCH.DECI: "deci",
|
|
@@ -749,8 +788,9 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
|
|
749
788
|
MODEL_TENSOR.V_ENC_ATTN_K_NORM: "v.blk.{bid}.attn_k_norm",
|
|
750
789
|
MODEL_TENSOR.V_ENC_ATTN_V: "v.blk.{bid}.attn_v",
|
|
751
790
|
MODEL_TENSOR.V_ENC_INPUT_NORM: "v.blk.{bid}.ln1",
|
|
752
|
-
MODEL_TENSOR.
|
|
753
|
-
MODEL_TENSOR.
|
|
791
|
+
MODEL_TENSOR.V_ENC_ATTN_O: "v.blk.{bid}.attn_out",
|
|
792
|
+
MODEL_TENSOR.V_ENC_ATTN_O_NORM: "v.blk.{bid}.attn_out_norm",
|
|
793
|
+
MODEL_TENSOR.V_ENC_POST_ATTN_NORM: "v.blk.{bid}.ln2",
|
|
754
794
|
MODEL_TENSOR.V_ENC_FFN_UP: "v.blk.{bid}.ffn_up",
|
|
755
795
|
MODEL_TENSOR.V_ENC_FFN_GATE: "v.blk.{bid}.ffn_gate",
|
|
756
796
|
MODEL_TENSOR.V_ENC_FFN_DOWN: "v.blk.{bid}.ffn_down",
|
|
@@ -774,10 +814,28 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
|
|
774
814
|
MODEL_TENSOR.V_RESMPL_QUERY: "resampler.query",
|
|
775
815
|
MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK: "v.token_embd.img_break", # pixtral
|
|
776
816
|
MODEL_TENSOR.V_MM_PATCH_MERGER: "mm.patch_merger", # mistral small 3.1
|
|
817
|
+
# audio (mtmd)
|
|
818
|
+
MODEL_TENSOR.A_ENC_EMBD_POS: "a.position_embd",
|
|
819
|
+
MODEL_TENSOR.A_ENC_CONV1D: "a.conv1d.{bid}",
|
|
820
|
+
MODEL_TENSOR.A_PRE_NORM: "a.pre_ln",
|
|
821
|
+
MODEL_TENSOR.A_POST_NORM: "a.post_ln",
|
|
822
|
+
MODEL_TENSOR.A_ENC_ATTN_Q: "a.blk.{bid}.attn_q",
|
|
823
|
+
MODEL_TENSOR.A_ENC_ATTN_K: "a.blk.{bid}.attn_k",
|
|
824
|
+
MODEL_TENSOR.A_ENC_ATTN_V: "a.blk.{bid}.attn_v",
|
|
825
|
+
MODEL_TENSOR.A_ENC_INPUT_NORM: "a.blk.{bid}.ln1",
|
|
826
|
+
MODEL_TENSOR.A_ENC_OUTPUT: "a.blk.{bid}.attn_out",
|
|
827
|
+
MODEL_TENSOR.A_ENC_OUTPUT_NORM: "a.blk.{bid}.ln2",
|
|
828
|
+
MODEL_TENSOR.A_ENC_FFN_UP: "a.blk.{bid}.ffn_up",
|
|
829
|
+
MODEL_TENSOR.A_ENC_FFN_GATE: "a.blk.{bid}.ffn_gate",
|
|
830
|
+
MODEL_TENSOR.A_ENC_FFN_DOWN: "a.blk.{bid}.ffn_down",
|
|
831
|
+
MODEL_TENSOR.A_MMPROJ: "mm.a.mlp.{bid}",
|
|
832
|
+
MODEL_TENSOR.A_MMPROJ_FC: "mm.a.fc",
|
|
833
|
+
MODEL_TENSOR.A_MM_NORM_PRE: "mm.a.norm_pre",
|
|
834
|
+
MODEL_TENSOR.A_MM_NORM_MID: "mm.a.norm_mid",
|
|
777
835
|
}
|
|
778
836
|
|
|
779
837
|
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
780
|
-
MODEL_ARCH.
|
|
838
|
+
MODEL_ARCH.MMPROJ: [
|
|
781
839
|
MODEL_TENSOR.V_MMPROJ,
|
|
782
840
|
MODEL_TENSOR.V_MMPROJ_FC,
|
|
783
841
|
MODEL_TENSOR.V_MMPROJ_MLP,
|
|
@@ -785,14 +843,15 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
|
785
843
|
MODEL_TENSOR.V_ENC_EMBD_CLS,
|
|
786
844
|
MODEL_TENSOR.V_ENC_EMBD_PATCH,
|
|
787
845
|
MODEL_TENSOR.V_ENC_EMBD_POS,
|
|
846
|
+
MODEL_TENSOR.V_ENC_INPUT_NORM,
|
|
788
847
|
MODEL_TENSOR.V_ENC_ATTN_Q,
|
|
789
848
|
MODEL_TENSOR.V_ENC_ATTN_Q_NORM,
|
|
790
849
|
MODEL_TENSOR.V_ENC_ATTN_K,
|
|
791
850
|
MODEL_TENSOR.V_ENC_ATTN_K_NORM,
|
|
792
851
|
MODEL_TENSOR.V_ENC_ATTN_V,
|
|
793
|
-
MODEL_TENSOR.
|
|
794
|
-
MODEL_TENSOR.
|
|
795
|
-
MODEL_TENSOR.
|
|
852
|
+
MODEL_TENSOR.V_ENC_ATTN_O,
|
|
853
|
+
MODEL_TENSOR.V_ENC_ATTN_O_NORM,
|
|
854
|
+
MODEL_TENSOR.V_ENC_POST_ATTN_NORM,
|
|
796
855
|
MODEL_TENSOR.V_ENC_FFN_UP,
|
|
797
856
|
MODEL_TENSOR.V_ENC_FFN_GATE,
|
|
798
857
|
MODEL_TENSOR.V_ENC_FFN_DOWN,
|
|
@@ -816,6 +875,24 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
|
816
875
|
MODEL_TENSOR.V_RESMPL_QUERY,
|
|
817
876
|
MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK,
|
|
818
877
|
MODEL_TENSOR.V_MM_PATCH_MERGER,
|
|
878
|
+
# audio
|
|
879
|
+
MODEL_TENSOR.A_ENC_EMBD_POS,
|
|
880
|
+
MODEL_TENSOR.A_ENC_CONV1D,
|
|
881
|
+
MODEL_TENSOR.A_PRE_NORM,
|
|
882
|
+
MODEL_TENSOR.A_POST_NORM,
|
|
883
|
+
MODEL_TENSOR.A_ENC_ATTN_Q,
|
|
884
|
+
MODEL_TENSOR.A_ENC_ATTN_K,
|
|
885
|
+
MODEL_TENSOR.A_ENC_ATTN_V,
|
|
886
|
+
MODEL_TENSOR.A_ENC_INPUT_NORM,
|
|
887
|
+
MODEL_TENSOR.A_ENC_OUTPUT,
|
|
888
|
+
MODEL_TENSOR.A_ENC_OUTPUT_NORM,
|
|
889
|
+
MODEL_TENSOR.A_ENC_FFN_UP,
|
|
890
|
+
MODEL_TENSOR.A_ENC_FFN_GATE,
|
|
891
|
+
MODEL_TENSOR.A_ENC_FFN_DOWN,
|
|
892
|
+
MODEL_TENSOR.A_MMPROJ,
|
|
893
|
+
MODEL_TENSOR.A_MMPROJ_FC,
|
|
894
|
+
MODEL_TENSOR.A_MM_NORM_PRE,
|
|
895
|
+
MODEL_TENSOR.A_MM_NORM_MID,
|
|
819
896
|
],
|
|
820
897
|
MODEL_ARCH.LLAMA: [
|
|
821
898
|
MODEL_TENSOR.TOKEN_EMBD,
|
|
@@ -959,6 +1036,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
|
959
1036
|
MODEL_TENSOR.POS_EMBD,
|
|
960
1037
|
MODEL_TENSOR.OUTPUT_NORM,
|
|
961
1038
|
MODEL_TENSOR.ATTN_OUT_NORM,
|
|
1039
|
+
MODEL_TENSOR.ATTN_QKV,
|
|
962
1040
|
MODEL_TENSOR.ATTN_Q,
|
|
963
1041
|
MODEL_TENSOR.ATTN_K,
|
|
964
1042
|
MODEL_TENSOR.ATTN_V,
|
|
@@ -1905,6 +1983,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
|
1905
1983
|
MODEL_TENSOR.FFN_GATE_EXP,
|
|
1906
1984
|
MODEL_TENSOR.FFN_DOWN_EXP,
|
|
1907
1985
|
MODEL_TENSOR.FFN_UP_EXP,
|
|
1986
|
+
MODEL_TENSOR.FFN_GATE_SHEXP,
|
|
1987
|
+
MODEL_TENSOR.FFN_UP_SHEXP,
|
|
1988
|
+
MODEL_TENSOR.FFN_DOWN_SHEXP,
|
|
1908
1989
|
],
|
|
1909
1990
|
MODEL_ARCH.CHAMELEON: [
|
|
1910
1991
|
MODEL_TENSOR.TOKEN_EMBD,
|
|
@@ -2177,9 +2258,13 @@ class VisionProjectorType:
|
|
|
2177
2258
|
GEMMA3 = "gemma3"
|
|
2178
2259
|
IDEFICS3 = "idefics3"
|
|
2179
2260
|
PIXTRAL = "pixtral"
|
|
2261
|
+
LLAMA4 = "llama4"
|
|
2180
2262
|
QWEN2VL = "qwen2vl_merger"
|
|
2181
2263
|
QWEN25VL = "qwen2.5vl_merger"
|
|
2264
|
+
ULTRAVOX = "ultravox"
|
|
2182
2265
|
INTERNVL = "internvl"
|
|
2266
|
+
QWEN2A = "qwen2a" # audio
|
|
2267
|
+
QWEN25O = "qwen2.5o" # omni
|
|
2183
2268
|
|
|
2184
2269
|
|
|
2185
2270
|
# Items here are (block size, type size)
|
|
@@ -251,7 +251,7 @@ class GGUFReader:
|
|
|
251
251
|
offs += curr_size
|
|
252
252
|
return offs - orig_offs, aparts, data_idxs, types
|
|
253
253
|
# We can't deal with this one.
|
|
254
|
-
raise ValueError('Unknown/unhandled field type {gtype}')
|
|
254
|
+
raise ValueError(f'Unknown/unhandled field type {gtype}')
|
|
255
255
|
|
|
256
256
|
def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
|
|
257
257
|
offs = orig_offs
|
|
@@ -49,6 +49,7 @@ class TensorInfo:
|
|
|
49
49
|
class GGUFValue:
|
|
50
50
|
value: Any
|
|
51
51
|
type: GGUFValueType
|
|
52
|
+
sub_type: GGUFValueType | None = None
|
|
52
53
|
|
|
53
54
|
|
|
54
55
|
class WriterState(Enum):
|
|
@@ -238,7 +239,7 @@ class GGUFWriter:
|
|
|
238
239
|
|
|
239
240
|
for key, val in kv_data.items():
|
|
240
241
|
kv_bytes += self._pack_val(key, GGUFValueType.STRING, add_vtype=False)
|
|
241
|
-
kv_bytes += self._pack_val(val.value, val.type, add_vtype=True)
|
|
242
|
+
kv_bytes += self._pack_val(val.value, val.type, add_vtype=True, sub_type=val.sub_type)
|
|
242
243
|
|
|
243
244
|
fout.write(kv_bytes)
|
|
244
245
|
|
|
@@ -268,11 +269,11 @@ class GGUFWriter:
|
|
|
268
269
|
fout.flush()
|
|
269
270
|
self.state = WriterState.TI_DATA
|
|
270
271
|
|
|
271
|
-
def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
|
|
272
|
+
def add_key_value(self, key: str, val: Any, vtype: GGUFValueType, sub_type: GGUFValueType | None = None) -> None:
|
|
272
273
|
if any(key in kv_data for kv_data in self.kv_data):
|
|
273
274
|
raise ValueError(f'Duplicated key name {key!r}')
|
|
274
275
|
|
|
275
|
-
self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
|
|
276
|
+
self.kv_data[0][key] = GGUFValue(value=val, type=vtype, sub_type=sub_type)
|
|
276
277
|
|
|
277
278
|
def add_uint8(self, key: str, val: int) -> None:
|
|
278
279
|
self.add_key_value(key,val, GGUFValueType.UINT8)
|
|
@@ -896,7 +897,7 @@ class GGUFWriter:
|
|
|
896
897
|
def add_remove_extra_whitespaces(self, value: bool) -> None:
|
|
897
898
|
self.add_bool(Keys.Tokenizer.REMOVE_EXTRA_WS, value)
|
|
898
899
|
|
|
899
|
-
def add_precompiled_charsmap(self, charsmap:
|
|
900
|
+
def add_precompiled_charsmap(self, charsmap: bytes) -> None:
|
|
900
901
|
self.add_array(Keys.Tokenizer.PRECOMPILED_CHARSMAP, charsmap)
|
|
901
902
|
|
|
902
903
|
def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
|
|
@@ -936,12 +937,18 @@ class GGUFWriter:
|
|
|
936
937
|
|
|
937
938
|
# for vision models
|
|
938
939
|
|
|
940
|
+
def add_clip_has_vision_encoder(self, value: bool) -> None:
|
|
941
|
+
self.add_bool(Keys.Clip.HAS_VISION_ENCODER, value)
|
|
942
|
+
|
|
943
|
+
def add_clip_has_audio_encoder(self, value: bool) -> None:
|
|
944
|
+
self.add_bool(Keys.Clip.HAS_AUDIO_ENCODER, value)
|
|
945
|
+
|
|
946
|
+
def add_clip_projector_type(self, value: str) -> None:
|
|
947
|
+
self.add_string(Keys.Clip.PROJECTOR_TYPE, value)
|
|
948
|
+
|
|
939
949
|
def add_vision_projection_dim(self, value: int) -> None:
|
|
940
950
|
self.add_uint32(Keys.ClipVision.PROJECTION_DIM, value)
|
|
941
951
|
|
|
942
|
-
def add_vision_has_vision_encoder(self, value: bool) -> None:
|
|
943
|
-
self.add_bool(Keys.ClipVision.HAS_VISION_ENCODER, value)
|
|
944
|
-
|
|
945
952
|
def add_vision_patch_size(self, value: int) -> None:
|
|
946
953
|
self.add_uint32(Keys.ClipVision.PATCH_SIZE, value)
|
|
947
954
|
|
|
@@ -957,9 +964,6 @@ class GGUFWriter:
|
|
|
957
964
|
def add_vision_head_count(self, value: int) -> None:
|
|
958
965
|
self.add_uint32(Keys.ClipVision.Attention.HEAD_COUNT, value)
|
|
959
966
|
|
|
960
|
-
def add_vision_projector_type(self, value: str) -> None:
|
|
961
|
-
self.add_string(Keys.ClipVision.PROJECTOR_TYPE, value)
|
|
962
|
-
|
|
963
967
|
def add_vision_attention_layernorm_eps(self, value: float) -> None:
|
|
964
968
|
self.add_float32(Keys.ClipVision.Attention.LAYERNORM_EPS, value)
|
|
965
969
|
|
|
@@ -987,13 +991,39 @@ class GGUFWriter:
|
|
|
987
991
|
def add_vision_n_wa_pattern(self, value: int) -> None:
|
|
988
992
|
self.add_uint32(Keys.ClipVision.N_WA_PATTERN, value)
|
|
989
993
|
|
|
994
|
+
# audio models
|
|
995
|
+
|
|
996
|
+
def add_audio_projection_dim(self, value: int) -> None:
|
|
997
|
+
self.add_uint32(Keys.ClipAudio.PROJECTION_DIM, value)
|
|
998
|
+
|
|
999
|
+
def add_audio_embedding_length(self, value: int) -> None:
|
|
1000
|
+
self.add_uint32(Keys.ClipAudio.EMBEDDING_LENGTH, value)
|
|
1001
|
+
|
|
1002
|
+
def add_audio_feed_forward_length(self, value: int) -> None:
|
|
1003
|
+
self.add_uint32(Keys.ClipAudio.FEED_FORWARD_LENGTH, value)
|
|
1004
|
+
|
|
1005
|
+
def add_audio_block_count(self, value: int) -> None:
|
|
1006
|
+
self.add_uint32(Keys.ClipAudio.BLOCK_COUNT, value)
|
|
1007
|
+
|
|
1008
|
+
def add_audio_head_count(self, value: int) -> None:
|
|
1009
|
+
self.add_uint32(Keys.ClipAudio.Attention.HEAD_COUNT, value)
|
|
1010
|
+
|
|
1011
|
+
def add_audio_attention_layernorm_eps(self, value: float) -> None:
|
|
1012
|
+
self.add_float32(Keys.ClipAudio.Attention.LAYERNORM_EPS, value)
|
|
1013
|
+
|
|
1014
|
+
def add_audio_num_mel_bins(self, value: int) -> None:
|
|
1015
|
+
self.add_uint32(Keys.ClipAudio.NUM_MEL_BINS, value)
|
|
1016
|
+
|
|
1017
|
+
def add_audio_stack_factor(self, value: int) -> None:
|
|
1018
|
+
self.add_uint32(Keys.ClipAudio.Projector.STACK_FACTOR, value)
|
|
1019
|
+
|
|
990
1020
|
def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
|
|
991
1021
|
pack_prefix = ''
|
|
992
1022
|
if not skip_pack_prefix:
|
|
993
1023
|
pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
|
|
994
1024
|
return struct.pack(f'{pack_prefix}{fmt}', value)
|
|
995
1025
|
|
|
996
|
-
def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
|
|
1026
|
+
def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool, sub_type: GGUFValueType | None = None) -> bytes:
|
|
997
1027
|
kv_data = bytearray()
|
|
998
1028
|
|
|
999
1029
|
if add_vtype:
|
|
@@ -1014,7 +1044,9 @@ class GGUFWriter:
|
|
|
1014
1044
|
if len(val) == 0:
|
|
1015
1045
|
raise ValueError("Invalid GGUF metadata array. Empty array")
|
|
1016
1046
|
|
|
1017
|
-
if
|
|
1047
|
+
if sub_type is not None:
|
|
1048
|
+
ltype = sub_type
|
|
1049
|
+
elif isinstance(val, bytes):
|
|
1018
1050
|
ltype = GGUFValueType.UINT8
|
|
1019
1051
|
else:
|
|
1020
1052
|
ltype = GGUFValueType.get_type(val[0])
|
|
@@ -823,6 +823,7 @@ class GGUFEditorWindow(QMainWindow):
|
|
|
823
823
|
self.modified = False
|
|
824
824
|
self.metadata_changes = {} # Store changes to apply when saving
|
|
825
825
|
self.metadata_to_remove = set() # Store keys to remove when saving
|
|
826
|
+
self.on_metadata_changed_is_connected = False
|
|
826
827
|
|
|
827
828
|
self.setup_ui()
|
|
828
829
|
|
|
@@ -941,9 +942,11 @@ class GGUFEditorWindow(QMainWindow):
|
|
|
941
942
|
return
|
|
942
943
|
|
|
943
944
|
# Disconnect to prevent triggering during loading
|
|
944
|
-
|
|
945
|
-
warnings.
|
|
946
|
-
|
|
945
|
+
if self.on_metadata_changed_is_connected:
|
|
946
|
+
with warnings.catch_warnings():
|
|
947
|
+
warnings.filterwarnings('ignore')
|
|
948
|
+
self.metadata_table.itemChanged.disconnect(self.on_metadata_changed)
|
|
949
|
+
self.on_metadata_changed_is_connected = False
|
|
947
950
|
|
|
948
951
|
for i, (key, field) in enumerate(self.reader.fields.items()):
|
|
949
952
|
self.metadata_table.insertRow(i)
|
|
@@ -1021,6 +1024,7 @@ class GGUFEditorWindow(QMainWindow):
|
|
|
1021
1024
|
|
|
1022
1025
|
# Reconnect after loading
|
|
1023
1026
|
self.metadata_table.itemChanged.connect(self.on_metadata_changed)
|
|
1027
|
+
self.on_metadata_changed_is_connected = True
|
|
1024
1028
|
|
|
1025
1029
|
def extract_array_values(self, field: ReaderField) -> list:
|
|
1026
1030
|
"""Extract all values from an array field."""
|
|
@@ -1517,19 +1521,21 @@ class GGUFEditorWindow(QMainWindow):
|
|
|
1517
1521
|
continue
|
|
1518
1522
|
|
|
1519
1523
|
# Apply changes if any
|
|
1524
|
+
sub_type = None
|
|
1520
1525
|
if field.name in self.metadata_changes:
|
|
1521
1526
|
value_type, value = self.metadata_changes[field.name]
|
|
1522
1527
|
if value_type == GGUFValueType.ARRAY:
|
|
1523
1528
|
# Handle array values
|
|
1524
|
-
|
|
1525
|
-
writer.add_array(field.name, array_values)
|
|
1526
|
-
else:
|
|
1527
|
-
writer.add_key_value(field.name, value, value_type)
|
|
1529
|
+
sub_type, value = value
|
|
1528
1530
|
else:
|
|
1529
1531
|
# Copy original value
|
|
1530
1532
|
value = field.contents()
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
+
value_type = field.types[0]
|
|
1534
|
+
if value_type == GGUFValueType.ARRAY:
|
|
1535
|
+
sub_type = field.types[-1]
|
|
1536
|
+
|
|
1537
|
+
if value is not None:
|
|
1538
|
+
writer.add_key_value(field.name, value, value_type, sub_type=sub_type)
|
|
1533
1539
|
|
|
1534
1540
|
# Add new metadata
|
|
1535
1541
|
for key, (value_type, value) in self.metadata_changes.items():
|
|
@@ -1537,7 +1543,12 @@ class GGUFEditorWindow(QMainWindow):
|
|
|
1537
1543
|
if self.reader.get_field(key) is not None:
|
|
1538
1544
|
continue
|
|
1539
1545
|
|
|
1540
|
-
|
|
1546
|
+
sub_type = None
|
|
1547
|
+
if value_type == GGUFValueType.ARRAY:
|
|
1548
|
+
# Handle array values
|
|
1549
|
+
sub_type, value = value
|
|
1550
|
+
|
|
1551
|
+
writer.add_key_value(key, value, value_type, sub_type=sub_type)
|
|
1541
1552
|
|
|
1542
1553
|
# Add tensors (including data)
|
|
1543
1554
|
for tensor in self.reader.tensors:
|
|
@@ -24,6 +24,7 @@ class MetadataDetails(NamedTuple):
|
|
|
24
24
|
type: gguf.GGUFValueType
|
|
25
25
|
value: Any
|
|
26
26
|
description: str = ''
|
|
27
|
+
sub_type: gguf.GGUFValueType | None = None
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
def get_field_data(reader: gguf.GGUFReader, key: str) -> Any:
|
|
@@ -57,7 +58,9 @@ def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new
|
|
|
57
58
|
logger.debug(f'Removing {field.name}')
|
|
58
59
|
continue
|
|
59
60
|
|
|
60
|
-
|
|
61
|
+
val_type = field.types[0]
|
|
62
|
+
sub_type = field.types[-1] if val_type == gguf.GGUFValueType.ARRAY else None
|
|
63
|
+
old_val = MetadataDetails(val_type, field.contents(), sub_type=sub_type)
|
|
61
64
|
val = new_metadata.get(field.name, old_val)
|
|
62
65
|
|
|
63
66
|
if field.name in new_metadata:
|
|
@@ -67,7 +70,7 @@ def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new
|
|
|
67
70
|
logger.debug(f'Copying {field.name}')
|
|
68
71
|
|
|
69
72
|
if val.value is not None:
|
|
70
|
-
writer.add_key_value(field.name, val.value, val.type)
|
|
73
|
+
writer.add_key_value(field.name, val.value, val.type, sub_type=sub_type if val.sub_type is None else val.sub_type)
|
|
71
74
|
|
|
72
75
|
if gguf.Keys.Tokenizer.CHAT_TEMPLATE in new_metadata:
|
|
73
76
|
logger.debug('Adding chat template(s)')
|