@novastera-oss/llamarn 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -14
- package/RNLlamaCpp.podspec +10 -3
- package/android/CMakeLists.txt +8 -0
- package/android/src/main/cpp/include/llama.h +62 -125
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/README.md +11 -3
- package/cpp/llama.cpp/build-xcframework.sh +1 -0
- package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/common/arg.cpp +153 -113
- package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
- package/cpp/llama.cpp/common/chat-parser.h +117 -0
- package/cpp/llama.cpp/common/chat.cpp +847 -699
- package/cpp/llama.cpp/common/chat.h +73 -6
- package/cpp/llama.cpp/common/common.cpp +50 -82
- package/cpp/llama.cpp/common/common.h +21 -17
- package/cpp/llama.cpp/common/json-partial.cpp +255 -0
- package/cpp/llama.cpp/common/json-partial.h +37 -0
- package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
- package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
- package/cpp/llama.cpp/common/regex-partial.h +56 -0
- package/cpp/llama.cpp/common/sampling.cpp +7 -8
- package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
- package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
- package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
- package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
- package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
- package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
- package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
- package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
- package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
- package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
- package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
- package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
- package/cpp/llama.cpp/include/llama.h +62 -125
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
- package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
- package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
- package/cpp/llama.cpp/models/templates/README.md +2 -0
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
- package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
- package/cpp/llama.cpp/src/llama-arch.h +2 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
- package/cpp/llama.cpp/src/llama-context.cpp +340 -123
- package/cpp/llama.cpp/src/llama-context.h +30 -0
- package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
- package/cpp/llama.cpp/src/llama-cparams.h +2 -0
- package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
- package/cpp/llama.cpp/src/llama-graph.h +52 -7
- package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
- package/cpp/llama.cpp/src/llama-hparams.h +37 -5
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
- package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
- package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
- package/cpp/llama.cpp/src/llama-memory.h +4 -3
- package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
- package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
- package/cpp/llama.cpp/src/llama-model.cpp +529 -172
- package/cpp/llama.cpp/src/llama-model.h +6 -1
- package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
- package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
- package/cpp/llama.cpp/src/llama-vocab.h +6 -0
- package/cpp/llama.cpp/src/llama.cpp +14 -0
- package/cpp/rn-completion.cpp +4 -2
- package/ios/include/chat.h +73 -6
- package/ios/include/common/minja/chat-template.hpp +9 -5
- package/ios/include/common/minja/minja.hpp +69 -36
- package/ios/include/common.h +21 -17
- package/ios/include/llama.h +62 -125
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/common/stb_image.h +0 -7988
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
|
@@ -68,7 +68,7 @@ class TensorNameMap:
|
|
|
68
68
|
"output_layer", # chatglm
|
|
69
69
|
"head", # rwkv
|
|
70
70
|
"head.out", # wavtokenizer
|
|
71
|
-
"
|
|
71
|
+
"lm_head", # llama4
|
|
72
72
|
),
|
|
73
73
|
|
|
74
74
|
# Output norm
|
|
@@ -91,7 +91,7 @@ class TensorNameMap:
|
|
|
91
91
|
"rwkv.ln_out", # rwkv6
|
|
92
92
|
"model.ln_out", # rwkv7
|
|
93
93
|
"backbone.final_layer_norm", # wavtokenizer
|
|
94
|
-
"
|
|
94
|
+
"model.norm", # llama4
|
|
95
95
|
),
|
|
96
96
|
|
|
97
97
|
# Rope frequencies
|
|
@@ -133,7 +133,7 @@ class TensorNameMap:
|
|
|
133
133
|
"transformer.layers.{bid}.attn_norm", # openelm
|
|
134
134
|
"rwkv.blocks.{bid}.ln1", # rwkv6
|
|
135
135
|
"model.layers.{bid}.ln1", # rwkv7
|
|
136
|
-
"
|
|
136
|
+
"model.layers.{bid}.input_layernorm", # llama4
|
|
137
137
|
),
|
|
138
138
|
|
|
139
139
|
# Attention norm 2
|
|
@@ -157,6 +157,7 @@ class TensorNameMap:
|
|
|
157
157
|
"h.{bid}.attn.c_attn", # gpt2
|
|
158
158
|
"transformer.h.{bid}.mixer.Wqkv", # phi2
|
|
159
159
|
"encoder.layers.{bid}.attn.Wqkv", # nomic-bert
|
|
160
|
+
"encoder.layers.{bid}.mixer.Wqkv", # jina
|
|
160
161
|
"model.layers.{bid}.self_attn.qkv_proj", # phi3
|
|
161
162
|
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm
|
|
162
163
|
"transformer.layers.{bid}.attn.qkv_proj", # openelm
|
|
@@ -168,12 +169,13 @@ class TensorNameMap:
|
|
|
168
169
|
"model.layers.{bid}.self_attn.q_proj_no_perm", # llama-custom
|
|
169
170
|
"layers.{bid}.attention.wq", # llama-pth
|
|
170
171
|
"encoder.layer.{bid}.attention.self.query", # bert
|
|
172
|
+
"transformer.layer.{bid}.attention.q_lin", # distillbert
|
|
171
173
|
"transformer.h.{bid}.attn.q_proj", # gpt-j
|
|
172
174
|
"model.layers.layers.{bid}.self_attn.q_proj", # plamo
|
|
173
175
|
"model.layers.{bid}.attention.wq", # internlm2
|
|
174
176
|
"transformer.decoder_layer.{bid}.multi_head_attention.query",# Grok
|
|
175
177
|
"transformer.h.{bid}.attn.attention.q_proj", # exaone
|
|
176
|
-
"
|
|
178
|
+
"model.layers.{bid}.self_attn.q_proj", # llama4
|
|
177
179
|
),
|
|
178
180
|
|
|
179
181
|
# Attention key
|
|
@@ -182,13 +184,14 @@ class TensorNameMap:
|
|
|
182
184
|
"model.layers.{bid}.self_attn.k_proj_no_perm", # llama-custom
|
|
183
185
|
"layers.{bid}.attention.wk", # llama-pth
|
|
184
186
|
"encoder.layer.{bid}.attention.self.key", # bert
|
|
187
|
+
"transformer.layer.{bid}.attention.k_lin", # distillbert
|
|
185
188
|
"transformer.h.{bid}.attn.k_proj", # gpt-j
|
|
186
189
|
"transformer.h.{bid}.attn.k", # refact
|
|
187
190
|
"model.layers.layers.{bid}.self_attn.k_proj", # plamo
|
|
188
191
|
"model.layers.{bid}.attention.wk", # internlm2
|
|
189
192
|
"transformer.decoder_layer.{bid}.multi_head_attention.key",# Grok
|
|
190
193
|
"transformer.h.{bid}.attn.attention.k_proj", # exaone
|
|
191
|
-
"
|
|
194
|
+
"model.layers.{bid}.self_attn.k_proj", # llama4
|
|
192
195
|
),
|
|
193
196
|
|
|
194
197
|
# Attention value
|
|
@@ -196,13 +199,14 @@ class TensorNameMap:
|
|
|
196
199
|
"model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2 phimoe
|
|
197
200
|
"layers.{bid}.attention.wv", # llama-pth
|
|
198
201
|
"encoder.layer.{bid}.attention.self.value", # bert
|
|
202
|
+
"transformer.layer.{bid}.attention.v_lin", # distillbert
|
|
199
203
|
"transformer.h.{bid}.attn.v_proj", # gpt-j
|
|
200
204
|
"transformer.h.{bid}.attn.v", # refact
|
|
201
205
|
"model.layers.layers.{bid}.self_attn.v_proj", # plamo
|
|
202
206
|
"model.layers.{bid}.attention.wv", # internlm2
|
|
203
207
|
"transformer.decoder_layer.{bid}.multi_head_attention.value",# Grok
|
|
204
208
|
"transformer.h.{bid}.attn.attention.v_proj", # exaone
|
|
205
|
-
"
|
|
209
|
+
"model.layers.{bid}.self_attn.v_proj", # llama4
|
|
206
210
|
),
|
|
207
211
|
|
|
208
212
|
# Attention output
|
|
@@ -216,6 +220,7 @@ class TensorNameMap:
|
|
|
216
220
|
"model.layers.{bid}.self_attn.linear_attn", # deci
|
|
217
221
|
"layers.{bid}.attention.wo", # llama-pth
|
|
218
222
|
"encoder.layer.{bid}.attention.output.dense", # bert
|
|
223
|
+
"transformer.layer.{bid}.attention.out_lin", # distillbert
|
|
219
224
|
"transformer.h.{bid}.attn.out_proj", # gpt-j
|
|
220
225
|
"language_model.encoder.layers.{bid}.self_attention.dense", # persimmon
|
|
221
226
|
"model.layers.{bid}.self_attn.dense", # persimmon
|
|
@@ -224,17 +229,19 @@ class TensorNameMap:
|
|
|
224
229
|
"model.layers.layers.{bid}.self_attn.o_proj", # plamo
|
|
225
230
|
"model.layers.{bid}.attention.wo", # internlm2
|
|
226
231
|
"encoder.layers.{bid}.attn.out_proj", # nomic-bert
|
|
232
|
+
"encoder.layers.{bid}.mixer.out_proj", # jina
|
|
227
233
|
"transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
|
|
228
234
|
"transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
|
|
229
235
|
"encoder.layers.{bid}.self_attention.dense", # chatglm
|
|
230
236
|
"transformer.layers.{bid}.attn.out_proj", # openelm
|
|
231
237
|
"transformer.h.{bid}.attn.attention.out_proj", # exaone
|
|
232
|
-
"
|
|
238
|
+
"model.layers.{bid}.self_attn.o_proj", # llama4
|
|
233
239
|
),
|
|
234
240
|
|
|
235
241
|
# Attention output norm
|
|
236
242
|
MODEL_TENSOR.ATTN_OUT_NORM: (
|
|
237
243
|
"encoder.layer.{bid}.attention.output.LayerNorm", # bert
|
|
244
|
+
"transformer.layer.{bid}.sa_layer_norm", # distillbert
|
|
238
245
|
"encoder.layers.{bid}.norm1", # nomic-bert
|
|
239
246
|
"transformer.decoder_layer.{bid}.rms_norm_1", # Grok
|
|
240
247
|
"transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
|
|
@@ -268,7 +275,7 @@ class TensorNameMap:
|
|
|
268
275
|
"transformer.decoder_layer.{bid}.rms_norm_2", # Grok
|
|
269
276
|
"encoder.layers.{bid}.post_attention_layernorm", # chatglm
|
|
270
277
|
"transformer.layers.{bid}.ffn_norm", # openelm
|
|
271
|
-
"
|
|
278
|
+
"model.layers.{bid}.post_attention_layernorm", # llama4
|
|
272
279
|
),
|
|
273
280
|
|
|
274
281
|
# Post feed-forward norm
|
|
@@ -289,7 +296,7 @@ class TensorNameMap:
|
|
|
289
296
|
"transformer.decoder_layer.{bid}.router", # Grok
|
|
290
297
|
"transformer.blocks.{bid}.ffn.router.layer", # dbrx
|
|
291
298
|
"model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
|
|
292
|
-
"
|
|
299
|
+
"model.layers.{bid}.feed_forward.router", # llama4
|
|
293
300
|
"encoder.layers.{bid}.mlp.router.layer", # nomic-bert-moe
|
|
294
301
|
),
|
|
295
302
|
|
|
@@ -311,6 +318,7 @@ class TensorNameMap:
|
|
|
311
318
|
"model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo2
|
|
312
319
|
"layers.{bid}.feed_forward.w3", # llama-pth
|
|
313
320
|
"encoder.layer.{bid}.intermediate.dense", # bert
|
|
321
|
+
"transformer.layer.{bid}.ffn.lin1", # distillbert
|
|
314
322
|
"transformer.h.{bid}.mlp.fc_in", # gpt-j
|
|
315
323
|
"transformer.h.{bid}.mlp.linear_3", # refact
|
|
316
324
|
"language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
|
|
@@ -329,7 +337,7 @@ class TensorNameMap:
|
|
|
329
337
|
"model.layers.{bid}.residual_mlp.w3", # arctic
|
|
330
338
|
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
|
|
331
339
|
"transformer.h.{bid}.mlp.c_fc_1", # exaone
|
|
332
|
-
"
|
|
340
|
+
"model.layers.{bid}.feed_forward.up_proj", # llama4
|
|
333
341
|
),
|
|
334
342
|
|
|
335
343
|
MODEL_TENSOR.FFN_UP_EXP: (
|
|
@@ -338,14 +346,14 @@ class TensorNameMap:
|
|
|
338
346
|
"transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
|
|
339
347
|
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
|
|
340
348
|
"model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
|
|
341
|
-
"
|
|
349
|
+
"model.layers.{bid}.feed_forward.experts.up_proj", # llama4
|
|
342
350
|
"encoder.layers.{bid}.mlp.experts.mlp.w1", # nomic-bert-moe
|
|
343
351
|
),
|
|
344
352
|
|
|
345
353
|
MODEL_TENSOR.FFN_UP_SHEXP: (
|
|
346
|
-
"model.layers.{bid}.mlp.shared_expert.up_proj",
|
|
347
|
-
"model.layers.{bid}.mlp.shared_experts.up_proj",
|
|
348
|
-
"
|
|
354
|
+
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
|
|
355
|
+
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
|
|
356
|
+
"model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4
|
|
349
357
|
),
|
|
350
358
|
|
|
351
359
|
# AWQ-activation gate
|
|
@@ -366,22 +374,22 @@ class TensorNameMap:
|
|
|
366
374
|
"transformer.h.{bid}.mlp.linear_1", # refact
|
|
367
375
|
"model.layers.{bid}.residual_mlp.w1", # arctic
|
|
368
376
|
"transformer.h.{bid}.mlp.c_fc_0", # exaone
|
|
369
|
-
"
|
|
377
|
+
"model.layers.{bid}.feed_forward.gate_proj", # llama4
|
|
370
378
|
),
|
|
371
379
|
|
|
372
380
|
MODEL_TENSOR.FFN_GATE_EXP: (
|
|
373
|
-
"layers.{bid}.feed_forward.experts.w1",
|
|
374
|
-
"transformer.decoder_layer.{bid}.moe.linear",
|
|
375
|
-
"transformer.blocks.{bid}.ffn.experts.mlp.w1",
|
|
376
|
-
"model.layers.{bid}.mlp.experts.gate_proj",
|
|
377
|
-
"model.layers.{bid}.block_sparse_moe.experts.w1",
|
|
378
|
-
"
|
|
381
|
+
"layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
|
|
382
|
+
"transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
|
|
383
|
+
"transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
|
|
384
|
+
"model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged)
|
|
385
|
+
"model.layers.{bid}.block_sparse_moe.experts.w1", # phimoe (merged)
|
|
386
|
+
"model.layers.{bid}.feed_forward.experts.gate_proj", # llama4
|
|
379
387
|
),
|
|
380
388
|
|
|
381
389
|
MODEL_TENSOR.FFN_GATE_SHEXP: (
|
|
382
|
-
"model.layers.{bid}.mlp.shared_expert.gate_proj",
|
|
383
|
-
"model.layers.{bid}.mlp.shared_experts.gate_proj",
|
|
384
|
-
"
|
|
390
|
+
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
|
|
391
|
+
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
|
|
392
|
+
"model.layers.{bid}.feed_forward.shared_expert.gate_proj", # llama4
|
|
385
393
|
),
|
|
386
394
|
|
|
387
395
|
# Feed-forward down
|
|
@@ -394,6 +402,7 @@ class TensorNameMap:
|
|
|
394
402
|
"model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo2
|
|
395
403
|
"layers.{bid}.feed_forward.w2", # llama-pth
|
|
396
404
|
"encoder.layer.{bid}.output.dense", # bert
|
|
405
|
+
"transformer.layer.{bid}.ffn.lin2", # distillbert
|
|
397
406
|
"transformer.h.{bid}.mlp.fc_out", # gpt-j
|
|
398
407
|
"language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
|
|
399
408
|
"model.layers.{bid}.mlp.dense_4h_to_h", # persimmon
|
|
@@ -410,7 +419,7 @@ class TensorNameMap:
|
|
|
410
419
|
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
|
|
411
420
|
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
|
|
412
421
|
"model.layers.h.{bid}.mlp.c_proj", # exaone
|
|
413
|
-
"
|
|
422
|
+
"model.layers.{bid}.feed_forward.down_proj", # llama4
|
|
414
423
|
),
|
|
415
424
|
|
|
416
425
|
MODEL_TENSOR.FFN_DOWN_EXP: (
|
|
@@ -420,14 +429,15 @@ class TensorNameMap:
|
|
|
420
429
|
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged)
|
|
421
430
|
"model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
|
|
422
431
|
"model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged)
|
|
423
|
-
"
|
|
432
|
+
"model.layers.{bid}.feed_forward.experts.down_proj", # llama4
|
|
424
433
|
"encoder.layers.{bid}.mlp.experts.mlp.w2", # nomic-bert-moe
|
|
425
434
|
),
|
|
426
435
|
|
|
427
436
|
MODEL_TENSOR.FFN_DOWN_SHEXP: (
|
|
428
|
-
"model.layers.{bid}.mlp.shared_expert.down_proj",
|
|
429
|
-
"model.layers.{bid}.mlp.shared_experts.down_proj",
|
|
430
|
-
"
|
|
437
|
+
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
|
|
438
|
+
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
|
|
439
|
+
"model.layers.{bid}.feed_forward.shared_expert.down_proj", # llama4
|
|
440
|
+
"model.layers.{bid}.shared_mlp.output_linear", # granitemoe
|
|
431
441
|
),
|
|
432
442
|
|
|
433
443
|
MODEL_TENSOR.ATTN_Q_NORM: (
|
|
@@ -454,6 +464,7 @@ class TensorNameMap:
|
|
|
454
464
|
|
|
455
465
|
MODEL_TENSOR.LAYER_OUT_NORM: (
|
|
456
466
|
"encoder.layer.{bid}.output.LayerNorm", # bert
|
|
467
|
+
"transformer.layer.{bid}.output_layer_norm", # distillbert
|
|
457
468
|
"encoder.layers.{bid}.norm2", # nomic-bert
|
|
458
469
|
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
|
|
459
470
|
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
|
|
@@ -824,6 +835,7 @@ class TensorNameMap:
|
|
|
824
835
|
MODEL_TENSOR.CLS: (
|
|
825
836
|
"classifier", # jina
|
|
826
837
|
"classifier.dense", # roberta
|
|
838
|
+
"pre_classifier", # distillbert
|
|
827
839
|
),
|
|
828
840
|
|
|
829
841
|
MODEL_TENSOR.CLS_OUT: (
|
|
@@ -905,6 +917,7 @@ class TensorNameMap:
|
|
|
905
917
|
|
|
906
918
|
MODEL_TENSOR.V_MMPROJ_MLP: (
|
|
907
919
|
"model.mm_projector.mlp.mlp.{bid}",
|
|
920
|
+
"vision_model.vision_adapter.mlp.fc{bid}", # llama 4
|
|
908
921
|
"mlp1.{bid}", # InternVL
|
|
909
922
|
),
|
|
910
923
|
|
|
@@ -914,6 +927,7 @@ class TensorNameMap:
|
|
|
914
927
|
|
|
915
928
|
MODEL_TENSOR.V_ENC_EMBD_CLS: (
|
|
916
929
|
"vision_tower.vision_model.embeddings.class_embedding",
|
|
930
|
+
"vision_model.class_embedding", # llama 4
|
|
917
931
|
),
|
|
918
932
|
|
|
919
933
|
MODEL_TENSOR.V_ENC_EMBD_PATCH: (
|
|
@@ -921,6 +935,7 @@ class TensorNameMap:
|
|
|
921
935
|
"vpm.embeddings.patch_embedding",
|
|
922
936
|
"model.vision_model.embeddings.patch_embedding", # SmolVLM
|
|
923
937
|
"vision_tower.patch_conv", # pixtral
|
|
938
|
+
"vision_model.patch_embedding.linear", # llama 4
|
|
924
939
|
"visual.patch_embed.proj", # qwen2vl
|
|
925
940
|
),
|
|
926
941
|
|
|
@@ -928,12 +943,14 @@ class TensorNameMap:
|
|
|
928
943
|
"vision_tower.vision_model.embeddings.position_embedding",
|
|
929
944
|
"vpm.embeddings.position_embedding",
|
|
930
945
|
"model.vision_model.embeddings.position_embedding", # SmolVLM
|
|
946
|
+
"vision_model.positional_embedding_vlm", # llama 4
|
|
931
947
|
),
|
|
932
948
|
|
|
933
949
|
MODEL_TENSOR.V_ENC_ATTN_Q: (
|
|
934
950
|
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_proj",
|
|
935
951
|
"vpm.encoder.layers.{bid}.self_attn.q_proj",
|
|
936
952
|
"model.vision_model.encoder.layers.{bid}.self_attn.q_proj", # SmolVLM
|
|
953
|
+
"vision_model.model.layers.{bid}.self_attn.q_proj", # llama4
|
|
937
954
|
"vision_tower.transformer.layers.{bid}.attention.q_proj", # pixtral
|
|
938
955
|
"visual.blocks.{bid}.attn.q", # qwen2vl, generated
|
|
939
956
|
),
|
|
@@ -946,6 +963,7 @@ class TensorNameMap:
|
|
|
946
963
|
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_proj",
|
|
947
964
|
"vpm.encoder.layers.{bid}.self_attn.k_proj",
|
|
948
965
|
"model.vision_model.encoder.layers.{bid}.self_attn.k_proj", # SmolVLM
|
|
966
|
+
"vision_model.model.layers.{bid}.self_attn.k_proj", # llama4
|
|
949
967
|
"vision_tower.transformer.layers.{bid}.attention.k_proj", # pixtral
|
|
950
968
|
"visual.blocks.{bid}.attn.k", # qwen2vl, generated
|
|
951
969
|
),
|
|
@@ -958,6 +976,7 @@ class TensorNameMap:
|
|
|
958
976
|
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_proj",
|
|
959
977
|
"vpm.encoder.layers.{bid}.self_attn.v_proj",
|
|
960
978
|
"model.vision_model.encoder.layers.{bid}.self_attn.v_proj", # SmolVLM
|
|
979
|
+
"vision_model.model.layers.{bid}.self_attn.v_proj", # llama4
|
|
961
980
|
"vision_tower.transformer.layers.{bid}.attention.v_proj", # pixtral
|
|
962
981
|
"visual.blocks.{bid}.attn.v", # qwen2vl, generated
|
|
963
982
|
),
|
|
@@ -968,23 +987,26 @@ class TensorNameMap:
|
|
|
968
987
|
"vpm.encoder.layers.{bid}.layer_norm1",
|
|
969
988
|
"model.vision_model.encoder.layers.{bid}.layer_norm1", # SmolVLM
|
|
970
989
|
"vision_tower.transformer.layers.{bid}.attention_norm", # pixtral
|
|
990
|
+
"vision_model.model.layers.{bid}.input_layernorm", # llama4
|
|
971
991
|
"visual.blocks.{bid}.norm1", # qwen2vl
|
|
972
992
|
),
|
|
973
993
|
|
|
974
|
-
MODEL_TENSOR.
|
|
994
|
+
MODEL_TENSOR.V_ENC_ATTN_O: (
|
|
975
995
|
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.out_proj",
|
|
976
996
|
"vision_tower.vision_model.encoder.layers.{bid}.attn.proj", # InternVL
|
|
977
997
|
"vpm.encoder.layers.{bid}.self_attn.out_proj",
|
|
978
998
|
"model.vision_model.encoder.layers.{bid}.self_attn.out_proj", # SmolVLM
|
|
999
|
+
"vision_model.model.layers.{bid}.self_attn.o_proj", # llama4
|
|
979
1000
|
"vision_tower.transformer.layers.{bid}.attention.o_proj", # pixtral
|
|
980
1001
|
"visual.blocks.{bid}.attn.proj", # qwen2vl
|
|
981
1002
|
),
|
|
982
1003
|
|
|
983
|
-
MODEL_TENSOR.
|
|
1004
|
+
MODEL_TENSOR.V_ENC_POST_ATTN_NORM: (
|
|
984
1005
|
"vision_tower.vision_model.encoder.layers.{bid}.layer_norm2",
|
|
985
1006
|
"vision_tower.vision_model.encoder.layers.{bid}.norm2", # InternVL
|
|
986
1007
|
"vpm.encoder.layers.{bid}.layer_norm2",
|
|
987
1008
|
"model.vision_model.encoder.layers.{bid}.layer_norm2", # SmolVLM
|
|
1009
|
+
"vision_model.model.layers.{bid}.post_attention_layernorm", # llama4
|
|
988
1010
|
"vision_tower.transformer.layers.{bid}.ffn_norm", # pixtral
|
|
989
1011
|
"visual.blocks.{bid}.norm2", # qwen2vl
|
|
990
1012
|
),
|
|
@@ -994,6 +1016,7 @@ class TensorNameMap:
|
|
|
994
1016
|
"vpm.encoder.layers.{bid}.mlp.fc1",
|
|
995
1017
|
"model.vision_model.encoder.layers.{bid}.mlp.fc1", # SmolVLM, gemma3
|
|
996
1018
|
"vision_tower.transformer.layers.{bid}.feed_forward.up_proj", # pixtral
|
|
1019
|
+
"vision_model.model.layers.{bid}.mlp.fc1", # llama4
|
|
997
1020
|
"visual.blocks.{bid}.mlp.fc1", # qwen2vl
|
|
998
1021
|
"visual.blocks.{bid}.mlp.up_proj", # qwen2.5vl
|
|
999
1022
|
),
|
|
@@ -1008,6 +1031,7 @@ class TensorNameMap:
|
|
|
1008
1031
|
"vpm.encoder.layers.{bid}.mlp.fc2",
|
|
1009
1032
|
"model.vision_model.encoder.layers.{bid}.mlp.fc2", # SmolVLM, gemma3
|
|
1010
1033
|
"vision_tower.transformer.layers.{bid}.feed_forward.down_proj", # pixtral
|
|
1034
|
+
"vision_model.model.layers.{bid}.mlp.fc2", # llama4
|
|
1011
1035
|
"visual.blocks.{bid}.mlp.fc2", # qwen2vl
|
|
1012
1036
|
"visual.blocks.{bid}.mlp.down_proj", # qwen2.5vl
|
|
1013
1037
|
),
|
|
@@ -1023,11 +1047,13 @@ class TensorNameMap:
|
|
|
1023
1047
|
MODEL_TENSOR.V_PRE_NORM: (
|
|
1024
1048
|
"vision_tower.vision_model.pre_layrnorm",
|
|
1025
1049
|
"vision_tower.ln_pre", # pixtral
|
|
1050
|
+
"vision_model.layernorm_pre", # llama4
|
|
1026
1051
|
),
|
|
1027
1052
|
|
|
1028
1053
|
MODEL_TENSOR.V_POST_NORM: (
|
|
1029
1054
|
"vision_tower.vision_model.post_layernorm",
|
|
1030
1055
|
"model.vision_model.post_layernorm", # SmolVLM
|
|
1056
|
+
"vision_model.layernorm_post", # llama4
|
|
1031
1057
|
"visual.merger.ln_q", # qwen2vl
|
|
1032
1058
|
),
|
|
1033
1059
|
|
|
@@ -1094,6 +1120,77 @@ class TensorNameMap:
|
|
|
1094
1120
|
MODEL_TENSOR.V_MM_PATCH_MERGER: (
|
|
1095
1121
|
"multi_modal_projector.patch_merger.merging_layer", # mistral small 3.1
|
|
1096
1122
|
),
|
|
1123
|
+
|
|
1124
|
+
# audio (mtmd)
|
|
1125
|
+
|
|
1126
|
+
MODEL_TENSOR.A_ENC_EMBD_POS: (
|
|
1127
|
+
"audio_tower.embed_positions", # ultravox
|
|
1128
|
+
),
|
|
1129
|
+
|
|
1130
|
+
MODEL_TENSOR.A_ENC_CONV1D: (
|
|
1131
|
+
"audio_tower.conv{bid}", # ultravox
|
|
1132
|
+
),
|
|
1133
|
+
|
|
1134
|
+
MODEL_TENSOR.A_PRE_NORM: (),
|
|
1135
|
+
|
|
1136
|
+
MODEL_TENSOR.A_POST_NORM: (
|
|
1137
|
+
"audio_tower.layer_norm", # ultravox
|
|
1138
|
+
"audio_tower.ln_post", # qwen2omni
|
|
1139
|
+
),
|
|
1140
|
+
|
|
1141
|
+
MODEL_TENSOR.A_ENC_ATTN_Q: (
|
|
1142
|
+
"audio_tower.layers.{bid}.self_attn.q_proj", # ultravox
|
|
1143
|
+
),
|
|
1144
|
+
|
|
1145
|
+
MODEL_TENSOR.A_ENC_ATTN_K: (
|
|
1146
|
+
"audio_tower.layers.{bid}.self_attn.k_proj", # ultravox
|
|
1147
|
+
),
|
|
1148
|
+
|
|
1149
|
+
MODEL_TENSOR.A_ENC_ATTN_V: (
|
|
1150
|
+
"audio_tower.layers.{bid}.self_attn.v_proj", # ultravox
|
|
1151
|
+
),
|
|
1152
|
+
|
|
1153
|
+
MODEL_TENSOR.A_ENC_INPUT_NORM: (
|
|
1154
|
+
"audio_tower.layers.{bid}.self_attn_layer_norm", # ultravox
|
|
1155
|
+
),
|
|
1156
|
+
|
|
1157
|
+
MODEL_TENSOR.A_ENC_OUTPUT: (
|
|
1158
|
+
"audio_tower.layers.{bid}.self_attn.out_proj", # ultravox
|
|
1159
|
+
),
|
|
1160
|
+
|
|
1161
|
+
MODEL_TENSOR.A_ENC_OUTPUT_NORM: (
|
|
1162
|
+
"audio_tower.layers.{bid}.final_layer_norm", # ultravox
|
|
1163
|
+
),
|
|
1164
|
+
|
|
1165
|
+
MODEL_TENSOR.A_ENC_FFN_UP: (
|
|
1166
|
+
"audio_tower.layers.{bid}.fc1", # ultravox
|
|
1167
|
+
),
|
|
1168
|
+
|
|
1169
|
+
MODEL_TENSOR.A_ENC_FFN_GATE: (),
|
|
1170
|
+
|
|
1171
|
+
MODEL_TENSOR.A_ENC_FFN_DOWN: (
|
|
1172
|
+
"audio_tower.layers.{bid}.fc2", # ultravox
|
|
1173
|
+
),
|
|
1174
|
+
|
|
1175
|
+
# note: some tensors below have an "audio." pseudo-prefix, to prevent conflicts with vision tensors
|
|
1176
|
+
# this prefix is added in the conversion code in modify_tensors()
|
|
1177
|
+
|
|
1178
|
+
MODEL_TENSOR.A_MMPROJ: (
|
|
1179
|
+
"audio.multi_modal_projector.linear_{bid}", # ultravox
|
|
1180
|
+
),
|
|
1181
|
+
|
|
1182
|
+
MODEL_TENSOR.A_MMPROJ_FC: (
|
|
1183
|
+
"audio.multi_modal_projector.linear", # qwen2audio
|
|
1184
|
+
"audio_tower.proj", # qwen2omni
|
|
1185
|
+
),
|
|
1186
|
+
|
|
1187
|
+
MODEL_TENSOR.A_MM_NORM_PRE: (
|
|
1188
|
+
"audio.multi_modal_projector.ln_pre", # ultravox
|
|
1189
|
+
),
|
|
1190
|
+
|
|
1191
|
+
MODEL_TENSOR.A_MM_NORM_MID: (
|
|
1192
|
+
"audio.multi_modal_projector.ln_mid", # ultravox
|
|
1193
|
+
),
|
|
1097
1194
|
}
|
|
1098
1195
|
|
|
1099
1196
|
# architecture-specific block mappings
|
|
@@ -231,7 +231,7 @@ class SafetensorRemote:
|
|
|
231
231
|
response.raise_for_status()
|
|
232
232
|
|
|
233
233
|
# Get raw byte data
|
|
234
|
-
return response.content[
|
|
234
|
+
return response.content[slice(size if size > -1 else None)]
|
|
235
235
|
|
|
236
236
|
@classmethod
|
|
237
237
|
def check_file_exist(cls, url: str) -> bool:
|