@novastera-oss/llamarn 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/cpp/include/llama.h +8 -3
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +56 -22
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +1 -2
- package/cpp/llama.cpp/README.md +4 -5
- package/cpp/llama.cpp/build-xcframework.sh +1 -1
- package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
- package/cpp/llama.cpp/common/arg.cpp +24 -0
- package/cpp/llama.cpp/common/chat.cpp +37 -20
- package/cpp/llama.cpp/common/chat.h +2 -0
- package/cpp/llama.cpp/common/common.cpp +3 -0
- package/cpp/llama.cpp/common/common.h +5 -0
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
- package/cpp/llama.cpp/convert_hf_to_gguf.py +860 -23
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +9 -0
- package/cpp/llama.cpp/ggml/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +206 -10
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +17 -1
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +36 -18
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +68 -5
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +16 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +37 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +10 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +109 -108
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +1027 -1038
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +53 -52
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +56 -55
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +42 -41
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +24 -23
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +29 -28
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +30 -29
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +83 -82
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +20 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +9 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +111 -103
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1405 -240
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +25 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +56 -40
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +212 -34
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +35 -11
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +187 -54
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +71 -29
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +8 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +14 -12
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +5 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +15 -10
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +12 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +269 -110
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +2 -8
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +97 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +14 -5
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +125 -183
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +51 -9
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +394 -80
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +616 -239
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +741 -571
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -24
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +99 -166
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +94 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +697 -1098
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +6 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +104 -62
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +132 -201
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +39 -38
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +12 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +767 -292
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +58 -7
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +23 -3
- package/cpp/llama.cpp/ggml/src/ggml.c +449 -72
- package/cpp/llama.cpp/ggml/src/gguf.cpp +13 -2
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +285 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +27 -0
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +137 -21
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +109 -7
- package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
- package/cpp/llama.cpp/include/llama.h +8 -43
- package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +265 -3
- package/cpp/llama.cpp/src/llama-arch.h +36 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +596 -359
- package/cpp/llama.cpp/src/llama-batch.h +105 -70
- package/cpp/llama.cpp/src/llama-chat.cpp +26 -6
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-context.cpp +101 -107
- package/cpp/llama.cpp/src/llama-context.h +13 -13
- package/cpp/llama.cpp/src/llama-graph.cpp +286 -404
- package/cpp/llama.cpp/src/llama-graph.h +78 -79
- package/cpp/llama.cpp/src/llama-hparams.cpp +11 -1
- package/cpp/llama.cpp/src/llama-hparams.h +11 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +74 -66
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +23 -26
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +312 -157
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +79 -46
- package/cpp/llama.cpp/src/llama-kv-cells.h +97 -21
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +73 -69
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +19 -22
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +88 -77
- package/cpp/llama.cpp/src/llama-memory-recurrent.h +15 -20
- package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
- package/cpp/llama.cpp/src/llama-memory.h +21 -22
- package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/cpp/llama.cpp/src/llama-model.cpp +5301 -2922
- package/cpp/llama.cpp/src/llama-model.h +40 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +88 -5
- package/cpp/llama.cpp/src/llama-vocab.cpp +37 -3
- package/cpp/llama.cpp/src/llama-vocab.h +42 -0
- package/cpp/rn-utils.h +3 -0
- package/ios/include/chat.h +2 -0
- package/ios/include/common.h +5 -0
- package/ios/include/llama.h +8 -43
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3891 -3744
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5095 -4900
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5066 -4871
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3919 -3773
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
- package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
|
@@ -13,7 +13,7 @@ class TensorNameMap:
|
|
|
13
13
|
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
|
|
14
14
|
"transformer.word_embeddings", # falcon
|
|
15
15
|
"word_embeddings", # bloom
|
|
16
|
-
"model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414
|
|
16
|
+
"model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 granite-hybrid
|
|
17
17
|
"tok_embeddings", # llama-pth
|
|
18
18
|
"embeddings.word_embeddings", # bert nomic-bert
|
|
19
19
|
"language_model.embedding.word_embeddings", # persimmon
|
|
@@ -50,6 +50,7 @@ class TensorNameMap:
|
|
|
50
50
|
"model.pre_ln", # rwkv7
|
|
51
51
|
"model.layers.0.pre_norm", # rwkv7
|
|
52
52
|
"backbone.norm", # wavtokenizer
|
|
53
|
+
"model.embedding_norm", # lfm2
|
|
53
54
|
),
|
|
54
55
|
|
|
55
56
|
# Position embeddings
|
|
@@ -118,7 +119,7 @@ class TensorNameMap:
|
|
|
118
119
|
"transformer.h.{bid}.input_layernorm", # falcon7b
|
|
119
120
|
"h.{bid}.input_layernorm", # bloom
|
|
120
121
|
"transformer.h.{bid}.ln_mlp", # falcon40b
|
|
121
|
-
"model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe
|
|
122
|
+
"model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe granite-hybrid
|
|
122
123
|
"layers.{bid}.attention_norm", # llama-pth
|
|
123
124
|
"language_model.encoder.layers.{bid}.input_layernorm", # persimmon
|
|
124
125
|
"model.layers.{bid}.ln1", # yi
|
|
@@ -136,6 +137,7 @@ class TensorNameMap:
|
|
|
136
137
|
"model.layers.{bid}.ln1", # rwkv7
|
|
137
138
|
"model.layers.{bid}.input_layernorm", # llama4
|
|
138
139
|
"transformer_encoder.{bid}.attention_norm", # neobert
|
|
140
|
+
"model.layers.{bid}.operator_norm", # lfm2
|
|
139
141
|
),
|
|
140
142
|
|
|
141
143
|
# Attention norm 2
|
|
@@ -220,6 +222,7 @@ class TensorNameMap:
|
|
|
220
222
|
"transformer.h.{bid}.self_attention.dense", # falcon
|
|
221
223
|
"h.{bid}.self_attention.dense", # bloom
|
|
222
224
|
"model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2 phimoe
|
|
225
|
+
"model.layers.{bid}.self_attn.out_proj", # lfm2
|
|
223
226
|
"model.layers.{bid}.self_attn.linear_attn", # deci
|
|
224
227
|
"layers.{bid}.attention.wo", # llama-pth
|
|
225
228
|
"encoder.layer.{bid}.attention.output.dense", # bert
|
|
@@ -279,6 +282,8 @@ class TensorNameMap:
|
|
|
279
282
|
"transformer.decoder_layer.{bid}.rms_norm_2", # Grok
|
|
280
283
|
"encoder.layers.{bid}.post_attention_layernorm", # chatglm
|
|
281
284
|
"transformer.layers.{bid}.ffn_norm", # openelm
|
|
285
|
+
"model.layers.{bid}.pre_ff_layernorm", # jamba granite-hybrid
|
|
286
|
+
"model.layers.{bid}.pre_moe_layernorm", # mini-jamba
|
|
282
287
|
"model.layers.{bid}.post_attention_layernorm", # llama4
|
|
283
288
|
"transformer_encoder.{bid}.ffn_norm", # neobert
|
|
284
289
|
),
|
|
@@ -286,12 +291,14 @@ class TensorNameMap:
|
|
|
286
291
|
# Post feed-forward norm
|
|
287
292
|
MODEL_TENSOR.FFN_PRE_NORM: (
|
|
288
293
|
"model.layers.{bid}.pre_feedforward_layernorm", # gemma2
|
|
294
|
+
"model.layers.{bid}.pre_ff_layernorm.weight",
|
|
289
295
|
),
|
|
290
296
|
|
|
291
297
|
# Post feed-forward norm
|
|
292
298
|
MODEL_TENSOR.FFN_POST_NORM: (
|
|
293
299
|
"model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
|
|
294
300
|
"model.layers.{bid}.post_mlp_layernorm", # glm-4-0414
|
|
301
|
+
"model.layers.{bid}.feed_forward.up_proj",
|
|
295
302
|
),
|
|
296
303
|
|
|
297
304
|
MODEL_TENSOR.FFN_GATE_INP: (
|
|
@@ -301,8 +308,9 @@ class TensorNameMap:
|
|
|
301
308
|
"transformer.decoder_layer.{bid}.router", # Grok
|
|
302
309
|
"transformer.blocks.{bid}.ffn.router.layer", # dbrx
|
|
303
310
|
"model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
|
|
304
|
-
"model.layers.{bid}.feed_forward.router", # llama4
|
|
311
|
+
"model.layers.{bid}.feed_forward.router", # llama4 jamba
|
|
305
312
|
"encoder.layers.{bid}.mlp.router.layer", # nomic-bert-moe
|
|
313
|
+
"model.layers.{bid}.mlp.gate.wg", # hunyuan
|
|
306
314
|
),
|
|
307
315
|
|
|
308
316
|
MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
|
|
@@ -344,7 +352,7 @@ class TensorNameMap:
|
|
|
344
352
|
"model.layers.{bid}.residual_mlp.w3", # arctic
|
|
345
353
|
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
|
|
346
354
|
"transformer.h.{bid}.mlp.c_fc_1", # exaone
|
|
347
|
-
"model.layers.{bid}.feed_forward.up_proj", # llama4
|
|
355
|
+
"model.layers.{bid}.feed_forward.up_proj", # llama4 jamba granite-hybrid
|
|
348
356
|
"transformer_encoder.{bid}.ffn.w12", # neobert
|
|
349
357
|
),
|
|
350
358
|
|
|
@@ -362,6 +370,8 @@ class TensorNameMap:
|
|
|
362
370
|
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
|
|
363
371
|
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
|
|
364
372
|
"model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4
|
|
373
|
+
"model.layers.{bid}.feed_forward.down_proj",
|
|
374
|
+
"model.layers.{bid}.mlp.shared_mlp.up_proj", # hunyuan
|
|
365
375
|
),
|
|
366
376
|
|
|
367
377
|
# AWQ-activation gate
|
|
@@ -382,7 +392,7 @@ class TensorNameMap:
|
|
|
382
392
|
"transformer.h.{bid}.mlp.linear_1", # refact
|
|
383
393
|
"model.layers.{bid}.residual_mlp.w1", # arctic
|
|
384
394
|
"transformer.h.{bid}.mlp.c_fc_0", # exaone
|
|
385
|
-
"model.layers.{bid}.feed_forward.gate_proj", # llama4
|
|
395
|
+
"model.layers.{bid}.feed_forward.gate_proj", # llama4 jamba granite-hybrid
|
|
386
396
|
),
|
|
387
397
|
|
|
388
398
|
MODEL_TENSOR.FFN_GATE_EXP: (
|
|
@@ -398,6 +408,7 @@ class TensorNameMap:
|
|
|
398
408
|
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
|
|
399
409
|
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
|
|
400
410
|
"model.layers.{bid}.feed_forward.shared_expert.gate_proj", # llama4
|
|
411
|
+
"model.layers.{bid}.mlp.shared_mlp.gate_proj", # hunyuan
|
|
401
412
|
),
|
|
402
413
|
|
|
403
414
|
# Feed-forward down
|
|
@@ -427,7 +438,7 @@ class TensorNameMap:
|
|
|
427
438
|
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
|
|
428
439
|
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
|
|
429
440
|
"model.layers.h.{bid}.mlp.c_proj", # exaone
|
|
430
|
-
"model.layers.{bid}.feed_forward.down_proj", # llama4
|
|
441
|
+
"model.layers.{bid}.feed_forward.down_proj", # llama4 jamba granite-hybrid
|
|
431
442
|
"transformer_encoder.{bid}.ffn.w3", # neobert
|
|
432
443
|
),
|
|
433
444
|
|
|
@@ -447,11 +458,13 @@ class TensorNameMap:
|
|
|
447
458
|
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
|
|
448
459
|
"model.layers.{bid}.feed_forward.shared_expert.down_proj", # llama4
|
|
449
460
|
"model.layers.{bid}.shared_mlp.output_linear", # granitemoe
|
|
461
|
+
"model.layers.{bid}.mlp.shared_mlp.down_proj", # hunyuan
|
|
450
462
|
),
|
|
451
463
|
|
|
452
464
|
MODEL_TENSOR.ATTN_Q_NORM: (
|
|
453
465
|
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
|
454
466
|
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
|
467
|
+
"model.layers.{bid}.self_attn.query_layernorm", # hunyuan
|
|
455
468
|
"model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2
|
|
456
469
|
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
|
|
457
470
|
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
|
|
@@ -461,6 +474,7 @@ class TensorNameMap:
|
|
|
461
474
|
MODEL_TENSOR.ATTN_K_NORM: (
|
|
462
475
|
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
|
|
463
476
|
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
|
477
|
+
"model.layers.{bid}.self_attn.key_layernorm", # hunyuan
|
|
464
478
|
"model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2
|
|
465
479
|
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
|
|
466
480
|
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
|
|
@@ -477,42 +491,132 @@ class TensorNameMap:
|
|
|
477
491
|
"encoder.layers.{bid}.norm2", # nomic-bert
|
|
478
492
|
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
|
|
479
493
|
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
|
|
480
|
-
"encoder.layer.{bid}.layer_norm_2"
|
|
494
|
+
"encoder.layer.{bid}.layer_norm_2", # jina-v2-code
|
|
495
|
+
),
|
|
496
|
+
|
|
497
|
+
MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: (
|
|
498
|
+
"model.embed_tokens_per_layer", # gemma3n
|
|
499
|
+
),
|
|
500
|
+
|
|
501
|
+
MODEL_TENSOR.PER_LAYER_MODEL_PROJ: (
|
|
502
|
+
"model.per_layer_model_projection", # gemma3n
|
|
503
|
+
),
|
|
504
|
+
|
|
505
|
+
MODEL_TENSOR.PER_LAYER_PROJ_NORM: (
|
|
506
|
+
"model.per_layer_projection_norm", # gemma3n
|
|
507
|
+
),
|
|
508
|
+
|
|
509
|
+
MODEL_TENSOR.ALTUP_PROJ: (
|
|
510
|
+
"model.altup_projections", # gemma3n
|
|
511
|
+
),
|
|
512
|
+
|
|
513
|
+
MODEL_TENSOR.ALTUP_UNEMBD_PROJ: (
|
|
514
|
+
"model.altup_unembed_projections", # gemma3n
|
|
515
|
+
),
|
|
516
|
+
|
|
517
|
+
MODEL_TENSOR.PER_LAYER_INP_GATE: (
|
|
518
|
+
"model.layers.{bid}.per_layer_input_gate", # gemma3n
|
|
519
|
+
),
|
|
520
|
+
|
|
521
|
+
MODEL_TENSOR.PER_LAYER_PROJ: (
|
|
522
|
+
"model.layers.{bid}.per_layer_projection", # gemma3n
|
|
523
|
+
),
|
|
524
|
+
|
|
525
|
+
MODEL_TENSOR.PER_LAYER_POST_NORM: (
|
|
526
|
+
"model.layers.{bid}.post_per_layer_input_norm", # gemma3n
|
|
527
|
+
),
|
|
528
|
+
|
|
529
|
+
MODEL_TENSOR.ALTUP_CORRECT_COEF: (
|
|
530
|
+
"model.layers.{bid}.altup.correction_coefs", # gemma3n
|
|
531
|
+
),
|
|
532
|
+
|
|
533
|
+
MODEL_TENSOR.ALTUP_CORRECT_SCALE: (
|
|
534
|
+
"model.layers.{bid}.altup.correct_output_scale", # gemma3n
|
|
535
|
+
),
|
|
536
|
+
|
|
537
|
+
MODEL_TENSOR.ALTUP_PREDICT_COEF: (
|
|
538
|
+
"model.layers.{bid}.altup.prediction_coefs", # gemma3n
|
|
539
|
+
),
|
|
540
|
+
|
|
541
|
+
MODEL_TENSOR.ALTUP_ROUTER: (
|
|
542
|
+
"model.layers.{bid}.altup.modality_router", # gemma3n
|
|
543
|
+
),
|
|
544
|
+
|
|
545
|
+
MODEL_TENSOR.ALTUP_ROUTER_NORM: (
|
|
546
|
+
"model.layers.{bid}.altup.router_norm", # gemma3n
|
|
547
|
+
),
|
|
548
|
+
|
|
549
|
+
MODEL_TENSOR.LAUREL_L: (
|
|
550
|
+
"model.layers.{bid}.laurel.linear_left", # gemma3n
|
|
551
|
+
),
|
|
552
|
+
|
|
553
|
+
MODEL_TENSOR.LAUREL_R: (
|
|
554
|
+
"model.layers.{bid}.laurel.linear_right", # gemma3n
|
|
555
|
+
),
|
|
556
|
+
|
|
557
|
+
MODEL_TENSOR.LAUREL_POST_NORM: (
|
|
558
|
+
"model.layers.{bid}.laurel.post_laurel_norm", # gemma3n
|
|
481
559
|
),
|
|
482
560
|
|
|
483
561
|
MODEL_TENSOR.SSM_IN: (
|
|
484
|
-
"model.layers.{bid}.in_proj",
|
|
485
|
-
"backbone.layers.{bid}.mixer.in_proj",
|
|
562
|
+
"model.layers.{bid}.in_proj", # mamba-hf
|
|
563
|
+
"backbone.layers.{bid}.mixer.in_proj", # mamba
|
|
564
|
+
"model.layers.{bid}.mamba.in_proj", # jamba falcon-h1 granite-hybrid
|
|
486
565
|
),
|
|
487
566
|
|
|
488
567
|
MODEL_TENSOR.SSM_CONV1D: (
|
|
489
|
-
"model.layers.{bid}.conv1d",
|
|
490
|
-
"backbone.layers.{bid}.mixer.conv1d",
|
|
568
|
+
"model.layers.{bid}.conv1d", # mamba-hf
|
|
569
|
+
"backbone.layers.{bid}.mixer.conv1d", # mamba
|
|
570
|
+
"model.layers.{bid}.mamba.conv1d", # jamba falcon-h1 granite-hybrid
|
|
491
571
|
),
|
|
492
572
|
|
|
493
573
|
MODEL_TENSOR.SSM_X: (
|
|
494
|
-
"model.layers.{bid}.x_proj",
|
|
495
|
-
"backbone.layers.{bid}.mixer.x_proj",
|
|
574
|
+
"model.layers.{bid}.x_proj", # mamba-hf
|
|
575
|
+
"backbone.layers.{bid}.mixer.x_proj", # mamba
|
|
576
|
+
"model.layers.{bid}.mamba.x_proj", # jamba
|
|
496
577
|
),
|
|
497
578
|
|
|
498
579
|
MODEL_TENSOR.SSM_DT: (
|
|
499
|
-
"model.layers.{bid}.dt_proj",
|
|
500
|
-
"backbone.layers.{bid}.mixer.dt_proj",
|
|
580
|
+
"model.layers.{bid}.dt_proj", # mamba-hf
|
|
581
|
+
"backbone.layers.{bid}.mixer.dt_proj", # mamba
|
|
582
|
+
"model.layers.{bid}.mamba.dt_proj", # jamba falcon-h1 granite-hybrid
|
|
583
|
+
),
|
|
584
|
+
|
|
585
|
+
MODEL_TENSOR.SSM_DT_NORM: (
|
|
586
|
+
"model.layers.{bid}.mamba.dt_layernorm", # jamba
|
|
501
587
|
),
|
|
502
588
|
|
|
503
589
|
MODEL_TENSOR.SSM_A: (
|
|
504
|
-
"model.layers.{bid}.A_log",
|
|
505
|
-
"backbone.layers.{bid}.mixer.A_log",
|
|
590
|
+
"model.layers.{bid}.A_log", # mamba-hf
|
|
591
|
+
"backbone.layers.{bid}.mixer.A_log", # mamba
|
|
592
|
+
"model.layers.{bid}.mamba.A_log", # jamba falcon-h1 granite-hybrid
|
|
593
|
+
),
|
|
594
|
+
|
|
595
|
+
MODEL_TENSOR.SSM_B_NORM: (
|
|
596
|
+
"model.layers.{bid}.mamba.b_layernorm", # jamba
|
|
597
|
+
"model.layers.{bid}.mamba.B_layernorm", # mini-jamba
|
|
598
|
+
),
|
|
599
|
+
|
|
600
|
+
MODEL_TENSOR.SSM_C_NORM: (
|
|
601
|
+
"model.layers.{bid}.mamba.c_layernorm", # jamba
|
|
602
|
+
"model.layers.{bid}.mamba.C_layernorm", # mini-jamba
|
|
506
603
|
),
|
|
507
604
|
|
|
508
605
|
MODEL_TENSOR.SSM_D: (
|
|
509
|
-
"model.layers.{bid}.D",
|
|
510
|
-
"backbone.layers.{bid}.mixer.D",
|
|
606
|
+
"model.layers.{bid}.D", # mamba-hf
|
|
607
|
+
"backbone.layers.{bid}.mixer.D", # mamba
|
|
608
|
+
"model.layers.{bid}.mamba.D", # jamba falcon-h1 granite-hybrid
|
|
609
|
+
),
|
|
610
|
+
|
|
611
|
+
MODEL_TENSOR.SSM_NORM: (
|
|
612
|
+
"model.layers.{bid}.mamba.norm", # falcon-h1 granite-hybrid
|
|
613
|
+
"backbone.layers.{bid}.mixer.norm", # mamba2
|
|
511
614
|
),
|
|
512
615
|
|
|
513
616
|
MODEL_TENSOR.SSM_OUT: (
|
|
514
|
-
"model.layers.{bid}.out_proj",
|
|
515
|
-
"backbone.layers.{bid}.mixer.out_proj",
|
|
617
|
+
"model.layers.{bid}.out_proj", # mamba-hf
|
|
618
|
+
"backbone.layers.{bid}.mixer.out_proj", # mamba
|
|
619
|
+
"model.layers.{bid}.mamba.out_proj", # jamba falcon-h1 granite-hybrid
|
|
516
620
|
),
|
|
517
621
|
|
|
518
622
|
MODEL_TENSOR.TIME_MIX_W0: (
|
|
@@ -914,6 +1018,18 @@ class TensorNameMap:
|
|
|
914
1018
|
"backbone.posnet.{bid}.proj_out", # wavtokenizer
|
|
915
1019
|
),
|
|
916
1020
|
|
|
1021
|
+
MODEL_TENSOR.SHORTCONV_CONV: (
|
|
1022
|
+
"model.layers.{bid}.conv.conv",
|
|
1023
|
+
),
|
|
1024
|
+
|
|
1025
|
+
MODEL_TENSOR.SHORTCONV_INPROJ: (
|
|
1026
|
+
"model.layers.{bid}.conv.in_proj",
|
|
1027
|
+
),
|
|
1028
|
+
|
|
1029
|
+
MODEL_TENSOR.SHORTCONV_OUTPROJ: (
|
|
1030
|
+
"model.layers.{bid}.conv.out_proj",
|
|
1031
|
+
),
|
|
1032
|
+
|
|
917
1033
|
#############################################################################
|
|
918
1034
|
## Vision encoder
|
|
919
1035
|
|
|
@@ -7,7 +7,10 @@ import os
|
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
from typing import Any, Callable, Sequence, Mapping, Iterable, Protocol, ClassVar, runtime_checkable
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
try:
|
|
11
|
+
from sentencepiece import SentencePieceProcessor
|
|
12
|
+
except ImportError:
|
|
13
|
+
SentencePieceProcessor = None
|
|
11
14
|
|
|
12
15
|
import gguf
|
|
13
16
|
|
|
@@ -116,6 +119,7 @@ class SpecialVocab:
|
|
|
116
119
|
logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping')
|
|
117
120
|
|
|
118
121
|
def _try_load_from_tokenizer_json(self, path: Path) -> bool:
|
|
122
|
+
tokenizer = None
|
|
119
123
|
tokenizer_file = path / 'tokenizer.json'
|
|
120
124
|
if tokenizer_file.is_file():
|
|
121
125
|
with open(tokenizer_file, encoding = 'utf-8') as f:
|
|
@@ -149,15 +153,110 @@ class SpecialVocab:
|
|
|
149
153
|
added_tokens = tokenizer.get('added_tokens', {})
|
|
150
154
|
else:
|
|
151
155
|
added_tokens = {}
|
|
156
|
+
tokenizer_config = None
|
|
152
157
|
tokenizer_config_file = path / 'tokenizer_config.json'
|
|
153
|
-
if
|
|
158
|
+
if tokenizer_config_file.is_file():
|
|
159
|
+
with open(tokenizer_config_file, encoding = 'utf-8') as f:
|
|
160
|
+
tokenizer_config = json.load(f)
|
|
161
|
+
if tokenizer:
|
|
162
|
+
special_bos = (tokenizer_config or {}).get('bos_token')
|
|
163
|
+
special_cls = (tokenizer_config or {}).get('cls_token')
|
|
164
|
+
special_eos = (tokenizer_config or {}).get('eos_token')
|
|
165
|
+
special_sep = (tokenizer_config or {}).get('sep_token')
|
|
166
|
+
if not special_bos and special_cls and tokenizer_config:
|
|
167
|
+
tokenizer_config['bos_token'] = special_bos = special_cls
|
|
168
|
+
if not special_eos and special_sep and tokenizer_config:
|
|
169
|
+
tokenizer_config['eos_token'] = special_eos = special_sep
|
|
170
|
+
if post_processor := tokenizer.get('post_processor'):
|
|
171
|
+
for processor in post_processor.get('processors', [post_processor]):
|
|
172
|
+
if processor.get('type') == 'RobertaProcessing':
|
|
173
|
+
self.add_special_token['bos'] = True
|
|
174
|
+
self.add_special_token['eos'] = True
|
|
175
|
+
self.add_special_token['sep'] = True
|
|
176
|
+
if not special_cls and tokenizer_config:
|
|
177
|
+
special_cls = processor.get('cls', [special_bos])[0]
|
|
178
|
+
tokenizer_config['cls_token'] = special_cls
|
|
179
|
+
if not special_sep and tokenizer_config:
|
|
180
|
+
special_sep = processor.get('sep', [special_eos])[0]
|
|
181
|
+
tokenizer_config['sep_token'] = special_sep
|
|
182
|
+
continue
|
|
183
|
+
# Crude parsing of TemplateProcessing to determine if BOS/SEP/EOS should be added
|
|
184
|
+
# Only works with simple templates, **will** get it wrong on unusual sequences
|
|
185
|
+
if processor.get('type') == 'TemplateProcessing':
|
|
186
|
+
tmpl_single = processor.get('single', [])
|
|
187
|
+
tmpl_pair = processor.get('pair', [])
|
|
188
|
+
special_first = None
|
|
189
|
+
special_last = None
|
|
190
|
+
if len(tmpl_single) > 1:
|
|
191
|
+
if special_first := tmpl_single[0].get('SpecialToken', {}).get('id'):
|
|
192
|
+
if not tokenizer_config:
|
|
193
|
+
special_bos = special_first
|
|
194
|
+
self.add_special_token['bos'] = True if special_first in (special_bos, special_cls) else False
|
|
195
|
+
if special_first not in (special_bos, special_cls):
|
|
196
|
+
logger.warning(f'Unknown leading special token {special_first!r} in TemplateProcessing<single>')
|
|
197
|
+
if special_last := tmpl_single[-1].get('SpecialToken', {}).get('id'):
|
|
198
|
+
if not tokenizer_config:
|
|
199
|
+
special_eos = special_last
|
|
200
|
+
elif special_last != special_eos:
|
|
201
|
+
if 'eot' not in self.special_token_types:
|
|
202
|
+
self.special_token_types = tuple(self.special_token_types) + ('eot', )
|
|
203
|
+
tokenizer_config['eot_token'] = special_eos
|
|
204
|
+
elif 'eom' not in self.special_token_types:
|
|
205
|
+
self.special_token_types = tuple(self.special_token_types) + ('eom', )
|
|
206
|
+
tokenizer_config['eom_token'] = special_eos
|
|
207
|
+
else:
|
|
208
|
+
logger.warning(f'Overriding EOS token {special_eos!r} with {special_last!r} without EOT/EOM fallback!')
|
|
209
|
+
tokenizer_config['eos_token'] = special_eos = special_last
|
|
210
|
+
self.add_special_token['eos'] = True if special_last == special_eos else False
|
|
211
|
+
if special_last != special_eos:
|
|
212
|
+
logger.warning(f'Unknown trailing special token {special_last!r} in TemplateProcessing<single>')
|
|
213
|
+
if tmpl_pair:
|
|
214
|
+
seq_start = 1 if special_first and tmpl_pair[0].get('SpecialToken', {}).get('id') == special_first else 0
|
|
215
|
+
seq_stop = -1 if special_last and tmpl_pair[-1].get('SpecialToken', {}).get('id') == special_last else None
|
|
216
|
+
if (special_first and seq_start == 0) or (special_last and seq_stop is None):
|
|
217
|
+
logger.warning('TemplateProcessing<single> leading/trailing special tokens do not match TemplateProcessing<pair>')
|
|
218
|
+
if tmpl_pair := tmpl_pair[slice(seq_start, seq_stop)]:
|
|
219
|
+
tmpl_a = tmpl_pair[0].get('Sequence', {}).get('id')
|
|
220
|
+
tmpl_b = tmpl_pair[-1].get('Sequence', {}).get('id')
|
|
221
|
+
if tmpl_a != 'A' or tmpl_b != 'B':
|
|
222
|
+
logger.warning(f'Unknown sequence {tmpl_a}...{tmpl_b} in TemplateProcessing<pair>')
|
|
223
|
+
# A [sep] [eos] B
|
|
224
|
+
if tmpl_a == 'A' and tmpl_b == 'B' and (tmpl_pair := tmpl_pair[1:-1]):
|
|
225
|
+
add_sep = False
|
|
226
|
+
if special_entry := tmpl_pair[0].get('SpecialToken', {}).get('id'):
|
|
227
|
+
if special_entry in (special_sep, special_eos) and not special_last:
|
|
228
|
+
add_sep = True
|
|
229
|
+
if special_entry not in (special_sep, special_eos):
|
|
230
|
+
logger.warning(f'Unknown separator token {special_entry!r} in TemplateProcessing<pair>')
|
|
231
|
+
else:
|
|
232
|
+
logger.warning(f'Unknown middle sequence {tmpl_pair[0]!r} in TemplateProcessing<pair>')
|
|
233
|
+
if len(tmpl_pair) == 2:
|
|
234
|
+
if special_entry := tmpl_pair[1].get('SpecialToken', {}).get('id'):
|
|
235
|
+
if special_entry in (special_sep, special_eos):
|
|
236
|
+
add_sep = True
|
|
237
|
+
if special_entry not in (special_sep, special_eos):
|
|
238
|
+
logger.warning(f'Unknown second separator token {special_entry!r} in TemplateProcessing<pair>')
|
|
239
|
+
else:
|
|
240
|
+
logger.warning(f'Unknown second middle sequence {tmpl_pair[1]!r} in TemplateProcessing<pair>')
|
|
241
|
+
self.add_special_token['sep'] = add_sep
|
|
242
|
+
if add_sep and not special_sep and tokenizer_config:
|
|
243
|
+
tokenizer_config['sep_token'] = special_eos
|
|
244
|
+
continue
|
|
245
|
+
if not tokenizer_config:
|
|
154
246
|
return True
|
|
155
|
-
with open(tokenizer_config_file, encoding = 'utf-8') as f:
|
|
156
|
-
tokenizer_config = json.load(f)
|
|
157
247
|
chat_template_alt = None
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
248
|
+
chat_template_json = path / 'chat_template.json'
|
|
249
|
+
chat_template_jinja = path / 'chat_template.jinja'
|
|
250
|
+
if chat_template_jinja.is_file():
|
|
251
|
+
with open(chat_template_jinja, encoding = 'utf-8') as f:
|
|
252
|
+
chat_template_alt = f.read()
|
|
253
|
+
if additional_templates := list((path / 'additional_chat_templates').glob('*.jinja')):
|
|
254
|
+
chat_template_alt = [{'name': 'default', 'template': chat_template_alt}]
|
|
255
|
+
for template_path in additional_templates:
|
|
256
|
+
with open(template_path, encoding = 'utf-8') as fp:
|
|
257
|
+
chat_template_alt.append({'name': template_path.stem, 'template': fp.read()})
|
|
258
|
+
elif chat_template_json.is_file():
|
|
259
|
+
with open(chat_template_json, encoding = 'utf-8') as f:
|
|
161
260
|
chat_template_alt = json.load(f).get('chat_template')
|
|
162
261
|
chat_template = tokenizer_config.get('chat_template', chat_template_alt)
|
|
163
262
|
if chat_template is None or isinstance(chat_template, (str, list)):
|
|
@@ -302,6 +401,9 @@ class SentencePieceVocab(Vocab):
|
|
|
302
401
|
name = "spm"
|
|
303
402
|
|
|
304
403
|
def __init__(self, base_path: Path):
|
|
404
|
+
if SentencePieceProcessor is None:
|
|
405
|
+
raise RuntimeError("sentencepiece is not installed")
|
|
406
|
+
|
|
305
407
|
added_tokens: dict[str, int] = {}
|
|
306
408
|
if (fname_tokenizer := base_path / 'tokenizer.model').exists():
|
|
307
409
|
# normal location
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "gguf"
|
|
3
|
-
version = "0.17.
|
|
3
|
+
version = "0.17.1"
|
|
4
4
|
description = "Read and write ML models in GGUF for GGML"
|
|
5
5
|
authors = ["GGML <ggml@ggml.ai>"]
|
|
6
6
|
packages = [
|
|
@@ -22,7 +22,7 @@ python = ">=3.8"
|
|
|
22
22
|
numpy = ">=1.17"
|
|
23
23
|
tqdm = ">=4.27"
|
|
24
24
|
pyyaml = ">=5.1"
|
|
25
|
-
sentencepiece = ">=0.1.98,<=0.2.0"
|
|
25
|
+
sentencepiece = { version = ">=0.1.98,<=0.2.0", optional = true }
|
|
26
26
|
PySide6 = { version = "^6.9", python = ">=3.9,<3.14", optional = true }
|
|
27
27
|
|
|
28
28
|
[tool.poetry.dev-dependencies]
|
|
@@ -79,46 +79,6 @@ extern "C" {
|
|
|
79
79
|
LLAMA_VOCAB_TYPE_RWKV = 5, // RWKV tokenizer based on greedy tokenization
|
|
80
80
|
};
|
|
81
81
|
|
|
82
|
-
// pre-tokenization types
|
|
83
|
-
enum llama_vocab_pre_type {
|
|
84
|
-
LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0,
|
|
85
|
-
LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1,
|
|
86
|
-
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2,
|
|
87
|
-
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,
|
|
88
|
-
LLAMA_VOCAB_PRE_TYPE_FALCON = 4,
|
|
89
|
-
LLAMA_VOCAB_PRE_TYPE_MPT = 5,
|
|
90
|
-
LLAMA_VOCAB_PRE_TYPE_STARCODER = 6,
|
|
91
|
-
LLAMA_VOCAB_PRE_TYPE_GPT2 = 7,
|
|
92
|
-
LLAMA_VOCAB_PRE_TYPE_REFACT = 8,
|
|
93
|
-
LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9,
|
|
94
|
-
LLAMA_VOCAB_PRE_TYPE_STABLELM2 = 10,
|
|
95
|
-
LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11,
|
|
96
|
-
LLAMA_VOCAB_PRE_TYPE_OLMO = 12,
|
|
97
|
-
LLAMA_VOCAB_PRE_TYPE_DBRX = 13,
|
|
98
|
-
LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
|
|
99
|
-
LLAMA_VOCAB_PRE_TYPE_PORO = 15,
|
|
100
|
-
LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16,
|
|
101
|
-
LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17,
|
|
102
|
-
LLAMA_VOCAB_PRE_TYPE_VIKING = 18,
|
|
103
|
-
LLAMA_VOCAB_PRE_TYPE_JAIS = 19,
|
|
104
|
-
LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20,
|
|
105
|
-
LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21,
|
|
106
|
-
LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
|
|
107
|
-
LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
|
|
108
|
-
LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
|
|
109
|
-
LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
|
|
110
|
-
LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
|
|
111
|
-
LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
|
|
112
|
-
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
|
|
113
|
-
LLAMA_VOCAB_PRE_TYPE_GPT4O = 29,
|
|
114
|
-
LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30,
|
|
115
|
-
LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
|
|
116
|
-
LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
|
|
117
|
-
LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
|
|
118
|
-
LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
|
|
119
|
-
LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
|
|
120
|
-
};
|
|
121
|
-
|
|
122
82
|
enum llama_rope_type {
|
|
123
83
|
LLAMA_ROPE_TYPE_NONE = -1,
|
|
124
84
|
LLAMA_ROPE_TYPE_NORM = 0,
|
|
@@ -390,6 +350,7 @@ extern "C" {
|
|
|
390
350
|
void * imatrix; // pointer to importance matrix data
|
|
391
351
|
void * kv_overrides; // pointer to vector containing overrides
|
|
392
352
|
void * tensor_types; // pointer to vector containing tensor types
|
|
353
|
+
void * prune_layers; // pointer to vector containing layer indices to prune
|
|
393
354
|
} llama_model_quantize_params;
|
|
394
355
|
|
|
395
356
|
typedef struct llama_logit_bias {
|
|
@@ -943,12 +904,14 @@ extern "C" {
|
|
|
943
904
|
// Requires the context to have a memory.
|
|
944
905
|
// For encode-decoder contexts, processes the batch using the decoder.
|
|
945
906
|
// Positive return values do not mean a fatal error, but rather a warning.
|
|
946
|
-
// Upon
|
|
907
|
+
// Upon fatal-error or abort, the ubatches that managed to be processed will remain in the memory state of the context
|
|
908
|
+
// To handle this correctly, query the memory state using llama_memory_seq_pos_min() and llama_memory_seq_pos_max()
|
|
909
|
+
// Upon other return values, the memory state is restored to the state before this call
|
|
947
910
|
// 0 - success
|
|
948
911
|
// 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
|
|
949
|
-
// 2 - aborted
|
|
912
|
+
// 2 - aborted (processed ubatches will remain in the context's memory)
|
|
950
913
|
// -1 - invalid input batch
|
|
951
|
-
// < -1 - error
|
|
914
|
+
// < -1 - fatal error (processed ubatches will remain in the context's memory)
|
|
952
915
|
LLAMA_API int32_t llama_decode(
|
|
953
916
|
struct llama_context * ctx,
|
|
954
917
|
struct llama_batch batch);
|
|
@@ -1044,6 +1007,7 @@ extern "C" {
|
|
|
1044
1007
|
|
|
1045
1008
|
LLAMA_API bool llama_vocab_get_add_bos(const struct llama_vocab * vocab);
|
|
1046
1009
|
LLAMA_API bool llama_vocab_get_add_eos(const struct llama_vocab * vocab);
|
|
1010
|
+
LLAMA_API bool llama_vocab_get_add_sep(const struct llama_vocab * vocab);
|
|
1047
1011
|
|
|
1048
1012
|
LLAMA_API llama_token llama_vocab_fim_pre(const struct llama_vocab * vocab);
|
|
1049
1013
|
LLAMA_API llama_token llama_vocab_fim_suf(const struct llama_vocab * vocab);
|
|
@@ -1087,6 +1051,7 @@ extern "C" {
|
|
|
1087
1051
|
/// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
|
|
1088
1052
|
/// @return Returns the number of tokens on success, no more than n_tokens_max
|
|
1089
1053
|
/// @return Returns a negative number on failure - the number of tokens that would have been returned
|
|
1054
|
+
/// @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit)
|
|
1090
1055
|
/// @param add_special Allow adding BOS and EOS tokens if the model is configured to do so.
|
|
1091
1056
|
/// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
|
|
1092
1057
|
/// as plaintext. Does not insert a leading space.
|