@novastera-oss/llamarn 0.2.9 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/build.gradle +2 -1
- package/android/proguard-rules.pro +12 -0
- package/android/src/main/cpp/include/llama.h +15 -47
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +0 -1
- package/cpp/llama.cpp/CMakePresets.json +11 -0
- package/cpp/llama.cpp/CODEOWNERS +1 -0
- package/cpp/llama.cpp/README.md +8 -8
- package/cpp/llama.cpp/build-xcframework.sh +1 -1
- package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
- package/cpp/llama.cpp/common/arg.cpp +62 -1
- package/cpp/llama.cpp/common/chat.cpp +37 -20
- package/cpp/llama.cpp/common/chat.h +2 -0
- package/cpp/llama.cpp/common/common.cpp +22 -6
- package/cpp/llama.cpp/common/common.h +22 -4
- package/cpp/llama.cpp/convert_hf_to_gguf.py +1250 -43
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +21 -13
- package/cpp/llama.cpp/ggml/CMakeLists.txt +13 -3
- package/cpp/llama.cpp/ggml/cmake/ggml-config.cmake.in +85 -47
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +173 -10
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-alloc.c +0 -15
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +7 -8
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +44 -38
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +126 -8
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +130 -22
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +138 -18
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +11 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +28 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +109 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +88 -10
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +343 -1094
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1206 -163
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +36 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +142 -9
- package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +31 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +86 -17
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh +225 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +41 -301
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +85 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +47 -60
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +29 -42
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +46 -59
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +36 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +38 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +23 -36
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +3 -13
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +255 -99
- package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +111 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +1152 -695
- package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +92 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cu +275 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +104 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +27 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +80 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +48 -12
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +572 -106
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +599 -105
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +18 -4
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +800 -42
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +4 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +693 -1034
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +14 -26
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +191 -55
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +8 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +15 -18
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +2 -6
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +991 -307
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +265 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +59 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +18 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +84 -9
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +907 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +35 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +56 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +386 -67
- package/cpp/llama.cpp/ggml/src/gguf.cpp +8 -1
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +307 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +8 -2
- package/cpp/llama.cpp/gguf-py/gguf/metadata.py +4 -0
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_dump.py +24 -1
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +122 -47
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +12 -3
- package/cpp/llama.cpp/include/llama.h +15 -47
- package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja +34 -0
- package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja +43 -0
- package/cpp/llama.cpp/requirements/requirements-all.txt +1 -0
- package/cpp/llama.cpp/requirements/requirements-server-bench.txt +5 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +316 -3
- package/cpp/llama.cpp/src/llama-arch.h +23 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +103 -71
- package/cpp/llama.cpp/src/llama-batch.h +31 -18
- package/cpp/llama.cpp/src/llama-chat.cpp +58 -1
- package/cpp/llama.cpp/src/llama-chat.h +3 -0
- package/cpp/llama.cpp/src/llama-context.cpp +180 -106
- package/cpp/llama.cpp/src/llama-context.h +26 -16
- package/cpp/llama.cpp/src/llama-cparams.h +3 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +310 -211
- package/cpp/llama.cpp/src/llama-graph.h +184 -122
- package/cpp/llama.cpp/src/llama-hparams.cpp +47 -1
- package/cpp/llama.cpp/src/llama-hparams.h +13 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +38 -22
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +7 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +849 -304
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +143 -47
- package/cpp/llama.cpp/src/llama-kv-cells.h +62 -10
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +10 -4
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +3 -1
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +36 -11
- package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
- package/cpp/llama.cpp/src/llama-memory.h +3 -0
- package/cpp/llama.cpp/src/llama-model.cpp +3545 -719
- package/cpp/llama.cpp/src/llama-model.h +21 -4
- package/cpp/llama.cpp/src/llama-quant.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +376 -10
- package/cpp/llama.cpp/src/llama-vocab.h +43 -0
- package/cpp/llama.cpp/src/unicode.cpp +207 -0
- package/cpp/llama.cpp/src/unicode.h +2 -0
- package/ios/include/chat.h +2 -0
- package/ios/include/common.h +22 -4
- package/ios/include/llama.h +15 -47
- package/ios/libs/llama.xcframework/Info.plist +13 -13
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4016 -3766
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5303 -4926
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5274 -4897
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4044 -3794
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +4 -4
- package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
- package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
|
@@ -13,7 +13,7 @@ class TensorNameMap:
|
|
|
13
13
|
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
|
|
14
14
|
"transformer.word_embeddings", # falcon
|
|
15
15
|
"word_embeddings", # bloom
|
|
16
|
-
"model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414
|
|
16
|
+
"model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 plamo2 granite-hybrid
|
|
17
17
|
"tok_embeddings", # llama-pth
|
|
18
18
|
"embeddings.word_embeddings", # bert nomic-bert
|
|
19
19
|
"language_model.embedding.word_embeddings", # persimmon
|
|
@@ -50,6 +50,7 @@ class TensorNameMap:
|
|
|
50
50
|
"model.pre_ln", # rwkv7
|
|
51
51
|
"model.layers.0.pre_norm", # rwkv7
|
|
52
52
|
"backbone.norm", # wavtokenizer
|
|
53
|
+
"model.embedding_norm", # lfm2
|
|
53
54
|
),
|
|
54
55
|
|
|
55
56
|
# Position embeddings
|
|
@@ -62,7 +63,7 @@ class TensorNameMap:
|
|
|
62
63
|
# Output
|
|
63
64
|
MODEL_TENSOR.OUTPUT: (
|
|
64
65
|
"embed_out", # gptneox
|
|
65
|
-
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
|
|
66
|
+
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe plamo2
|
|
66
67
|
"output", # llama-pth bloom internlm2
|
|
67
68
|
"word_embeddings_for_head", # persimmon
|
|
68
69
|
"lm_head.linear", # phi2
|
|
@@ -76,7 +77,7 @@ class TensorNameMap:
|
|
|
76
77
|
MODEL_TENSOR.OUTPUT_NORM: (
|
|
77
78
|
"gpt_neox.final_layer_norm", # gptneox
|
|
78
79
|
"transformer.ln_f", # gpt2 gpt-j falcon jais exaone
|
|
79
|
-
"model.norm", # llama-hf baichuan internlm2 olmoe olmo2 phimoe
|
|
80
|
+
"model.norm", # llama-hf baichuan internlm2 olmoe olmo2 phimoe plamo2
|
|
80
81
|
"norm", # llama-pth
|
|
81
82
|
"transformer.norm_f", # mpt dbrx
|
|
82
83
|
"ln_f", # refact bloom qwen gpt2
|
|
@@ -118,13 +119,14 @@ class TensorNameMap:
|
|
|
118
119
|
"transformer.h.{bid}.input_layernorm", # falcon7b
|
|
119
120
|
"h.{bid}.input_layernorm", # bloom
|
|
120
121
|
"transformer.h.{bid}.ln_mlp", # falcon40b
|
|
121
|
-
"model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe
|
|
122
|
+
"model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe granite-hybrid
|
|
122
123
|
"layers.{bid}.attention_norm", # llama-pth
|
|
123
124
|
"language_model.encoder.layers.{bid}.input_layernorm", # persimmon
|
|
124
125
|
"model.layers.{bid}.ln1", # yi
|
|
125
126
|
"h.{bid}.ln_1", # gpt2
|
|
126
127
|
"transformer.h.{bid}.ln", # phi2
|
|
127
128
|
"model.layers.layers.{bid}.norm", # plamo
|
|
129
|
+
"model.layers.layers.{bid}.pre_mixer_norm", # plamo2
|
|
128
130
|
"model.layers.{bid}.attention_norm", # internlm2
|
|
129
131
|
"model.layers.{bid}.norm", # mamba-qbert
|
|
130
132
|
"backbone.layers.{bid}.norm", # mamba
|
|
@@ -136,6 +138,7 @@ class TensorNameMap:
|
|
|
136
138
|
"model.layers.{bid}.ln1", # rwkv7
|
|
137
139
|
"model.layers.{bid}.input_layernorm", # llama4
|
|
138
140
|
"transformer_encoder.{bid}.attention_norm", # neobert
|
|
141
|
+
"model.layers.{bid}.operator_norm", # lfm2
|
|
139
142
|
),
|
|
140
143
|
|
|
141
144
|
# Attention norm 2
|
|
@@ -161,6 +164,7 @@ class TensorNameMap:
|
|
|
161
164
|
"encoder.layers.{bid}.attn.Wqkv", # nomic-bert
|
|
162
165
|
"encoder.layers.{bid}.mixer.Wqkv", # jina
|
|
163
166
|
"model.layers.{bid}.self_attn.qkv_proj", # phi3
|
|
167
|
+
"model.layers.layers.{bid}.mixer.qkv_proj", # plamo2
|
|
164
168
|
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm
|
|
165
169
|
"transformer.layers.{bid}.attn.qkv_proj", # openelm
|
|
166
170
|
"transformer_encoder.{bid}.qkv", # neobert
|
|
@@ -220,6 +224,7 @@ class TensorNameMap:
|
|
|
220
224
|
"transformer.h.{bid}.self_attention.dense", # falcon
|
|
221
225
|
"h.{bid}.self_attention.dense", # bloom
|
|
222
226
|
"model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2 phimoe
|
|
227
|
+
"model.layers.{bid}.self_attn.out_proj", # lfm2
|
|
223
228
|
"model.layers.{bid}.self_attn.linear_attn", # deci
|
|
224
229
|
"layers.{bid}.attention.wo", # llama-pth
|
|
225
230
|
"encoder.layer.{bid}.attention.output.dense", # bert
|
|
@@ -230,6 +235,7 @@ class TensorNameMap:
|
|
|
230
235
|
"h.{bid}.attn.c_proj", # gpt2
|
|
231
236
|
"transformer.h.{bid}.mixer.out_proj", # phi2
|
|
232
237
|
"model.layers.layers.{bid}.self_attn.o_proj", # plamo
|
|
238
|
+
"model.layers.layers.{bid}.mixer.o_proj", # plamo2
|
|
233
239
|
"model.layers.{bid}.attention.wo", # internlm2
|
|
234
240
|
"encoder.layers.{bid}.attn.out_proj", # nomic-bert
|
|
235
241
|
"encoder.layers.{bid}.mixer.out_proj", # jina
|
|
@@ -252,8 +258,9 @@ class TensorNameMap:
|
|
|
252
258
|
),
|
|
253
259
|
|
|
254
260
|
MODEL_TENSOR.ATTN_POST_NORM: (
|
|
255
|
-
"model.layers.{bid}.post_attention_layernorm",
|
|
256
|
-
"model.layers.{bid}.post_self_attn_layernorm",
|
|
261
|
+
"model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2 # ge
|
|
262
|
+
"model.layers.{bid}.post_self_attn_layernorm", # glm-4-0414
|
|
263
|
+
"model.layers.layers.{bid}.post_mixer_norm.weight", # plamo2
|
|
257
264
|
),
|
|
258
265
|
|
|
259
266
|
# Rotary embeddings
|
|
@@ -279,19 +286,25 @@ class TensorNameMap:
|
|
|
279
286
|
"transformer.decoder_layer.{bid}.rms_norm_2", # Grok
|
|
280
287
|
"encoder.layers.{bid}.post_attention_layernorm", # chatglm
|
|
281
288
|
"transformer.layers.{bid}.ffn_norm", # openelm
|
|
289
|
+
"model.layers.{bid}.pre_ff_layernorm", # jamba granite-hybrid
|
|
290
|
+
"model.layers.{bid}.pre_moe_layernorm", # mini-jamba
|
|
282
291
|
"model.layers.{bid}.post_attention_layernorm", # llama4
|
|
283
292
|
"transformer_encoder.{bid}.ffn_norm", # neobert
|
|
293
|
+
"model.layers.layers.{bid}.pre_mlp_norm", # plamo2
|
|
284
294
|
),
|
|
285
295
|
|
|
286
296
|
# Post feed-forward norm
|
|
287
297
|
MODEL_TENSOR.FFN_PRE_NORM: (
|
|
288
298
|
"model.layers.{bid}.pre_feedforward_layernorm", # gemma2
|
|
299
|
+
"model.layers.{bid}.pre_ff_layernorm.weight",
|
|
289
300
|
),
|
|
290
301
|
|
|
291
302
|
# Post feed-forward norm
|
|
292
303
|
MODEL_TENSOR.FFN_POST_NORM: (
|
|
293
304
|
"model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
|
|
294
305
|
"model.layers.{bid}.post_mlp_layernorm", # glm-4-0414
|
|
306
|
+
"model.layers.layers.{bid}.post_mlp_norm.weight", # plamo2
|
|
307
|
+
"model.layers.{bid}.feed_forward.up_proj",
|
|
295
308
|
),
|
|
296
309
|
|
|
297
310
|
MODEL_TENSOR.FFN_GATE_INP: (
|
|
@@ -301,8 +314,9 @@ class TensorNameMap:
|
|
|
301
314
|
"transformer.decoder_layer.{bid}.router", # Grok
|
|
302
315
|
"transformer.blocks.{bid}.ffn.router.layer", # dbrx
|
|
303
316
|
"model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
|
|
304
|
-
"model.layers.{bid}.feed_forward.router", # llama4
|
|
317
|
+
"model.layers.{bid}.feed_forward.router", # llama4 jamba
|
|
305
318
|
"encoder.layers.{bid}.mlp.router.layer", # nomic-bert-moe
|
|
319
|
+
"model.layers.{bid}.mlp.gate.wg", # hunyuan
|
|
306
320
|
),
|
|
307
321
|
|
|
308
322
|
MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
|
|
@@ -310,7 +324,8 @@ class TensorNameMap:
|
|
|
310
324
|
),
|
|
311
325
|
|
|
312
326
|
MODEL_TENSOR.FFN_EXP_PROBS_B: (
|
|
313
|
-
"model.layers.{bid}.mlp.gate.e_score_correction",
|
|
327
|
+
"model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3 dots1
|
|
328
|
+
"model.layers.{bid}.mlp.moe_statics.e_score_correction", # ernie4.5-moe
|
|
314
329
|
),
|
|
315
330
|
|
|
316
331
|
# Feed-forward up
|
|
@@ -334,6 +349,7 @@ class TensorNameMap:
|
|
|
334
349
|
"model.layers.{bid}.mlp.fc1", # phi2
|
|
335
350
|
"model.layers.{bid}.mlp.gate_up_proj", # phi3 glm-4-0414
|
|
336
351
|
"model.layers.layers.{bid}.mlp.up_proj", # plamo
|
|
352
|
+
"model.layers.layers.{bid}.mlp.gate_up_proj", # plamo2
|
|
337
353
|
"model.layers.{bid}.feed_forward.w3", # internlm2
|
|
338
354
|
"encoder.layers.{bid}.mlp.fc11", # nomic-bert
|
|
339
355
|
"encoder.layers.{bid}.mlp.fc1", # nomic-bert-moe
|
|
@@ -344,24 +360,26 @@ class TensorNameMap:
|
|
|
344
360
|
"model.layers.{bid}.residual_mlp.w3", # arctic
|
|
345
361
|
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
|
|
346
362
|
"transformer.h.{bid}.mlp.c_fc_1", # exaone
|
|
347
|
-
"model.layers.{bid}.feed_forward.up_proj", # llama4
|
|
363
|
+
"model.layers.{bid}.feed_forward.up_proj", # llama4 jamba granite-hybrid
|
|
348
364
|
"transformer_encoder.{bid}.ffn.w12", # neobert
|
|
349
365
|
),
|
|
350
366
|
|
|
351
367
|
MODEL_TENSOR.FFN_UP_EXP: (
|
|
352
|
-
"layers.{bid}.feed_forward.experts.w3",
|
|
353
|
-
"transformer.decoder_layer.{bid}.moe.linear_v",
|
|
354
|
-
"transformer.blocks.{bid}.ffn.experts.mlp.v1",
|
|
355
|
-
"model.layers.{bid}.mlp.experts.up_proj",
|
|
356
|
-
"model.layers.{bid}.block_sparse_moe.experts.w3",
|
|
357
|
-
"model.layers.{bid}.feed_forward.experts.up_proj",
|
|
358
|
-
"encoder.layers.{bid}.mlp.experts.mlp.w1",
|
|
368
|
+
"layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
|
|
369
|
+
"transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
|
|
370
|
+
"transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
|
|
371
|
+
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged) ernie4.5-moe
|
|
372
|
+
"model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
|
|
373
|
+
"model.layers.{bid}.feed_forward.experts.up_proj", # llama4
|
|
374
|
+
"encoder.layers.{bid}.mlp.experts.mlp.w1", # nomic-bert-moe
|
|
359
375
|
),
|
|
360
376
|
|
|
361
377
|
MODEL_TENSOR.FFN_UP_SHEXP: (
|
|
362
378
|
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
|
|
363
379
|
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
|
|
364
380
|
"model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4
|
|
381
|
+
"model.layers.{bid}.feed_forward.down_proj",
|
|
382
|
+
"model.layers.{bid}.mlp.shared_mlp.up_proj", # hunyuan
|
|
365
383
|
),
|
|
366
384
|
|
|
367
385
|
# AWQ-activation gate
|
|
@@ -382,22 +400,23 @@ class TensorNameMap:
|
|
|
382
400
|
"transformer.h.{bid}.mlp.linear_1", # refact
|
|
383
401
|
"model.layers.{bid}.residual_mlp.w1", # arctic
|
|
384
402
|
"transformer.h.{bid}.mlp.c_fc_0", # exaone
|
|
385
|
-
"model.layers.{bid}.feed_forward.gate_proj", # llama4
|
|
403
|
+
"model.layers.{bid}.feed_forward.gate_proj", # llama4 jamba granite-hybrid
|
|
386
404
|
),
|
|
387
405
|
|
|
388
406
|
MODEL_TENSOR.FFN_GATE_EXP: (
|
|
389
|
-
"layers.{bid}.feed_forward.experts.w1",
|
|
390
|
-
"transformer.decoder_layer.{bid}.moe.linear",
|
|
391
|
-
"transformer.blocks.{bid}.ffn.experts.mlp.w1",
|
|
392
|
-
"model.layers.{bid}.mlp.experts.gate_proj",
|
|
393
|
-
"model.layers.{bid}.block_sparse_moe.experts.w1",
|
|
394
|
-
"model.layers.{bid}.feed_forward.experts.gate_proj",
|
|
407
|
+
"layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
|
|
408
|
+
"transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
|
|
409
|
+
"transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
|
|
410
|
+
"model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged) ernie4.5-moe
|
|
411
|
+
"model.layers.{bid}.block_sparse_moe.experts.w1", # phimoe (merged)
|
|
412
|
+
"model.layers.{bid}.feed_forward.experts.gate_proj", # llama4
|
|
395
413
|
),
|
|
396
414
|
|
|
397
415
|
MODEL_TENSOR.FFN_GATE_SHEXP: (
|
|
398
416
|
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
|
|
399
417
|
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
|
|
400
418
|
"model.layers.{bid}.feed_forward.shared_expert.gate_proj", # llama4
|
|
419
|
+
"model.layers.{bid}.mlp.shared_mlp.gate_proj", # hunyuan
|
|
401
420
|
),
|
|
402
421
|
|
|
403
422
|
# Feed-forward down
|
|
@@ -427,19 +446,19 @@ class TensorNameMap:
|
|
|
427
446
|
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
|
|
428
447
|
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
|
|
429
448
|
"model.layers.h.{bid}.mlp.c_proj", # exaone
|
|
430
|
-
"model.layers.{bid}.feed_forward.down_proj", # llama4
|
|
449
|
+
"model.layers.{bid}.feed_forward.down_proj", # llama4 jamba granite-hybrid
|
|
431
450
|
"transformer_encoder.{bid}.ffn.w3", # neobert
|
|
432
451
|
),
|
|
433
452
|
|
|
434
453
|
MODEL_TENSOR.FFN_DOWN_EXP: (
|
|
435
|
-
"layers.{bid}.feed_forward.experts.w2",
|
|
436
|
-
"transformer.decoder_layer.{bid}.moe.linear_1",
|
|
437
|
-
"transformer.blocks.{bid}.ffn.experts.mlp.w2",
|
|
438
|
-
"model.layers.{bid}.mlp.experts.down_proj",
|
|
439
|
-
"model.layers.{bid}.block_sparse_moe.output_linear",
|
|
440
|
-
"model.layers.{bid}.block_sparse_moe.experts.w2",
|
|
441
|
-
"model.layers.{bid}.feed_forward.experts.down_proj",
|
|
442
|
-
"encoder.layers.{bid}.mlp.experts.mlp.w2",
|
|
454
|
+
"layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
|
|
455
|
+
"transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
|
|
456
|
+
"transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
|
|
457
|
+
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged) ernie4.5-moe
|
|
458
|
+
"model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
|
|
459
|
+
"model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged)
|
|
460
|
+
"model.layers.{bid}.feed_forward.experts.down_proj", # llama4
|
|
461
|
+
"encoder.layers.{bid}.mlp.experts.mlp.w2", # nomic-bert-moe
|
|
443
462
|
),
|
|
444
463
|
|
|
445
464
|
MODEL_TENSOR.FFN_DOWN_SHEXP: (
|
|
@@ -447,24 +466,29 @@ class TensorNameMap:
|
|
|
447
466
|
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
|
|
448
467
|
"model.layers.{bid}.feed_forward.shared_expert.down_proj", # llama4
|
|
449
468
|
"model.layers.{bid}.shared_mlp.output_linear", # granitemoe
|
|
469
|
+
"model.layers.{bid}.mlp.shared_mlp.down_proj", # hunyuan
|
|
450
470
|
),
|
|
451
471
|
|
|
452
472
|
MODEL_TENSOR.ATTN_Q_NORM: (
|
|
453
473
|
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
|
454
474
|
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
|
475
|
+
"model.layers.{bid}.self_attn.query_layernorm", # hunyuan
|
|
455
476
|
"model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2
|
|
456
477
|
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
|
|
457
478
|
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
|
|
458
479
|
"transformer.layers.{bid}.attn.q_norm", # openelm
|
|
480
|
+
"model.layers.layers.{bid}.mixer.q", # plamo2
|
|
459
481
|
),
|
|
460
482
|
|
|
461
483
|
MODEL_TENSOR.ATTN_K_NORM: (
|
|
462
484
|
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
|
|
463
485
|
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
|
486
|
+
"model.layers.{bid}.self_attn.key_layernorm", # hunyuan
|
|
464
487
|
"model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2
|
|
465
488
|
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
|
|
466
489
|
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
|
|
467
490
|
"transformer.layers.{bid}.attn.k_norm", # openelm
|
|
491
|
+
"model.layers.layers.{bid}.mixer.k", # plamo2
|
|
468
492
|
),
|
|
469
493
|
|
|
470
494
|
MODEL_TENSOR.ROPE_FREQS: (
|
|
@@ -477,7 +501,7 @@ class TensorNameMap:
|
|
|
477
501
|
"encoder.layers.{bid}.norm2", # nomic-bert
|
|
478
502
|
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
|
|
479
503
|
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
|
|
480
|
-
"encoder.layer.{bid}.layer_norm_2"
|
|
504
|
+
"encoder.layer.{bid}.layer_norm_2", # jina-v2-code
|
|
481
505
|
),
|
|
482
506
|
|
|
483
507
|
MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: (
|
|
@@ -545,38 +569,77 @@ class TensorNameMap:
|
|
|
545
569
|
),
|
|
546
570
|
|
|
547
571
|
MODEL_TENSOR.SSM_IN: (
|
|
548
|
-
"model.layers.{bid}.in_proj",
|
|
549
|
-
"backbone.layers.{bid}.mixer.in_proj",
|
|
572
|
+
"model.layers.{bid}.in_proj", # mamba-hf
|
|
573
|
+
"backbone.layers.{bid}.mixer.in_proj", # mamba
|
|
574
|
+
"model.layers.{bid}.mamba.in_proj", # jamba falcon-h1 granite-hybrid
|
|
575
|
+
"model.layers.layers.{bid}.mixer.in_proj", # plamo2
|
|
550
576
|
),
|
|
551
577
|
|
|
552
578
|
MODEL_TENSOR.SSM_CONV1D: (
|
|
553
|
-
"model.layers.{bid}.conv1d",
|
|
554
|
-
"backbone.layers.{bid}.mixer.conv1d",
|
|
579
|
+
"model.layers.{bid}.conv1d", # mamba-hf
|
|
580
|
+
"backbone.layers.{bid}.mixer.conv1d", # mamba
|
|
581
|
+
"model.layers.{bid}.mamba.conv1d", # jamba falcon-h1 granite-hybrid
|
|
582
|
+
"model.layers.layers.{bid}.mixer.conv1d", # plamo2
|
|
555
583
|
),
|
|
556
584
|
|
|
557
585
|
MODEL_TENSOR.SSM_X: (
|
|
558
|
-
"model.layers.{bid}.x_proj",
|
|
559
|
-
"backbone.layers.{bid}.mixer.x_proj",
|
|
586
|
+
"model.layers.{bid}.x_proj", # mamba-hf
|
|
587
|
+
"backbone.layers.{bid}.mixer.x_proj", # mamba
|
|
588
|
+
"model.layers.{bid}.mamba.x_proj", # jamba
|
|
589
|
+
"model.layers.layers.{bid}.mixer.bcdt_proj", # plamo2
|
|
560
590
|
),
|
|
561
591
|
|
|
562
592
|
MODEL_TENSOR.SSM_DT: (
|
|
563
|
-
"model.layers.{bid}.dt_proj",
|
|
564
|
-
"backbone.layers.{bid}.mixer.dt_proj",
|
|
593
|
+
"model.layers.{bid}.dt_proj", # mamba-hf
|
|
594
|
+
"backbone.layers.{bid}.mixer.dt_proj", # mamba
|
|
595
|
+
"model.layers.{bid}.mamba.dt_proj", # jamba falcon-h1 granite-hybrid
|
|
596
|
+
"model.layers.layers.{bid}.mixer.dt_proj", # plamo2
|
|
597
|
+
),
|
|
598
|
+
|
|
599
|
+
MODEL_TENSOR.SSM_DT_NORM: (
|
|
600
|
+
"model.layers.{bid}.mamba.dt_layernorm", # jamba
|
|
565
601
|
),
|
|
566
602
|
|
|
567
603
|
MODEL_TENSOR.SSM_A: (
|
|
568
|
-
"model.layers.{bid}.A_log",
|
|
569
|
-
"backbone.layers.{bid}.mixer.A_log",
|
|
604
|
+
"model.layers.{bid}.A_log", # mamba-hf
|
|
605
|
+
"backbone.layers.{bid}.mixer.A_log", # mamba
|
|
606
|
+
"model.layers.{bid}.mamba.A_log", # jamba falcon-h1 granite-hybrid
|
|
607
|
+
"model.layers.layers.{bid}.mixer.A_log", # plamo2
|
|
608
|
+
),
|
|
609
|
+
|
|
610
|
+
MODEL_TENSOR.SSM_B_NORM: (
|
|
611
|
+
"model.layers.{bid}.mamba.b_layernorm", # jamba
|
|
612
|
+
"model.layers.{bid}.mamba.B_layernorm", # mini-jamba
|
|
613
|
+
"model.layers.layers.{bid}.mixer.B_norm.weight", # plamo2
|
|
614
|
+
),
|
|
615
|
+
|
|
616
|
+
MODEL_TENSOR.SSM_C_NORM: (
|
|
617
|
+
"model.layers.{bid}.mamba.c_layernorm", # jamba
|
|
618
|
+
"model.layers.{bid}.mamba.C_layernorm", # mini-jamba
|
|
619
|
+
"model.layers.layers.{bid}.mixer.C_norm.weight", # plamo2
|
|
570
620
|
),
|
|
571
621
|
|
|
572
622
|
MODEL_TENSOR.SSM_D: (
|
|
573
|
-
"model.layers.{bid}.D",
|
|
574
|
-
"backbone.layers.{bid}.mixer.D",
|
|
623
|
+
"model.layers.{bid}.D", # mamba-hf
|
|
624
|
+
"backbone.layers.{bid}.mixer.D", # mamba
|
|
625
|
+
"model.layers.{bid}.mamba.D", # jamba falcon-h1 granite-hybrid
|
|
626
|
+
"model.layers.layers.{bid}.mixer.D", # plamo2
|
|
627
|
+
),
|
|
628
|
+
|
|
629
|
+
MODEL_TENSOR.SSM_DT_NORM: (
|
|
630
|
+
"model.layers.layers.{bid}.mixer.dt_norm.weight", # plamo2
|
|
631
|
+
),
|
|
632
|
+
|
|
633
|
+
MODEL_TENSOR.SSM_NORM: (
|
|
634
|
+
"model.layers.{bid}.mamba.norm", # falcon-h1 granite-hybrid
|
|
635
|
+
"backbone.layers.{bid}.mixer.norm", # mamba2
|
|
575
636
|
),
|
|
576
637
|
|
|
577
638
|
MODEL_TENSOR.SSM_OUT: (
|
|
578
|
-
"model.layers.{bid}.out_proj",
|
|
579
|
-
"backbone.layers.{bid}.mixer.out_proj",
|
|
639
|
+
"model.layers.{bid}.out_proj", # mamba-hf
|
|
640
|
+
"backbone.layers.{bid}.mixer.out_proj", # mamba
|
|
641
|
+
"model.layers.{bid}.mamba.out_proj", # jamba falcon-h1 granite-hybrid
|
|
642
|
+
"model.layers.layers.{bid}.mixer.out_proj", # plamo2
|
|
580
643
|
),
|
|
581
644
|
|
|
582
645
|
MODEL_TENSOR.TIME_MIX_W0: (
|
|
@@ -978,6 +1041,18 @@ class TensorNameMap:
|
|
|
978
1041
|
"backbone.posnet.{bid}.proj_out", # wavtokenizer
|
|
979
1042
|
),
|
|
980
1043
|
|
|
1044
|
+
MODEL_TENSOR.SHORTCONV_CONV: (
|
|
1045
|
+
"model.layers.{bid}.conv.conv",
|
|
1046
|
+
),
|
|
1047
|
+
|
|
1048
|
+
MODEL_TENSOR.SHORTCONV_INPROJ: (
|
|
1049
|
+
"model.layers.{bid}.conv.in_proj",
|
|
1050
|
+
),
|
|
1051
|
+
|
|
1052
|
+
MODEL_TENSOR.SHORTCONV_OUTPROJ: (
|
|
1053
|
+
"model.layers.{bid}.conv.out_proj",
|
|
1054
|
+
),
|
|
1055
|
+
|
|
981
1056
|
#############################################################################
|
|
982
1057
|
## Vision encoder
|
|
983
1058
|
|
|
@@ -245,9 +245,18 @@ class SpecialVocab:
|
|
|
245
245
|
if not tokenizer_config:
|
|
246
246
|
return True
|
|
247
247
|
chat_template_alt = None
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
248
|
+
chat_template_json = path / 'chat_template.json'
|
|
249
|
+
chat_template_jinja = path / 'chat_template.jinja'
|
|
250
|
+
if chat_template_jinja.is_file():
|
|
251
|
+
with open(chat_template_jinja, encoding = 'utf-8') as f:
|
|
252
|
+
chat_template_alt = f.read()
|
|
253
|
+
if additional_templates := list((path / 'additional_chat_templates').glob('*.jinja')):
|
|
254
|
+
chat_template_alt = [{'name': 'default', 'template': chat_template_alt}]
|
|
255
|
+
for template_path in additional_templates:
|
|
256
|
+
with open(template_path, encoding = 'utf-8') as fp:
|
|
257
|
+
chat_template_alt.append({'name': template_path.stem, 'template': fp.read()})
|
|
258
|
+
elif chat_template_json.is_file():
|
|
259
|
+
with open(chat_template_json, encoding = 'utf-8') as f:
|
|
251
260
|
chat_template_alt = json.load(f).get('chat_template')
|
|
252
261
|
chat_template = tokenizer_config.get('chat_template', chat_template_alt)
|
|
253
262
|
if chat_template is None or isinstance(chat_template, (str, list)):
|
|
@@ -71,52 +71,13 @@ extern "C" {
|
|
|
71
71
|
typedef int32_t llama_seq_id;
|
|
72
72
|
|
|
73
73
|
enum llama_vocab_type {
|
|
74
|
-
LLAMA_VOCAB_TYPE_NONE
|
|
75
|
-
LLAMA_VOCAB_TYPE_SPM
|
|
76
|
-
LLAMA_VOCAB_TYPE_BPE
|
|
77
|
-
LLAMA_VOCAB_TYPE_WPM
|
|
78
|
-
LLAMA_VOCAB_TYPE_UGM
|
|
79
|
-
LLAMA_VOCAB_TYPE_RWKV
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
// pre-tokenization types
|
|
83
|
-
enum llama_vocab_pre_type {
|
|
84
|
-
LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0,
|
|
85
|
-
LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1,
|
|
86
|
-
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2,
|
|
87
|
-
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,
|
|
88
|
-
LLAMA_VOCAB_PRE_TYPE_FALCON = 4,
|
|
89
|
-
LLAMA_VOCAB_PRE_TYPE_MPT = 5,
|
|
90
|
-
LLAMA_VOCAB_PRE_TYPE_STARCODER = 6,
|
|
91
|
-
LLAMA_VOCAB_PRE_TYPE_GPT2 = 7,
|
|
92
|
-
LLAMA_VOCAB_PRE_TYPE_REFACT = 8,
|
|
93
|
-
LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9,
|
|
94
|
-
LLAMA_VOCAB_PRE_TYPE_STABLELM2 = 10,
|
|
95
|
-
LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11,
|
|
96
|
-
LLAMA_VOCAB_PRE_TYPE_OLMO = 12,
|
|
97
|
-
LLAMA_VOCAB_PRE_TYPE_DBRX = 13,
|
|
98
|
-
LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
|
|
99
|
-
LLAMA_VOCAB_PRE_TYPE_PORO = 15,
|
|
100
|
-
LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16,
|
|
101
|
-
LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17,
|
|
102
|
-
LLAMA_VOCAB_PRE_TYPE_VIKING = 18,
|
|
103
|
-
LLAMA_VOCAB_PRE_TYPE_JAIS = 19,
|
|
104
|
-
LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20,
|
|
105
|
-
LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21,
|
|
106
|
-
LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
|
|
107
|
-
LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
|
|
108
|
-
LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
|
|
109
|
-
LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
|
|
110
|
-
LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
|
|
111
|
-
LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
|
|
112
|
-
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
|
|
113
|
-
LLAMA_VOCAB_PRE_TYPE_GPT4O = 29,
|
|
114
|
-
LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30,
|
|
115
|
-
LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
|
|
116
|
-
LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
|
|
117
|
-
LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
|
|
118
|
-
LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
|
|
119
|
-
LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
|
|
74
|
+
LLAMA_VOCAB_TYPE_NONE = 0, // For models without vocab
|
|
75
|
+
LLAMA_VOCAB_TYPE_SPM = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
|
|
76
|
+
LLAMA_VOCAB_TYPE_BPE = 2, // GPT-2 tokenizer based on byte-level BPE
|
|
77
|
+
LLAMA_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece
|
|
78
|
+
LLAMA_VOCAB_TYPE_UGM = 4, // T5 tokenizer based on Unigram
|
|
79
|
+
LLAMA_VOCAB_TYPE_RWKV = 5, // RWKV tokenizer based on greedy tokenization
|
|
80
|
+
LLAMA_VOCAB_TYPE_PLAMO2 = 6, // PLaMo-2 tokenizer based on Aho-Corasick with dynamic programming
|
|
120
81
|
};
|
|
121
82
|
|
|
122
83
|
enum llama_rope_type {
|
|
@@ -374,6 +335,9 @@ extern "C" {
|
|
|
374
335
|
bool swa_full; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
|
|
375
336
|
// NOTE: setting to false when n_seq_max > 1 can cause bad performance in some cases
|
|
376
337
|
// ref: https://github.com/ggml-org/llama.cpp/pull/13845#issuecomment-2924800573
|
|
338
|
+
bool kv_unified; // use a unified buffer across the input sequences when computing the attention
|
|
339
|
+
// try to disable when n_seq_max > 1 for improved performance when the sequences do not share a large prefix
|
|
340
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/14363
|
|
377
341
|
};
|
|
378
342
|
|
|
379
343
|
// model quantization parameters
|
|
@@ -764,7 +728,7 @@ extern "C" {
|
|
|
764
728
|
// - lazily on next llama_decode()
|
|
765
729
|
// p0 < 0 : [0, p1]
|
|
766
730
|
// p1 < 0 : [p0, inf)
|
|
767
|
-
DEPRECATED(void llama_kv_self_seq_div(
|
|
731
|
+
DEPRECATED(LLAMA_API void llama_kv_self_seq_div(
|
|
768
732
|
struct llama_context * ctx,
|
|
769
733
|
llama_seq_id seq_id,
|
|
770
734
|
llama_pos p0,
|
|
@@ -992,6 +956,7 @@ extern "C" {
|
|
|
992
956
|
// in the order they have appeared in the batch.
|
|
993
957
|
// Rows: number of tokens for which llama_batch.logits[i] != 0
|
|
994
958
|
// Cols: n_vocab
|
|
959
|
+
// TODO: deprecate in favor of llama_get_logits_ith() (ref: https://github.com/ggml-org/llama.cpp/pull/14853#issuecomment-3113143522)
|
|
995
960
|
LLAMA_API float * llama_get_logits(struct llama_context * ctx);
|
|
996
961
|
|
|
997
962
|
// Logits for the ith token. For positive indices, Equivalent to:
|
|
@@ -1006,6 +971,7 @@ extern "C" {
|
|
|
1006
971
|
// in the order they have appeared in the batch.
|
|
1007
972
|
// shape: [n_outputs*n_embd]
|
|
1008
973
|
// Otherwise, returns NULL.
|
|
974
|
+
// TODO: deprecate in favor of llama_get_embeddings_ith() (ref: https://github.com/ggml-org/llama.cpp/pull/14853#issuecomment-3113143522)
|
|
1009
975
|
LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
|
|
1010
976
|
|
|
1011
977
|
// Get the embeddings for the ith token. For positive indices, Equivalent to:
|
|
@@ -1044,6 +1010,7 @@ extern "C" {
|
|
|
1044
1010
|
LLAMA_API llama_token llama_vocab_sep(const struct llama_vocab * vocab); // sentence separator
|
|
1045
1011
|
LLAMA_API llama_token llama_vocab_nl (const struct llama_vocab * vocab); // next-line
|
|
1046
1012
|
LLAMA_API llama_token llama_vocab_pad(const struct llama_vocab * vocab); // padding
|
|
1013
|
+
LLAMA_API llama_token llama_vocab_mask(const struct llama_vocab * vocab); // mask
|
|
1047
1014
|
|
|
1048
1015
|
LLAMA_API bool llama_vocab_get_add_bos(const struct llama_vocab * vocab);
|
|
1049
1016
|
LLAMA_API bool llama_vocab_get_add_eos(const struct llama_vocab * vocab);
|
|
@@ -1429,6 +1396,7 @@ extern "C" {
|
|
|
1429
1396
|
|
|
1430
1397
|
int32_t n_p_eval;
|
|
1431
1398
|
int32_t n_eval;
|
|
1399
|
+
int32_t n_reused; // number of times a ggml compute graph had been reused
|
|
1432
1400
|
};
|
|
1433
1401
|
|
|
1434
1402
|
struct llama_perf_sampler_data {
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{%- if not add_generation_prompt is defined -%}
|
|
2
|
+
{%- set add_generation_prompt = true -%}
|
|
3
|
+
{%- endif -%}
|
|
4
|
+
{%- set ns = namespace(system_prompt='') -%}
|
|
5
|
+
{%- for message in messages -%}
|
|
6
|
+
{%- if message['role'] == 'system' -%}
|
|
7
|
+
{%- set ns.system_prompt = message['content'] -%}
|
|
8
|
+
{%- endif -%}
|
|
9
|
+
{%- endfor -%}
|
|
10
|
+
{{bos_token}}
|
|
11
|
+
{%- if ns.system_prompt != '' -%}
|
|
12
|
+
{{- 'System: ' + ns.system_prompt + '\n\n' -}}
|
|
13
|
+
{%- endif -%}
|
|
14
|
+
{%- for message in messages -%}
|
|
15
|
+
{%- if message['role'] == 'user' -%}
|
|
16
|
+
{{- 'User: ' + message['content']|trim + '\n\n' -}}
|
|
17
|
+
{%- endif -%}
|
|
18
|
+
{%- if message['role'] == 'assistant' and message['content'] is not none -%}
|
|
19
|
+
{%- set content = message['content'] -%}
|
|
20
|
+
{%- if '</think>' in content -%}
|
|
21
|
+
{%- set content = content.split('</think>')[-1] -%}
|
|
22
|
+
{%- endif -%}
|
|
23
|
+
{{- 'Assistant: ' + content|trim + '\n\n' -}}
|
|
24
|
+
{%- endif -%}
|
|
25
|
+
{%- endfor -%}
|
|
26
|
+
{%- if add_generation_prompt -%}
|
|
27
|
+
{{- 'Assistant:' -}}
|
|
28
|
+
{%- if enable_thinking is defined and enable_thinking is false %}
|
|
29
|
+
{{- ' <think>\n</think>' }}
|
|
30
|
+
{%- endif %}
|
|
31
|
+
{%- if enable_thinking is defined and enable_thinking is true %}
|
|
32
|
+
{{- ' <think>' }}
|
|
33
|
+
{%- endif %}
|
|
34
|
+
{%- endif -%}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{%- if tools -%}
|
|
2
|
+
<|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>
|
|
3
|
+
{%- endif -%}
|
|
4
|
+
{%- for message in messages -%}
|
|
5
|
+
{%- if loop.first and messages[0]['role'] != 'system' -%}
|
|
6
|
+
<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>
|
|
7
|
+
{%- endif -%}
|
|
8
|
+
{%- if message['role'] == 'system' -%}
|
|
9
|
+
<|im_system|>system<|im_middle|>
|
|
10
|
+
{%- elif message['role'] == 'user' -%}
|
|
11
|
+
<|im_user|>user<|im_middle|>
|
|
12
|
+
{%- elif message['role'] == 'assistant' -%}
|
|
13
|
+
<|im_assistant|>assistant<|im_middle|>
|
|
14
|
+
{%- elif message['role'] == 'tool' -%}
|
|
15
|
+
<|im_system|>tool<|im_middle|>
|
|
16
|
+
{%- endif -%}
|
|
17
|
+
{%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
|
|
18
|
+
{%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
|
|
19
|
+
<|tool_calls_section_begin|>
|
|
20
|
+
{%- for tool_call in message['tool_calls'] -%}
|
|
21
|
+
{%- set func_name = tool_call['function']['name'] -%}
|
|
22
|
+
{%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}
|
|
23
|
+
<|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|>
|
|
24
|
+
{%- endfor -%}
|
|
25
|
+
<|tool_calls_section_end|>
|
|
26
|
+
{%- elif message['role'] == 'tool' -%}
|
|
27
|
+
## Return of {{ message.tool_call_id }}\n{{ message['content'] }}
|
|
28
|
+
{%- elif message['content'] is string -%}
|
|
29
|
+
{{ message['content'] }}
|
|
30
|
+
{%- elif message['content'] is not none -%}
|
|
31
|
+
{% for content in message['content'] -%}
|
|
32
|
+
{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
|
|
33
|
+
<|media_start|>image<|media_content|><|media_pad|><|media_end|>
|
|
34
|
+
{% else -%}
|
|
35
|
+
{{ content['text'] }}
|
|
36
|
+
{%- endif -%}
|
|
37
|
+
{%- endfor -%}
|
|
38
|
+
{%- endif -%}
|
|
39
|
+
<|im_end|>
|
|
40
|
+
{%- endfor -%}
|
|
41
|
+
{%- if add_generation_prompt -%}
|
|
42
|
+
<|im_assistant|>assistant<|im_middle|>
|
|
43
|
+
{%- endif -%}
|