@novastera-oss/llamarn 0.2.6 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/cpp/include/llama.h +141 -38
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +58 -24
- package/cpp/LlamaCppModel.h +3 -3
- package/cpp/PureCppImpl.cpp +1 -1
- package/cpp/PureCppImpl.h +2 -2
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +15 -4
- package/cpp/llama.cpp/Makefile +2 -2
- package/cpp/llama.cpp/README.md +32 -13
- package/cpp/llama.cpp/common/CMakeLists.txt +10 -20
- package/cpp/llama.cpp/common/arg.cpp +37 -6
- package/cpp/llama.cpp/common/build-info.cpp.in +2 -2
- package/cpp/llama.cpp/common/chat-parser.cpp +5 -0
- package/cpp/llama.cpp/common/chat-parser.h +2 -0
- package/cpp/llama.cpp/common/chat.cpp +12 -9
- package/cpp/llama.cpp/common/chat.h +1 -1
- package/cpp/llama.cpp/common/common.cpp +53 -40
- package/cpp/llama.cpp/common/common.h +6 -2
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
- package/cpp/llama.cpp/common/speculative.cpp +6 -4
- package/cpp/llama.cpp/convert_hf_to_gguf.py +215 -76
- package/cpp/llama.cpp/ggml/CMakeLists.txt +48 -2
- package/cpp/llama.cpp/ggml/cmake/common.cmake +1 -2
- package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +33 -0
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +64 -13
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +6 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
- package/cpp/llama.cpp/ggml/src/ggml-common.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +124 -26
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +4 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +16 -7
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +93 -104
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +12 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +58 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +194 -69
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +1158 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1571 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +213 -37
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +45 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +59 -37
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +90 -39
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +61 -183
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +11 -10
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +16 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +260 -49
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +497 -282
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1078 -468
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +0 -2
- package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +20 -48
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +117 -165
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +192 -53
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +32 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +99 -159
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +8 -105
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +209 -92
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +158 -203
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +38 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +24 -20
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +36 -28
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +487 -247
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +69 -19
- package/cpp/llama.cpp/ggml/src/gguf.cpp +5 -1
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +133 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +25 -1
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +78 -3
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +97 -4
- package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
- package/cpp/llama.cpp/include/llama.h +141 -38
- package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
- package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +1 -0
- package/cpp/llama.cpp/src/CMakeLists.txt +2 -2
- package/cpp/llama.cpp/src/llama-arch.cpp +150 -3
- package/cpp/llama.cpp/src/llama-arch.h +25 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +736 -274
- package/cpp/llama.cpp/src/llama-batch.h +110 -57
- package/cpp/llama.cpp/src/llama-chat.cpp +30 -8
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-context.cpp +360 -266
- package/cpp/llama.cpp/src/llama-context.h +27 -23
- package/cpp/llama.cpp/src/llama-cparams.cpp +1 -1
- package/cpp/llama.cpp/src/llama-cparams.h +1 -1
- package/cpp/llama.cpp/src/llama-graph.cpp +411 -344
- package/cpp/llama.cpp/src/llama-graph.h +126 -58
- package/cpp/llama.cpp/src/llama-hparams.cpp +10 -2
- package/cpp/llama.cpp/src/llama-hparams.h +16 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +103 -73
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +34 -42
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +345 -221
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +75 -50
- package/cpp/llama.cpp/src/llama-kv-cells.h +51 -22
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +138 -0
- package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.cpp → llama-memory-recurrent.cpp} +302 -317
- package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.h → llama-memory-recurrent.h} +60 -68
- package/cpp/llama.cpp/src/llama-memory.cpp +41 -0
- package/cpp/llama.cpp/src/llama-memory.h +73 -36
- package/cpp/llama.cpp/src/llama-mmap.cpp +1 -1
- package/cpp/llama.cpp/src/llama-model-loader.cpp +42 -17
- package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/cpp/llama.cpp/src/llama-model.cpp +1630 -511
- package/cpp/llama.cpp/src/llama-model.h +26 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +89 -6
- package/cpp/llama.cpp/src/llama-vocab.cpp +58 -26
- package/cpp/llama.cpp/src/llama-vocab.h +1 -0
- package/cpp/llama.cpp/src/llama.cpp +11 -7
- package/cpp/llama.cpp/src/unicode.cpp +5 -0
- package/cpp/rn-completion.cpp +2 -2
- package/cpp/{rn-llama.hpp → rn-llama.h} +1 -1
- package/cpp/{rn-utils.hpp → rn-utils.h} +3 -0
- package/ios/include/chat.h +1 -1
- package/ios/include/common.h +6 -2
- package/ios/include/llama.h +141 -38
- package/ios/libs/llama.xcframework/Info.plist +15 -15
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3766 -3624
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4926 -4725
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4897 -4746
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3794 -3652
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -2
- package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +0 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13891
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +0 -1
- package/cpp/llama.cpp/src/llama-kv-cache.h +0 -44
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
|
@@ -31,6 +31,7 @@ class TensorNameMap:
|
|
|
31
31
|
"model.embeddings", # rwkv7
|
|
32
32
|
"model.word_embeddings", # bailingmoe
|
|
33
33
|
"language_model.model.embed_tokens", # llama4
|
|
34
|
+
"encoder", # neobert
|
|
34
35
|
),
|
|
35
36
|
|
|
36
37
|
# Token type embeddings
|
|
@@ -134,6 +135,7 @@ class TensorNameMap:
|
|
|
134
135
|
"rwkv.blocks.{bid}.ln1", # rwkv6
|
|
135
136
|
"model.layers.{bid}.ln1", # rwkv7
|
|
136
137
|
"model.layers.{bid}.input_layernorm", # llama4
|
|
138
|
+
"transformer_encoder.{bid}.attention_norm", # neobert
|
|
137
139
|
),
|
|
138
140
|
|
|
139
141
|
# Attention norm 2
|
|
@@ -161,6 +163,7 @@ class TensorNameMap:
|
|
|
161
163
|
"model.layers.{bid}.self_attn.qkv_proj", # phi3
|
|
162
164
|
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm
|
|
163
165
|
"transformer.layers.{bid}.attn.qkv_proj", # openelm
|
|
166
|
+
"transformer_encoder.{bid}.qkv", # neobert
|
|
164
167
|
),
|
|
165
168
|
|
|
166
169
|
# Attention query
|
|
@@ -236,6 +239,7 @@ class TensorNameMap:
|
|
|
236
239
|
"transformer.layers.{bid}.attn.out_proj", # openelm
|
|
237
240
|
"transformer.h.{bid}.attn.attention.out_proj", # exaone
|
|
238
241
|
"model.layers.{bid}.self_attn.o_proj", # llama4
|
|
242
|
+
"transformer_encoder.{bid}.wo", # neobert
|
|
239
243
|
),
|
|
240
244
|
|
|
241
245
|
# Attention output norm
|
|
@@ -276,6 +280,7 @@ class TensorNameMap:
|
|
|
276
280
|
"encoder.layers.{bid}.post_attention_layernorm", # chatglm
|
|
277
281
|
"transformer.layers.{bid}.ffn_norm", # openelm
|
|
278
282
|
"model.layers.{bid}.post_attention_layernorm", # llama4
|
|
283
|
+
"transformer_encoder.{bid}.ffn_norm", # neobert
|
|
279
284
|
),
|
|
280
285
|
|
|
281
286
|
# Post feed-forward norm
|
|
@@ -305,7 +310,7 @@ class TensorNameMap:
|
|
|
305
310
|
),
|
|
306
311
|
|
|
307
312
|
MODEL_TENSOR.FFN_EXP_PROBS_B: (
|
|
308
|
-
"model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
|
|
313
|
+
"model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3 dots1
|
|
309
314
|
),
|
|
310
315
|
|
|
311
316
|
# Feed-forward up
|
|
@@ -333,11 +338,14 @@ class TensorNameMap:
|
|
|
333
338
|
"encoder.layers.{bid}.mlp.fc11", # nomic-bert
|
|
334
339
|
"encoder.layers.{bid}.mlp.fc1", # nomic-bert-moe
|
|
335
340
|
"model.layers.{bid}.mlp.c_fc", # starcoder2
|
|
336
|
-
"encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
|
|
341
|
+
"encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2 (split up/gate, no longer used)
|
|
342
|
+
"encoder.layer.{bid}.mlp.gated_layers", # jina-bert-v2 (GEGLU)
|
|
343
|
+
"encoder.layer.{bid}.mlp.up_gated_layer", # jina-v2-code (GEGLU)
|
|
337
344
|
"model.layers.{bid}.residual_mlp.w3", # arctic
|
|
338
345
|
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
|
|
339
346
|
"transformer.h.{bid}.mlp.c_fc_1", # exaone
|
|
340
347
|
"model.layers.{bid}.feed_forward.up_proj", # llama4
|
|
348
|
+
"transformer_encoder.{bid}.ffn.w12", # neobert
|
|
341
349
|
),
|
|
342
350
|
|
|
343
351
|
MODEL_TENSOR.FFN_UP_EXP: (
|
|
@@ -370,7 +378,7 @@ class TensorNameMap:
|
|
|
370
378
|
"model.layers.layers.{bid}.mlp.gate_proj", # plamo
|
|
371
379
|
"model.layers.{bid}.feed_forward.w1", # internlm2
|
|
372
380
|
"encoder.layers.{bid}.mlp.fc12", # nomic-bert
|
|
373
|
-
"encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2
|
|
381
|
+
"encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2 (split up/gate, no longer used)
|
|
374
382
|
"transformer.h.{bid}.mlp.linear_1", # refact
|
|
375
383
|
"model.layers.{bid}.residual_mlp.w1", # arctic
|
|
376
384
|
"transformer.h.{bid}.mlp.c_fc_0", # exaone
|
|
@@ -420,6 +428,7 @@ class TensorNameMap:
|
|
|
420
428
|
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
|
|
421
429
|
"model.layers.h.{bid}.mlp.c_proj", # exaone
|
|
422
430
|
"model.layers.{bid}.feed_forward.down_proj", # llama4
|
|
431
|
+
"transformer_encoder.{bid}.ffn.w3", # neobert
|
|
423
432
|
),
|
|
424
433
|
|
|
425
434
|
MODEL_TENSOR.FFN_DOWN_EXP: (
|
|
@@ -471,6 +480,70 @@ class TensorNameMap:
|
|
|
471
480
|
"encoder.layer.{bid}.layer_norm_2" # jina-v2-code
|
|
472
481
|
),
|
|
473
482
|
|
|
483
|
+
MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: (
|
|
484
|
+
"model.embed_tokens_per_layer", # gemma3n
|
|
485
|
+
),
|
|
486
|
+
|
|
487
|
+
MODEL_TENSOR.PER_LAYER_MODEL_PROJ: (
|
|
488
|
+
"model.per_layer_model_projection", # gemma3n
|
|
489
|
+
),
|
|
490
|
+
|
|
491
|
+
MODEL_TENSOR.PER_LAYER_PROJ_NORM: (
|
|
492
|
+
"model.per_layer_projection_norm", # gemma3n
|
|
493
|
+
),
|
|
494
|
+
|
|
495
|
+
MODEL_TENSOR.ALTUP_PROJ: (
|
|
496
|
+
"model.altup_projections", # gemma3n
|
|
497
|
+
),
|
|
498
|
+
|
|
499
|
+
MODEL_TENSOR.ALTUP_UNEMBD_PROJ: (
|
|
500
|
+
"model.altup_unembed_projections", # gemma3n
|
|
501
|
+
),
|
|
502
|
+
|
|
503
|
+
MODEL_TENSOR.PER_LAYER_INP_GATE: (
|
|
504
|
+
"model.layers.{bid}.per_layer_input_gate", # gemma3n
|
|
505
|
+
),
|
|
506
|
+
|
|
507
|
+
MODEL_TENSOR.PER_LAYER_PROJ: (
|
|
508
|
+
"model.layers.{bid}.per_layer_projection", # gemma3n
|
|
509
|
+
),
|
|
510
|
+
|
|
511
|
+
MODEL_TENSOR.PER_LAYER_POST_NORM: (
|
|
512
|
+
"model.layers.{bid}.post_per_layer_input_norm", # gemma3n
|
|
513
|
+
),
|
|
514
|
+
|
|
515
|
+
MODEL_TENSOR.ALTUP_CORRECT_COEF: (
|
|
516
|
+
"model.layers.{bid}.altup.correction_coefs", # gemma3n
|
|
517
|
+
),
|
|
518
|
+
|
|
519
|
+
MODEL_TENSOR.ALTUP_CORRECT_SCALE: (
|
|
520
|
+
"model.layers.{bid}.altup.correct_output_scale", # gemma3n
|
|
521
|
+
),
|
|
522
|
+
|
|
523
|
+
MODEL_TENSOR.ALTUP_PREDICT_COEF: (
|
|
524
|
+
"model.layers.{bid}.altup.prediction_coefs", # gemma3n
|
|
525
|
+
),
|
|
526
|
+
|
|
527
|
+
MODEL_TENSOR.ALTUP_ROUTER: (
|
|
528
|
+
"model.layers.{bid}.altup.modality_router", # gemma3n
|
|
529
|
+
),
|
|
530
|
+
|
|
531
|
+
MODEL_TENSOR.ALTUP_ROUTER_NORM: (
|
|
532
|
+
"model.layers.{bid}.altup.router_norm", # gemma3n
|
|
533
|
+
),
|
|
534
|
+
|
|
535
|
+
MODEL_TENSOR.LAUREL_L: (
|
|
536
|
+
"model.layers.{bid}.laurel.linear_left", # gemma3n
|
|
537
|
+
),
|
|
538
|
+
|
|
539
|
+
MODEL_TENSOR.LAUREL_R: (
|
|
540
|
+
"model.layers.{bid}.laurel.linear_right", # gemma3n
|
|
541
|
+
),
|
|
542
|
+
|
|
543
|
+
MODEL_TENSOR.LAUREL_POST_NORM: (
|
|
544
|
+
"model.layers.{bid}.laurel.post_laurel_norm", # gemma3n
|
|
545
|
+
),
|
|
546
|
+
|
|
474
547
|
MODEL_TENSOR.SSM_IN: (
|
|
475
548
|
"model.layers.{bid}.in_proj",
|
|
476
549
|
"backbone.layers.{bid}.mixer.in_proj",
|
|
@@ -830,12 +903,14 @@ class TensorNameMap:
|
|
|
830
903
|
# TODO: these do not belong to block_mappings_cfg - move them to mappings_cfg
|
|
831
904
|
MODEL_TENSOR.ENC_OUTPUT_NORM: (
|
|
832
905
|
"encoder.final_layer_norm", # t5
|
|
906
|
+
"layer_norm", # neobert
|
|
833
907
|
),
|
|
834
908
|
|
|
835
909
|
MODEL_TENSOR.CLS: (
|
|
836
910
|
"classifier", # jina
|
|
837
911
|
"classifier.dense", # roberta
|
|
838
912
|
"pre_classifier", # distillbert
|
|
913
|
+
"dense", # neobert
|
|
839
914
|
),
|
|
840
915
|
|
|
841
916
|
MODEL_TENSOR.CLS_OUT: (
|
|
@@ -7,7 +7,10 @@ import os
|
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
from typing import Any, Callable, Sequence, Mapping, Iterable, Protocol, ClassVar, runtime_checkable
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
try:
|
|
11
|
+
from sentencepiece import SentencePieceProcessor
|
|
12
|
+
except ImportError:
|
|
13
|
+
SentencePieceProcessor = None
|
|
11
14
|
|
|
12
15
|
import gguf
|
|
13
16
|
|
|
@@ -116,6 +119,7 @@ class SpecialVocab:
|
|
|
116
119
|
logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping')
|
|
117
120
|
|
|
118
121
|
def _try_load_from_tokenizer_json(self, path: Path) -> bool:
|
|
122
|
+
tokenizer = None
|
|
119
123
|
tokenizer_file = path / 'tokenizer.json'
|
|
120
124
|
if tokenizer_file.is_file():
|
|
121
125
|
with open(tokenizer_file, encoding = 'utf-8') as f:
|
|
@@ -149,11 +153,97 @@ class SpecialVocab:
|
|
|
149
153
|
added_tokens = tokenizer.get('added_tokens', {})
|
|
150
154
|
else:
|
|
151
155
|
added_tokens = {}
|
|
156
|
+
tokenizer_config = None
|
|
152
157
|
tokenizer_config_file = path / 'tokenizer_config.json'
|
|
153
|
-
if not tokenizer_config_file.is_file():
|
|
158
|
+
if tokenizer_config_file.is_file():
|
|
159
|
+
with open(tokenizer_config_file, encoding = 'utf-8') as f:
|
|
160
|
+
tokenizer_config = json.load(f)
|
|
161
|
+
if tokenizer:
|
|
162
|
+
special_bos = (tokenizer_config or {}).get('bos_token')
|
|
163
|
+
special_cls = (tokenizer_config or {}).get('cls_token')
|
|
164
|
+
special_eos = (tokenizer_config or {}).get('eos_token')
|
|
165
|
+
special_sep = (tokenizer_config or {}).get('sep_token')
|
|
166
|
+
if not special_bos and special_cls and tokenizer_config:
|
|
167
|
+
tokenizer_config['bos_token'] = special_bos = special_cls
|
|
168
|
+
if not special_eos and special_sep and tokenizer_config:
|
|
169
|
+
tokenizer_config['eos_token'] = special_eos = special_sep
|
|
170
|
+
if post_processor := tokenizer.get('post_processor'):
|
|
171
|
+
for processor in post_processor.get('processors', [post_processor]):
|
|
172
|
+
if processor.get('type') == 'RobertaProcessing':
|
|
173
|
+
self.add_special_token['bos'] = True
|
|
174
|
+
self.add_special_token['eos'] = True
|
|
175
|
+
self.add_special_token['sep'] = True
|
|
176
|
+
if not special_cls and tokenizer_config:
|
|
177
|
+
special_cls = processor.get('cls', [special_bos])[0]
|
|
178
|
+
tokenizer_config['cls_token'] = special_cls
|
|
179
|
+
if not special_sep and tokenizer_config:
|
|
180
|
+
special_sep = processor.get('sep', [special_eos])[0]
|
|
181
|
+
tokenizer_config['sep_token'] = special_sep
|
|
182
|
+
continue
|
|
183
|
+
# Crude parsing of TemplateProcessing to determine if BOS/SEP/EOS should be added
|
|
184
|
+
# Only works with simple templates, **will** get it wrong on unusual sequences
|
|
185
|
+
if processor.get('type') == 'TemplateProcessing':
|
|
186
|
+
tmpl_single = processor.get('single', [])
|
|
187
|
+
tmpl_pair = processor.get('pair', [])
|
|
188
|
+
special_first = None
|
|
189
|
+
special_last = None
|
|
190
|
+
if len(tmpl_single) > 1:
|
|
191
|
+
if special_first := tmpl_single[0].get('SpecialToken', {}).get('id'):
|
|
192
|
+
if not tokenizer_config:
|
|
193
|
+
special_bos = special_first
|
|
194
|
+
self.add_special_token['bos'] = True if special_first in (special_bos, special_cls) else False
|
|
195
|
+
if special_first not in (special_bos, special_cls):
|
|
196
|
+
logger.warning(f'Unknown leading special token {special_first!r} in TemplateProcessing<single>')
|
|
197
|
+
if special_last := tmpl_single[-1].get('SpecialToken', {}).get('id'):
|
|
198
|
+
if not tokenizer_config:
|
|
199
|
+
special_eos = special_last
|
|
200
|
+
elif special_last != special_eos:
|
|
201
|
+
if 'eot' not in self.special_token_types:
|
|
202
|
+
self.special_token_types = tuple(self.special_token_types) + ('eot', )
|
|
203
|
+
tokenizer_config['eot_token'] = special_eos
|
|
204
|
+
elif 'eom' not in self.special_token_types:
|
|
205
|
+
self.special_token_types = tuple(self.special_token_types) + ('eom', )
|
|
206
|
+
tokenizer_config['eom_token'] = special_eos
|
|
207
|
+
else:
|
|
208
|
+
logger.warning(f'Overriding EOS token {special_eos!r} with {special_last!r} without EOT/EOM fallback!')
|
|
209
|
+
tokenizer_config['eos_token'] = special_eos = special_last
|
|
210
|
+
self.add_special_token['eos'] = True if special_last == special_eos else False
|
|
211
|
+
if special_last != special_eos:
|
|
212
|
+
logger.warning(f'Unknown trailing special token {special_last!r} in TemplateProcessing<single>')
|
|
213
|
+
if tmpl_pair:
|
|
214
|
+
seq_start = 1 if special_first and tmpl_pair[0].get('SpecialToken', {}).get('id') == special_first else 0
|
|
215
|
+
seq_stop = -1 if special_last and tmpl_pair[-1].get('SpecialToken', {}).get('id') == special_last else None
|
|
216
|
+
if (special_first and seq_start == 0) or (special_last and seq_stop is None):
|
|
217
|
+
logger.warning('TemplateProcessing<single> leading/trailing special tokens do not match TemplateProcessing<pair>')
|
|
218
|
+
if tmpl_pair := tmpl_pair[slice(seq_start, seq_stop)]:
|
|
219
|
+
tmpl_a = tmpl_pair[0].get('Sequence', {}).get('id')
|
|
220
|
+
tmpl_b = tmpl_pair[-1].get('Sequence', {}).get('id')
|
|
221
|
+
if tmpl_a != 'A' or tmpl_b != 'B':
|
|
222
|
+
logger.warning(f'Unknown sequence {tmpl_a}...{tmpl_b} in TemplateProcessing<pair>')
|
|
223
|
+
# A [sep] [eos] B
|
|
224
|
+
if tmpl_a == 'A' and tmpl_b == 'B' and (tmpl_pair := tmpl_pair[1:-1]):
|
|
225
|
+
add_sep = False
|
|
226
|
+
if special_entry := tmpl_pair[0].get('SpecialToken', {}).get('id'):
|
|
227
|
+
if special_entry in (special_sep, special_eos) and not special_last:
|
|
228
|
+
add_sep = True
|
|
229
|
+
if special_entry not in (special_sep, special_eos):
|
|
230
|
+
logger.warning(f'Unknown separator token {special_entry!r} in TemplateProcessing<pair>')
|
|
231
|
+
else:
|
|
232
|
+
logger.warning(f'Unknown middle sequence {tmpl_pair[0]!r} in TemplateProcessing<pair>')
|
|
233
|
+
if len(tmpl_pair) == 2:
|
|
234
|
+
if special_entry := tmpl_pair[1].get('SpecialToken', {}).get('id'):
|
|
235
|
+
if special_entry in (special_sep, special_eos):
|
|
236
|
+
add_sep = True
|
|
237
|
+
if special_entry not in (special_sep, special_eos):
|
|
238
|
+
logger.warning(f'Unknown second separator token {special_entry!r} in TemplateProcessing<pair>')
|
|
239
|
+
else:
|
|
240
|
+
logger.warning(f'Unknown second middle sequence {tmpl_pair[1]!r} in TemplateProcessing<pair>')
|
|
241
|
+
self.add_special_token['sep'] = add_sep
|
|
242
|
+
if add_sep and not special_sep and tokenizer_config:
|
|
243
|
+
tokenizer_config['sep_token'] = special_eos
|
|
244
|
+
continue
|
|
245
|
+
if not tokenizer_config:
|
|
154
246
|
return True
|
|
155
|
-
with open(tokenizer_config_file, encoding = 'utf-8') as f:
|
|
156
|
-
tokenizer_config = json.load(f)
|
|
157
247
|
chat_template_alt = None
|
|
158
248
|
chat_template_file = path / 'chat_template.json'
|
|
159
249
|
if chat_template_file.is_file():
|
|
@@ -302,6 +392,9 @@ class SentencePieceVocab(Vocab):
|
|
|
302
392
|
name = "spm"
|
|
303
393
|
|
|
304
394
|
def __init__(self, base_path: Path):
|
|
395
|
+
if SentencePieceProcessor is None:
|
|
396
|
+
raise RuntimeError("sentencepiece is not installed")
|
|
397
|
+
|
|
305
398
|
added_tokens: dict[str, int] = {}
|
|
306
399
|
if (fname_tokenizer := base_path / 'tokenizer.model').exists():
|
|
307
400
|
# normal location
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "gguf"
|
|
3
|
-
version = "0.17.0"
|
|
3
|
+
version = "0.17.1"
|
|
4
4
|
description = "Read and write ML models in GGUF for GGML"
|
|
5
5
|
authors = ["GGML <ggml@ggml.ai>"]
|
|
6
6
|
packages = [
|
|
@@ -22,7 +22,7 @@ python = ">=3.8"
|
|
|
22
22
|
numpy = ">=1.17"
|
|
23
23
|
tqdm = ">=4.27"
|
|
24
24
|
pyyaml = ">=5.1"
|
|
25
|
-
sentencepiece = ">=0.1.98,<=0.2.0"
|
|
25
|
+
sentencepiece = { version = ">=0.1.98,<=0.2.0", optional = true }
|
|
26
26
|
PySide6 = { version = "^6.9", python = ">=3.9,<3.14", optional = true }
|
|
27
27
|
|
|
28
28
|
[tool.poetry.dev-dependencies]
|