@novastera-oss/llamarn 0.2.9 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/build.gradle +2 -1
- package/android/proguard-rules.pro +12 -0
- package/android/src/main/cpp/include/llama.h +15 -47
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +0 -1
- package/cpp/llama.cpp/CMakePresets.json +11 -0
- package/cpp/llama.cpp/CODEOWNERS +1 -0
- package/cpp/llama.cpp/README.md +8 -8
- package/cpp/llama.cpp/build-xcframework.sh +1 -1
- package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
- package/cpp/llama.cpp/common/arg.cpp +62 -1
- package/cpp/llama.cpp/common/chat.cpp +37 -20
- package/cpp/llama.cpp/common/chat.h +2 -0
- package/cpp/llama.cpp/common/common.cpp +22 -6
- package/cpp/llama.cpp/common/common.h +22 -4
- package/cpp/llama.cpp/convert_hf_to_gguf.py +1250 -43
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +21 -13
- package/cpp/llama.cpp/ggml/CMakeLists.txt +13 -3
- package/cpp/llama.cpp/ggml/cmake/ggml-config.cmake.in +85 -47
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +173 -10
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-alloc.c +0 -15
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +7 -8
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +44 -38
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +126 -8
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +130 -22
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +138 -18
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +11 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +28 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +109 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +88 -10
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +343 -1094
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1206 -163
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +36 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +142 -9
- package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +31 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +86 -17
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh +225 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +41 -301
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +85 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +47 -60
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +29 -42
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +46 -59
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +36 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +38 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +23 -36
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +3 -13
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +255 -99
- package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +111 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +1152 -695
- package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +92 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cu +275 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +104 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +27 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +80 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +48 -12
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +572 -106
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +599 -105
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +18 -4
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +800 -42
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +4 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +693 -1034
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +14 -26
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +191 -55
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +8 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +15 -18
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +2 -6
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +991 -307
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +265 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +59 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +18 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +84 -9
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +907 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +35 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +56 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +386 -67
- package/cpp/llama.cpp/ggml/src/gguf.cpp +8 -1
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +307 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +8 -2
- package/cpp/llama.cpp/gguf-py/gguf/metadata.py +4 -0
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_dump.py +24 -1
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +122 -47
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +12 -3
- package/cpp/llama.cpp/include/llama.h +15 -47
- package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja +34 -0
- package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja +43 -0
- package/cpp/llama.cpp/requirements/requirements-all.txt +1 -0
- package/cpp/llama.cpp/requirements/requirements-server-bench.txt +5 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +316 -3
- package/cpp/llama.cpp/src/llama-arch.h +23 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +103 -71
- package/cpp/llama.cpp/src/llama-batch.h +31 -18
- package/cpp/llama.cpp/src/llama-chat.cpp +58 -1
- package/cpp/llama.cpp/src/llama-chat.h +3 -0
- package/cpp/llama.cpp/src/llama-context.cpp +180 -106
- package/cpp/llama.cpp/src/llama-context.h +26 -16
- package/cpp/llama.cpp/src/llama-cparams.h +3 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +310 -211
- package/cpp/llama.cpp/src/llama-graph.h +184 -122
- package/cpp/llama.cpp/src/llama-hparams.cpp +47 -1
- package/cpp/llama.cpp/src/llama-hparams.h +13 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +38 -22
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +7 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +849 -304
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +143 -47
- package/cpp/llama.cpp/src/llama-kv-cells.h +62 -10
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +10 -4
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +3 -1
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +36 -11
- package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
- package/cpp/llama.cpp/src/llama-memory.h +3 -0
- package/cpp/llama.cpp/src/llama-model.cpp +3545 -719
- package/cpp/llama.cpp/src/llama-model.h +21 -4
- package/cpp/llama.cpp/src/llama-quant.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +376 -10
- package/cpp/llama.cpp/src/llama-vocab.h +43 -0
- package/cpp/llama.cpp/src/unicode.cpp +207 -0
- package/cpp/llama.cpp/src/unicode.h +2 -0
- package/ios/include/chat.h +2 -0
- package/ios/include/common.h +22 -4
- package/ios/include/llama.h +15 -47
- package/ios/libs/llama.xcframework/Info.plist +13 -13
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4016 -3766
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5303 -4926
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5274 -4897
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4044 -3794
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +4 -4
- package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
- package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
|
@@ -314,6 +314,13 @@
|
|
|
314
314
|
extern "C" {
|
|
315
315
|
#endif
|
|
316
316
|
|
|
317
|
+
// Function type used in fatal error callbacks
|
|
318
|
+
typedef void (*ggml_abort_callback_t)(const char * error_message);
|
|
319
|
+
|
|
320
|
+
// Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
|
|
321
|
+
// Returns the old callback for chaining
|
|
322
|
+
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback);
|
|
323
|
+
|
|
317
324
|
GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
|
|
318
325
|
GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
|
|
319
326
|
|
|
@@ -482,12 +489,13 @@ extern "C" {
|
|
|
482
489
|
GGML_OP_CONV_TRANSPOSE_1D,
|
|
483
490
|
GGML_OP_IM2COL,
|
|
484
491
|
GGML_OP_IM2COL_BACK,
|
|
492
|
+
GGML_OP_CONV_2D,
|
|
485
493
|
GGML_OP_CONV_2D_DW,
|
|
486
494
|
GGML_OP_CONV_TRANSPOSE_2D,
|
|
487
495
|
GGML_OP_POOL_1D,
|
|
488
496
|
GGML_OP_POOL_2D,
|
|
489
497
|
GGML_OP_POOL_2D_BACK,
|
|
490
|
-
GGML_OP_UPSCALE,
|
|
498
|
+
GGML_OP_UPSCALE,
|
|
491
499
|
GGML_OP_PAD,
|
|
492
500
|
GGML_OP_PAD_REFLECT_1D,
|
|
493
501
|
GGML_OP_ROLL,
|
|
@@ -520,6 +528,8 @@ extern "C" {
|
|
|
520
528
|
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
|
521
529
|
GGML_OP_OPT_STEP_ADAMW,
|
|
522
530
|
|
|
531
|
+
GGML_OP_GLU,
|
|
532
|
+
|
|
523
533
|
GGML_OP_COUNT,
|
|
524
534
|
};
|
|
525
535
|
|
|
@@ -543,6 +553,16 @@ extern "C" {
|
|
|
543
553
|
GGML_UNARY_OP_COUNT,
|
|
544
554
|
};
|
|
545
555
|
|
|
556
|
+
enum ggml_glu_op {
|
|
557
|
+
GGML_GLU_OP_REGLU,
|
|
558
|
+
GGML_GLU_OP_GEGLU,
|
|
559
|
+
GGML_GLU_OP_SWIGLU,
|
|
560
|
+
GGML_GLU_OP_GEGLU_ERF,
|
|
561
|
+
GGML_GLU_OP_GEGLU_QUICK,
|
|
562
|
+
|
|
563
|
+
GGML_GLU_OP_COUNT,
|
|
564
|
+
};
|
|
565
|
+
|
|
546
566
|
enum ggml_object_type {
|
|
547
567
|
GGML_OBJECT_TYPE_TENSOR,
|
|
548
568
|
GGML_OBJECT_TYPE_GRAPH,
|
|
@@ -628,6 +648,9 @@ extern "C" {
|
|
|
628
648
|
|
|
629
649
|
// misc
|
|
630
650
|
|
|
651
|
+
GGML_API const char * ggml_version(void);
|
|
652
|
+
GGML_API const char * ggml_commit(void);
|
|
653
|
+
|
|
631
654
|
GGML_API void ggml_time_init(void); // call this once at the beginning of the program
|
|
632
655
|
GGML_API int64_t ggml_time_ms(void);
|
|
633
656
|
GGML_API int64_t ggml_time_us(void);
|
|
@@ -658,6 +681,7 @@ extern "C" {
|
|
|
658
681
|
GGML_API const char * ggml_op_symbol(enum ggml_op op);
|
|
659
682
|
|
|
660
683
|
GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
|
|
684
|
+
GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op);
|
|
661
685
|
GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
|
|
662
686
|
|
|
663
687
|
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
|
|
@@ -762,6 +786,7 @@ extern "C" {
|
|
|
762
786
|
GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
|
|
763
787
|
|
|
764
788
|
GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
|
|
789
|
+
GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor);
|
|
765
790
|
|
|
766
791
|
GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
|
|
767
792
|
GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
|
|
@@ -1090,6 +1115,89 @@ extern "C" {
|
|
|
1090
1115
|
struct ggml_context * ctx,
|
|
1091
1116
|
struct ggml_tensor * a);
|
|
1092
1117
|
|
|
1118
|
+
// gated linear unit ops
|
|
1119
|
+
// A: n columns, r rows,
|
|
1120
|
+
// result is n / 2 columns, r rows,
|
|
1121
|
+
// expects gate in second half of row, unless swapped is true
|
|
1122
|
+
GGML_API struct ggml_tensor * ggml_glu(
|
|
1123
|
+
struct ggml_context * ctx,
|
|
1124
|
+
struct ggml_tensor * a,
|
|
1125
|
+
enum ggml_glu_op op,
|
|
1126
|
+
bool swapped);
|
|
1127
|
+
|
|
1128
|
+
GGML_API struct ggml_tensor * ggml_reglu(
|
|
1129
|
+
struct ggml_context * ctx,
|
|
1130
|
+
struct ggml_tensor * a);
|
|
1131
|
+
|
|
1132
|
+
GGML_API struct ggml_tensor * ggml_reglu_swapped(
|
|
1133
|
+
struct ggml_context * ctx,
|
|
1134
|
+
struct ggml_tensor * a);
|
|
1135
|
+
|
|
1136
|
+
GGML_API struct ggml_tensor * ggml_geglu(
|
|
1137
|
+
struct ggml_context * ctx,
|
|
1138
|
+
struct ggml_tensor * a);
|
|
1139
|
+
|
|
1140
|
+
GGML_API struct ggml_tensor * ggml_geglu_swapped(
|
|
1141
|
+
struct ggml_context * ctx,
|
|
1142
|
+
struct ggml_tensor * a);
|
|
1143
|
+
|
|
1144
|
+
GGML_API struct ggml_tensor * ggml_swiglu(
|
|
1145
|
+
struct ggml_context * ctx,
|
|
1146
|
+
struct ggml_tensor * a);
|
|
1147
|
+
|
|
1148
|
+
GGML_API struct ggml_tensor * ggml_swiglu_swapped(
|
|
1149
|
+
struct ggml_context * ctx,
|
|
1150
|
+
struct ggml_tensor * a);
|
|
1151
|
+
|
|
1152
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf(
|
|
1153
|
+
struct ggml_context * ctx,
|
|
1154
|
+
struct ggml_tensor * a);
|
|
1155
|
+
|
|
1156
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
|
|
1157
|
+
struct ggml_context * ctx,
|
|
1158
|
+
struct ggml_tensor * a);
|
|
1159
|
+
|
|
1160
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick(
|
|
1161
|
+
struct ggml_context * ctx,
|
|
1162
|
+
struct ggml_tensor * a);
|
|
1163
|
+
|
|
1164
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
|
|
1165
|
+
struct ggml_context * ctx,
|
|
1166
|
+
struct ggml_tensor * a);
|
|
1167
|
+
|
|
1168
|
+
// A: n columns, r rows,
|
|
1169
|
+
// B: n columns, r rows,
|
|
1170
|
+
GGML_API struct ggml_tensor * ggml_glu_split(
|
|
1171
|
+
struct ggml_context * ctx,
|
|
1172
|
+
struct ggml_tensor * a,
|
|
1173
|
+
struct ggml_tensor * b,
|
|
1174
|
+
enum ggml_glu_op op);
|
|
1175
|
+
|
|
1176
|
+
GGML_API struct ggml_tensor * ggml_reglu_split(
|
|
1177
|
+
struct ggml_context * ctx,
|
|
1178
|
+
struct ggml_tensor * a,
|
|
1179
|
+
struct ggml_tensor * b);
|
|
1180
|
+
|
|
1181
|
+
GGML_API struct ggml_tensor * ggml_geglu_split(
|
|
1182
|
+
struct ggml_context * ctx,
|
|
1183
|
+
struct ggml_tensor * a,
|
|
1184
|
+
struct ggml_tensor * b);
|
|
1185
|
+
|
|
1186
|
+
GGML_API struct ggml_tensor * ggml_swiglu_split(
|
|
1187
|
+
struct ggml_context * ctx,
|
|
1188
|
+
struct ggml_tensor * a,
|
|
1189
|
+
struct ggml_tensor * b);
|
|
1190
|
+
|
|
1191
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf_split(
|
|
1192
|
+
struct ggml_context * ctx,
|
|
1193
|
+
struct ggml_tensor * a,
|
|
1194
|
+
struct ggml_tensor * b);
|
|
1195
|
+
|
|
1196
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick_split(
|
|
1197
|
+
struct ggml_context * ctx,
|
|
1198
|
+
struct ggml_tensor * a,
|
|
1199
|
+
struct ggml_tensor * b);
|
|
1200
|
+
|
|
1093
1201
|
// normalize along rows
|
|
1094
1202
|
GGML_API struct ggml_tensor * ggml_norm(
|
|
1095
1203
|
struct ggml_context * ctx,
|
|
@@ -1189,6 +1297,19 @@ extern "C" {
|
|
|
1189
1297
|
struct ggml_tensor * a,
|
|
1190
1298
|
float s);
|
|
1191
1299
|
|
|
1300
|
+
// x = s * a + b
|
|
1301
|
+
GGML_API struct ggml_tensor * ggml_scale_bias(
|
|
1302
|
+
struct ggml_context * ctx,
|
|
1303
|
+
struct ggml_tensor * a,
|
|
1304
|
+
float s,
|
|
1305
|
+
float b);
|
|
1306
|
+
|
|
1307
|
+
GGML_API struct ggml_tensor * ggml_scale_bias_inplace(
|
|
1308
|
+
struct ggml_context * ctx,
|
|
1309
|
+
struct ggml_tensor * a,
|
|
1310
|
+
float s,
|
|
1311
|
+
float b);
|
|
1312
|
+
|
|
1192
1313
|
// b -> view(a,offset,nb1,nb2,3), return modified a
|
|
1193
1314
|
GGML_API struct ggml_tensor * ggml_set(
|
|
1194
1315
|
struct ggml_context * ctx,
|
|
@@ -1433,8 +1554,14 @@ extern "C" {
|
|
|
1433
1554
|
struct ggml_context * ctx,
|
|
1434
1555
|
struct ggml_tensor * a);
|
|
1435
1556
|
|
|
1557
|
+
// a [ne0, ne01, ne02, ne03]
|
|
1558
|
+
// mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
|
|
1559
|
+
//
|
|
1560
|
+
// broadcast:
|
|
1561
|
+
// ne02 % ne12 == 0
|
|
1562
|
+
// ne03 % ne13 == 0
|
|
1563
|
+
//
|
|
1436
1564
|
// fused soft_max(a*scale + mask*(ALiBi slope))
|
|
1437
|
-
// mask is optional
|
|
1438
1565
|
// max_bias = 0.0f for no ALiBi
|
|
1439
1566
|
GGML_API struct ggml_tensor * ggml_soft_max_ext(
|
|
1440
1567
|
struct ggml_context * ctx,
|
|
@@ -1744,6 +1871,17 @@ extern "C" {
|
|
|
1744
1871
|
struct ggml_tensor * b,
|
|
1745
1872
|
int stride);
|
|
1746
1873
|
|
|
1874
|
+
GGML_API struct ggml_tensor * ggml_conv_2d_direct(
|
|
1875
|
+
struct ggml_context * ctx,
|
|
1876
|
+
struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC]
|
|
1877
|
+
struct ggml_tensor * b, // input data [W, H, C, N]
|
|
1878
|
+
int s0, // stride dimension 0
|
|
1879
|
+
int s1, // stride dimension 1
|
|
1880
|
+
int p0, // padding dimension 0
|
|
1881
|
+
int p1, // padding dimension 1
|
|
1882
|
+
int d0, // dilation dimension 0
|
|
1883
|
+
int d1); // dilation dimension 1
|
|
1884
|
+
|
|
1747
1885
|
enum ggml_op_pool {
|
|
1748
1886
|
GGML_OP_POOL_MAX,
|
|
1749
1887
|
GGML_OP_POOL_AVG,
|
|
@@ -1786,6 +1924,12 @@ extern "C" {
|
|
|
1786
1924
|
enum ggml_scale_mode {
|
|
1787
1925
|
GGML_SCALE_MODE_NEAREST = 0,
|
|
1788
1926
|
GGML_SCALE_MODE_BILINEAR = 1,
|
|
1927
|
+
|
|
1928
|
+
GGML_SCALE_MODE_COUNT
|
|
1929
|
+
};
|
|
1930
|
+
|
|
1931
|
+
enum ggml_scale_flag {
|
|
1932
|
+
GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
|
|
1789
1933
|
};
|
|
1790
1934
|
|
|
1791
1935
|
// interpolate
|
|
@@ -1798,14 +1942,26 @@ extern "C" {
|
|
|
1798
1942
|
|
|
1799
1943
|
// interpolate
|
|
1800
1944
|
// interpolate scale to specified dimensions
|
|
1801
|
-
GGML_API struct ggml_tensor * ggml_upscale_ext(
|
|
1945
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
|
|
1802
1946
|
struct ggml_context * ctx,
|
|
1803
1947
|
struct ggml_tensor * a,
|
|
1804
1948
|
int ne0,
|
|
1805
1949
|
int ne1,
|
|
1806
1950
|
int ne2,
|
|
1807
1951
|
int ne3,
|
|
1808
|
-
enum ggml_scale_mode mode)
|
|
1952
|
+
enum ggml_scale_mode mode),
|
|
1953
|
+
"use ggml_interpolate instead");
|
|
1954
|
+
|
|
1955
|
+
// Up- or downsamples the input to the specified size.
|
|
1956
|
+
// 2D scale modes (eg. bilinear) are applied to the first two dimensions.
|
|
1957
|
+
GGML_API struct ggml_tensor * ggml_interpolate(
|
|
1958
|
+
struct ggml_context * ctx,
|
|
1959
|
+
struct ggml_tensor * a,
|
|
1960
|
+
int64_t ne0,
|
|
1961
|
+
int64_t ne1,
|
|
1962
|
+
int64_t ne2,
|
|
1963
|
+
int64_t ne3,
|
|
1964
|
+
uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
|
|
1809
1965
|
|
|
1810
1966
|
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
|
1811
1967
|
GGML_API struct ggml_tensor * ggml_pad(
|
|
@@ -1868,11 +2024,17 @@ extern "C" {
|
|
|
1868
2024
|
|
|
1869
2025
|
#define GGML_KQ_MASK_PAD 64
|
|
1870
2026
|
|
|
1871
|
-
// q: [n_embd_k, n_batch, n_head,
|
|
1872
|
-
// k: [n_embd_k, n_kv, n_head_kv,
|
|
1873
|
-
// v: [n_embd_v, n_kv, n_head_kv,
|
|
1874
|
-
// mask: [n_kv, n_batch_pad,
|
|
1875
|
-
// res: [n_embd_v, n_head, n_batch,
|
|
2027
|
+
// q: [n_embd_k, n_batch, n_head, ne3 ]
|
|
2028
|
+
// k: [n_embd_k, n_kv, n_head_kv, ne3 ]
|
|
2029
|
+
// v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
|
|
2030
|
+
// mask: [n_kv, n_batch_pad, ne32, ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
|
|
2031
|
+
// res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
|
|
2032
|
+
//
|
|
2033
|
+
// broadcast:
|
|
2034
|
+
// n_head % n_head_kv == 0
|
|
2035
|
+
// n_head % ne32 == 0
|
|
2036
|
+
// ne3 % ne33 == 0
|
|
2037
|
+
//
|
|
1876
2038
|
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
|
|
1877
2039
|
struct ggml_context * ctx,
|
|
1878
2040
|
struct ggml_tensor * q,
|
|
@@ -1911,7 +2073,8 @@ extern "C" {
|
|
|
1911
2073
|
struct ggml_tensor * dt,
|
|
1912
2074
|
struct ggml_tensor * A,
|
|
1913
2075
|
struct ggml_tensor * B,
|
|
1914
|
-
struct ggml_tensor * C
|
|
2076
|
+
struct ggml_tensor * C,
|
|
2077
|
+
struct ggml_tensor * ids);
|
|
1915
2078
|
|
|
1916
2079
|
// partition into non-overlapping windows with padding if needed
|
|
1917
2080
|
// example:
|
|
@@ -71,52 +71,13 @@ extern "C" {
|
|
|
71
71
|
typedef int32_t llama_seq_id;
|
|
72
72
|
|
|
73
73
|
enum llama_vocab_type {
|
|
74
|
-
LLAMA_VOCAB_TYPE_NONE
|
|
75
|
-
LLAMA_VOCAB_TYPE_SPM
|
|
76
|
-
LLAMA_VOCAB_TYPE_BPE
|
|
77
|
-
LLAMA_VOCAB_TYPE_WPM
|
|
78
|
-
LLAMA_VOCAB_TYPE_UGM
|
|
79
|
-
LLAMA_VOCAB_TYPE_RWKV
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
// pre-tokenization types
|
|
83
|
-
enum llama_vocab_pre_type {
|
|
84
|
-
LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0,
|
|
85
|
-
LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1,
|
|
86
|
-
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2,
|
|
87
|
-
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,
|
|
88
|
-
LLAMA_VOCAB_PRE_TYPE_FALCON = 4,
|
|
89
|
-
LLAMA_VOCAB_PRE_TYPE_MPT = 5,
|
|
90
|
-
LLAMA_VOCAB_PRE_TYPE_STARCODER = 6,
|
|
91
|
-
LLAMA_VOCAB_PRE_TYPE_GPT2 = 7,
|
|
92
|
-
LLAMA_VOCAB_PRE_TYPE_REFACT = 8,
|
|
93
|
-
LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9,
|
|
94
|
-
LLAMA_VOCAB_PRE_TYPE_STABLELM2 = 10,
|
|
95
|
-
LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11,
|
|
96
|
-
LLAMA_VOCAB_PRE_TYPE_OLMO = 12,
|
|
97
|
-
LLAMA_VOCAB_PRE_TYPE_DBRX = 13,
|
|
98
|
-
LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
|
|
99
|
-
LLAMA_VOCAB_PRE_TYPE_PORO = 15,
|
|
100
|
-
LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16,
|
|
101
|
-
LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17,
|
|
102
|
-
LLAMA_VOCAB_PRE_TYPE_VIKING = 18,
|
|
103
|
-
LLAMA_VOCAB_PRE_TYPE_JAIS = 19,
|
|
104
|
-
LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20,
|
|
105
|
-
LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21,
|
|
106
|
-
LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
|
|
107
|
-
LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
|
|
108
|
-
LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
|
|
109
|
-
LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
|
|
110
|
-
LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
|
|
111
|
-
LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
|
|
112
|
-
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
|
|
113
|
-
LLAMA_VOCAB_PRE_TYPE_GPT4O = 29,
|
|
114
|
-
LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30,
|
|
115
|
-
LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
|
|
116
|
-
LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
|
|
117
|
-
LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
|
|
118
|
-
LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
|
|
119
|
-
LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
|
|
74
|
+
LLAMA_VOCAB_TYPE_NONE = 0, // For models without vocab
|
|
75
|
+
LLAMA_VOCAB_TYPE_SPM = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
|
|
76
|
+
LLAMA_VOCAB_TYPE_BPE = 2, // GPT-2 tokenizer based on byte-level BPE
|
|
77
|
+
LLAMA_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece
|
|
78
|
+
LLAMA_VOCAB_TYPE_UGM = 4, // T5 tokenizer based on Unigram
|
|
79
|
+
LLAMA_VOCAB_TYPE_RWKV = 5, // RWKV tokenizer based on greedy tokenization
|
|
80
|
+
LLAMA_VOCAB_TYPE_PLAMO2 = 6, // PLaMo-2 tokenizer based on Aho-Corasick with dynamic programming
|
|
120
81
|
};
|
|
121
82
|
|
|
122
83
|
enum llama_rope_type {
|
|
@@ -374,6 +335,9 @@ extern "C" {
|
|
|
374
335
|
bool swa_full; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
|
|
375
336
|
// NOTE: setting to false when n_seq_max > 1 can cause bad performance in some cases
|
|
376
337
|
// ref: https://github.com/ggml-org/llama.cpp/pull/13845#issuecomment-2924800573
|
|
338
|
+
bool kv_unified; // use a unified buffer across the input sequences when computing the attention
|
|
339
|
+
// try to disable when n_seq_max > 1 for improved performance when the sequences do not share a large prefix
|
|
340
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/14363
|
|
377
341
|
};
|
|
378
342
|
|
|
379
343
|
// model quantization parameters
|
|
@@ -764,7 +728,7 @@ extern "C" {
|
|
|
764
728
|
// - lazily on next llama_decode()
|
|
765
729
|
// p0 < 0 : [0, p1]
|
|
766
730
|
// p1 < 0 : [p0, inf)
|
|
767
|
-
DEPRECATED(void llama_kv_self_seq_div(
|
|
731
|
+
DEPRECATED(LLAMA_API void llama_kv_self_seq_div(
|
|
768
732
|
struct llama_context * ctx,
|
|
769
733
|
llama_seq_id seq_id,
|
|
770
734
|
llama_pos p0,
|
|
@@ -992,6 +956,7 @@ extern "C" {
|
|
|
992
956
|
// in the order they have appeared in the batch.
|
|
993
957
|
// Rows: number of tokens for which llama_batch.logits[i] != 0
|
|
994
958
|
// Cols: n_vocab
|
|
959
|
+
// TODO: deprecate in favor of llama_get_logits_ith() (ref: https://github.com/ggml-org/llama.cpp/pull/14853#issuecomment-3113143522)
|
|
995
960
|
LLAMA_API float * llama_get_logits(struct llama_context * ctx);
|
|
996
961
|
|
|
997
962
|
// Logits for the ith token. For positive indices, Equivalent to:
|
|
@@ -1006,6 +971,7 @@ extern "C" {
|
|
|
1006
971
|
// in the order they have appeared in the batch.
|
|
1007
972
|
// shape: [n_outputs*n_embd]
|
|
1008
973
|
// Otherwise, returns NULL.
|
|
974
|
+
// TODO: deprecate in favor of llama_get_embeddings_ith() (ref: https://github.com/ggml-org/llama.cpp/pull/14853#issuecomment-3113143522)
|
|
1009
975
|
LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
|
|
1010
976
|
|
|
1011
977
|
// Get the embeddings for the ith token. For positive indices, Equivalent to:
|
|
@@ -1044,6 +1010,7 @@ extern "C" {
|
|
|
1044
1010
|
LLAMA_API llama_token llama_vocab_sep(const struct llama_vocab * vocab); // sentence separator
|
|
1045
1011
|
LLAMA_API llama_token llama_vocab_nl (const struct llama_vocab * vocab); // next-line
|
|
1046
1012
|
LLAMA_API llama_token llama_vocab_pad(const struct llama_vocab * vocab); // padding
|
|
1013
|
+
LLAMA_API llama_token llama_vocab_mask(const struct llama_vocab * vocab); // mask
|
|
1047
1014
|
|
|
1048
1015
|
LLAMA_API bool llama_vocab_get_add_bos(const struct llama_vocab * vocab);
|
|
1049
1016
|
LLAMA_API bool llama_vocab_get_add_eos(const struct llama_vocab * vocab);
|
|
@@ -1429,6 +1396,7 @@ extern "C" {
|
|
|
1429
1396
|
|
|
1430
1397
|
int32_t n_p_eval;
|
|
1431
1398
|
int32_t n_eval;
|
|
1399
|
+
int32_t n_reused; // number of times a ggml compute graph had been reused
|
|
1432
1400
|
};
|
|
1433
1401
|
|
|
1434
1402
|
struct llama_perf_sampler_data {
|
package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama
CHANGED
|
Binary file
|
|
Binary file
|