@novastera-oss/llamarn 0.2.9 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/build.gradle +2 -1
- package/android/proguard-rules.pro +12 -0
- package/android/src/main/cpp/include/llama.h +15 -47
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +0 -1
- package/cpp/llama.cpp/CMakePresets.json +11 -0
- package/cpp/llama.cpp/CODEOWNERS +1 -0
- package/cpp/llama.cpp/README.md +8 -8
- package/cpp/llama.cpp/build-xcframework.sh +1 -1
- package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
- package/cpp/llama.cpp/common/arg.cpp +62 -1
- package/cpp/llama.cpp/common/chat.cpp +37 -20
- package/cpp/llama.cpp/common/chat.h +2 -0
- package/cpp/llama.cpp/common/common.cpp +22 -6
- package/cpp/llama.cpp/common/common.h +22 -4
- package/cpp/llama.cpp/convert_hf_to_gguf.py +1250 -43
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +21 -13
- package/cpp/llama.cpp/ggml/CMakeLists.txt +13 -3
- package/cpp/llama.cpp/ggml/cmake/ggml-config.cmake.in +85 -47
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +173 -10
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-alloc.c +0 -15
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +7 -8
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +44 -38
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +126 -8
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +130 -22
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +138 -18
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +11 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +28 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +109 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +88 -10
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +343 -1094
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1206 -163
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +36 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +142 -9
- package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +31 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +86 -17
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh +225 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +41 -301
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +85 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +47 -60
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +29 -42
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +46 -59
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +36 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +38 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +23 -36
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +3 -13
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +255 -99
- package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +111 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +1152 -695
- package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +92 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cu +275 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +104 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +27 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +80 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +48 -12
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +572 -106
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +599 -105
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +18 -4
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +800 -42
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +4 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +693 -1034
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +14 -26
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +191 -55
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +8 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +15 -18
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +2 -6
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +991 -307
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +265 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +59 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +18 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +84 -9
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +907 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +35 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +56 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +386 -67
- package/cpp/llama.cpp/ggml/src/gguf.cpp +8 -1
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +307 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +8 -2
- package/cpp/llama.cpp/gguf-py/gguf/metadata.py +4 -0
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_dump.py +24 -1
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +122 -47
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +12 -3
- package/cpp/llama.cpp/include/llama.h +15 -47
- package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja +34 -0
- package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja +43 -0
- package/cpp/llama.cpp/requirements/requirements-all.txt +1 -0
- package/cpp/llama.cpp/requirements/requirements-server-bench.txt +5 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +316 -3
- package/cpp/llama.cpp/src/llama-arch.h +23 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +103 -71
- package/cpp/llama.cpp/src/llama-batch.h +31 -18
- package/cpp/llama.cpp/src/llama-chat.cpp +58 -1
- package/cpp/llama.cpp/src/llama-chat.h +3 -0
- package/cpp/llama.cpp/src/llama-context.cpp +180 -106
- package/cpp/llama.cpp/src/llama-context.h +26 -16
- package/cpp/llama.cpp/src/llama-cparams.h +3 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +310 -211
- package/cpp/llama.cpp/src/llama-graph.h +184 -122
- package/cpp/llama.cpp/src/llama-hparams.cpp +47 -1
- package/cpp/llama.cpp/src/llama-hparams.h +13 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +38 -22
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +7 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +849 -304
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +143 -47
- package/cpp/llama.cpp/src/llama-kv-cells.h +62 -10
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +10 -4
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +3 -1
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +36 -11
- package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
- package/cpp/llama.cpp/src/llama-memory.h +3 -0
- package/cpp/llama.cpp/src/llama-model.cpp +3545 -719
- package/cpp/llama.cpp/src/llama-model.h +21 -4
- package/cpp/llama.cpp/src/llama-quant.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +376 -10
- package/cpp/llama.cpp/src/llama-vocab.h +43 -0
- package/cpp/llama.cpp/src/unicode.cpp +207 -0
- package/cpp/llama.cpp/src/unicode.h +2 -0
- package/ios/include/chat.h +2 -0
- package/ios/include/common.h +22 -4
- package/ios/include/llama.h +15 -47
- package/ios/libs/llama.xcframework/Info.plist +13 -13
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4016 -3766
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5303 -4926
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5274 -4897
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4044 -3794
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +4 -4
- package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
- package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
|
@@ -7,7 +7,6 @@ import pathlib
|
|
|
7
7
|
import re
|
|
8
8
|
|
|
9
9
|
import requests
|
|
10
|
-
import sys
|
|
11
10
|
import json
|
|
12
11
|
import shutil
|
|
13
12
|
import argparse
|
|
@@ -69,8 +68,7 @@ args = parser.parse_args()
|
|
|
69
68
|
hf_token = args.hf_token if args.hf_token is not None else hf_token
|
|
70
69
|
|
|
71
70
|
if hf_token is None:
|
|
72
|
-
logger.
|
|
73
|
-
sys.exit(1)
|
|
71
|
+
logger.warning("HF token not found. You can provide it as an argument or set it in ~/.cache/huggingface/token")
|
|
74
72
|
|
|
75
73
|
# TODO: this string has to exercise as much pre-tokenizer functionality as possible
|
|
76
74
|
# will be updated with time - contributions welcome
|
|
@@ -128,6 +126,10 @@ models = [
|
|
|
128
126
|
{"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", },
|
|
129
127
|
{"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
|
|
130
128
|
{"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
|
|
129
|
+
{"name": "a.x-4.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/skt/A.X-4.0", },
|
|
130
|
+
{"name": "midm-2.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct", },
|
|
131
|
+
{"name": "lfm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LiquidAI/LFM2-Tokenizer"},
|
|
132
|
+
{"name": "exaone4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B", },
|
|
131
133
|
]
|
|
132
134
|
|
|
133
135
|
# some models are known to be broken upstream, so we will skip them as exceptions
|
|
@@ -137,11 +139,18 @@ pre_computed_hashes = [
|
|
|
137
139
|
{"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516"},
|
|
138
140
|
{"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2"},
|
|
139
141
|
{"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", "chkhsh": "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35"},
|
|
142
|
+
{"name": "hunyuan", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Hunyuan-A13B-Instruct", "chkhsh": "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664"},
|
|
143
|
+
# falcon-h1 series uses 4 different tokenizers across model sizes (0.5b - 34b), hence we need to define 4 different hashes
|
|
144
|
+
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base", "chkhsh": "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6"},
|
|
145
|
+
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-1B-Base", "chkhsh": "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86"},
|
|
146
|
+
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-7B-Base", "chkhsh": "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896"},
|
|
147
|
+
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-34B-Base", "chkhsh": "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b"},
|
|
148
|
+
{"name": "kimi-k2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/moonshotai/Kimi-K2-Base", "chkhsh": "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890"},
|
|
140
149
|
]
|
|
141
150
|
|
|
142
151
|
|
|
143
152
|
def download_file_with_auth(url, token, save_path):
|
|
144
|
-
headers = {"Authorization": f"Bearer {token}"}
|
|
153
|
+
headers = {"Authorization": f"Bearer {token}"} if token else None
|
|
145
154
|
response = sess.get(url, headers=headers)
|
|
146
155
|
response.raise_for_status()
|
|
147
156
|
os.makedirs(os.path.dirname(save_path), exist_ok=True)
|
|
@@ -222,7 +231,7 @@ for model in models:
|
|
|
222
231
|
# generate the source code for the convert_hf_to_gguf.py:get_vocab_base_pre() function:
|
|
223
232
|
|
|
224
233
|
src_ifs = ""
|
|
225
|
-
for model in [*
|
|
234
|
+
for model in [*pre_computed_hashes, *all_models]:
|
|
226
235
|
name = model["name"]
|
|
227
236
|
tokt = model["tokt"]
|
|
228
237
|
chkhsh = model.get("chkhsh")
|
|
@@ -230,11 +239,6 @@ for model in [*all_models, *pre_computed_hashes]:
|
|
|
230
239
|
if tokt == TOKENIZER_TYPE.SPM or tokt == TOKENIZER_TYPE.UGM:
|
|
231
240
|
continue
|
|
232
241
|
|
|
233
|
-
# Skip if the tokenizer folder does not exist or there are other download issues previously
|
|
234
|
-
if not os.path.exists(f"models/tokenizers/{name}"):
|
|
235
|
-
logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
|
|
236
|
-
continue
|
|
237
|
-
|
|
238
242
|
# create the tokenizer
|
|
239
243
|
if chkhsh is not None:
|
|
240
244
|
# if the model has a pre-computed hash, use it
|
|
@@ -244,15 +248,19 @@ for model in [*all_models, *pre_computed_hashes]:
|
|
|
244
248
|
chkhsh = existing_models[name]
|
|
245
249
|
else:
|
|
246
250
|
# otherwise, compute the hash of the tokenizer
|
|
251
|
+
|
|
252
|
+
# Fail if the tokenizer folder with config does not exist or there are other download issues previously
|
|
253
|
+
if not os.path.isfile(f"models/tokenizers/{name}/tokenizer_config.json"):
|
|
254
|
+
raise OSError(f"Config for tokenizer {name} not found. The model may not exist or is not accessible with the provided token.")
|
|
255
|
+
|
|
247
256
|
try:
|
|
248
257
|
logger.info(f"Loading tokenizer from {f'models/tokenizers/{name}'}...")
|
|
249
258
|
if name == "t5":
|
|
250
259
|
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
|
|
251
260
|
else:
|
|
252
261
|
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
|
|
253
|
-
except
|
|
254
|
-
|
|
255
|
-
continue # Skip to the next model if the tokenizer can't be loaded
|
|
262
|
+
except Exception as e:
|
|
263
|
+
raise OSError(f"Error loading tokenizer for model {name}.") from e
|
|
256
264
|
|
|
257
265
|
chktok = tokenizer.encode(CHK_TXT)
|
|
258
266
|
chkhsh = sha256(str(chktok).encode()).hexdigest()
|
|
@@ -131,7 +131,7 @@ option(GGML_RVV "ggml: enable rvv" ON)
|
|
|
131
131
|
option(GGML_RV_ZFH "ggml: enable riscv zfh" OFF)
|
|
132
132
|
option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF)
|
|
133
133
|
option(GGML_VXE "ggml: enable vxe" ON)
|
|
134
|
-
option(GGML_NNPA "ggml: enable nnpa"
|
|
134
|
+
option(GGML_NNPA "ggml: enable nnpa" OFF) # temp disabled by default, see: https://github.com/ggml-org/llama.cpp/issues/14877
|
|
135
135
|
|
|
136
136
|
option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
|
|
137
137
|
set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
|
|
@@ -174,6 +174,8 @@ option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental,
|
|
|
174
174
|
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
|
|
175
175
|
option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
|
|
176
176
|
option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF)
|
|
177
|
+
option(GGML_MUSA_GRAPHS "ggml: use MUSA graph, experimental, unstable" OFF)
|
|
178
|
+
option(GGML_MUSA_MUDNN_COPY "ggml: enable muDNN for accelerated copy" OFF)
|
|
177
179
|
option(GGML_VULKAN "ggml: use Vulkan" OFF)
|
|
178
180
|
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
|
|
179
181
|
option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
|
|
@@ -181,7 +183,8 @@ option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug ou
|
|
|
181
183
|
option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
|
|
182
184
|
option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
|
|
183
185
|
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
|
|
184
|
-
option(
|
|
186
|
+
option(GGML_WEBGPU "ggml: use WebGPU" OFF)
|
|
187
|
+
option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
|
|
185
188
|
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
|
|
186
189
|
option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF)
|
|
187
190
|
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
|
|
@@ -266,12 +269,12 @@ set(GGML_PUBLIC_HEADERS
|
|
|
266
269
|
include/ggml-cann.h
|
|
267
270
|
include/ggml-cpp.h
|
|
268
271
|
include/ggml-cuda.h
|
|
269
|
-
include/ggml-kompute.h
|
|
270
272
|
include/ggml-opt.h
|
|
271
273
|
include/ggml-metal.h
|
|
272
274
|
include/ggml-rpc.h
|
|
273
275
|
include/ggml-sycl.h
|
|
274
276
|
include/ggml-vulkan.h
|
|
277
|
+
include/ggml-webgpu.h
|
|
275
278
|
include/gguf.h)
|
|
276
279
|
|
|
277
280
|
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
|
|
@@ -360,6 +363,13 @@ write_basic_package_version_file(
|
|
|
360
363
|
VERSION ${GGML_INSTALL_VERSION}
|
|
361
364
|
COMPATIBILITY SameMajorVersion)
|
|
362
365
|
|
|
366
|
+
target_compile_definitions(ggml-base PRIVATE
|
|
367
|
+
GGML_VERSION="${GGML_INSTALL_VERSION}"
|
|
368
|
+
GGML_COMMIT="${GGML_BUILD_COMMIT}"
|
|
369
|
+
)
|
|
370
|
+
message(STATUS "ggml version: ${GGML_INSTALL_VERSION}")
|
|
371
|
+
message(STATUS "ggml commit: ${GGML_BUILD_COMMIT}")
|
|
372
|
+
|
|
363
373
|
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake
|
|
364
374
|
${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake
|
|
365
375
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml)
|
|
@@ -1,94 +1,130 @@
|
|
|
1
|
-
|
|
2
|
-
@GGML_VARIABLES_EXPANDED@
|
|
3
|
-
|
|
4
1
|
@PACKAGE_INIT@
|
|
5
2
|
|
|
6
|
-
|
|
7
|
-
set_and_check(GGML_LIB_DIR "@PACKAGE_GGML_LIB_INSTALL_DIR@")
|
|
8
|
-
#set_and_check(GGML_BIN_DIR "@PACKAGE_GGML_BIN_INSTALL_DIR@")
|
|
9
|
-
|
|
10
|
-
find_package(Threads REQUIRED)
|
|
11
|
-
|
|
12
|
-
find_library(GGML_LIBRARY ggml
|
|
13
|
-
REQUIRED
|
|
14
|
-
HINTS ${GGML_LIB_DIR}
|
|
15
|
-
NO_CMAKE_FIND_ROOT_PATH)
|
|
16
|
-
|
|
17
|
-
add_library(ggml::ggml UNKNOWN IMPORTED)
|
|
18
|
-
set_target_properties(ggml::ggml
|
|
19
|
-
PROPERTIES
|
|
20
|
-
IMPORTED_LOCATION "${GGML_LIBRARY}")
|
|
21
|
-
|
|
22
|
-
find_library(GGML_BASE_LIBRARY ggml-base
|
|
23
|
-
REQUIRED
|
|
24
|
-
HINTS ${GGML_LIB_DIR}
|
|
25
|
-
NO_CMAKE_FIND_ROOT_PATH)
|
|
26
|
-
|
|
27
|
-
add_library(ggml::ggml-base UNKNOWN IMPORTED)
|
|
28
|
-
set_target_properties(ggml::ggml-base
|
|
29
|
-
PROPERTIES
|
|
30
|
-
IMPORTED_LOCATION "${GGML_BASE_LIBRARY}")
|
|
3
|
+
@GGML_VARIABLES_EXPANDED@
|
|
31
4
|
|
|
5
|
+
# Find all dependencies before creating any target.
|
|
6
|
+
include(CMakeFindDependencyMacro)
|
|
7
|
+
find_dependency(Threads)
|
|
32
8
|
if (NOT GGML_SHARED_LIB)
|
|
9
|
+
set(GGML_CPU_INTERFACE_LINK_LIBRARIES "")
|
|
10
|
+
set(GGML_CPU_INTERFACE_LINK_OPTIONS "")
|
|
11
|
+
|
|
33
12
|
if (APPLE AND GGML_ACCELERATE)
|
|
34
|
-
find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
|
|
13
|
+
find_library(ACCELERATE_FRAMEWORK Accelerate)
|
|
14
|
+
if(NOT ACCELERATE_FRAMEWORK)
|
|
15
|
+
set(${CMAKE_FIND_PACKAGE_NAME}_FOUND 0)
|
|
16
|
+
return()
|
|
17
|
+
endif()
|
|
35
18
|
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES ${ACCELERATE_FRAMEWORK})
|
|
36
19
|
endif()
|
|
37
20
|
|
|
38
|
-
if (
|
|
39
|
-
|
|
21
|
+
if (GGML_OPENMP_ENABLED)
|
|
22
|
+
find_dependency(OpenMP)
|
|
40
23
|
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
|
|
41
24
|
endif()
|
|
42
25
|
|
|
43
26
|
if (GGML_CPU_HBM)
|
|
44
|
-
find_library(memkind memkind REQUIRED)
|
|
27
|
+
find_library(memkind memkind)
|
|
28
|
+
if(NOT memkind)
|
|
29
|
+
set(${CMAKE_FIND_PACKAGE_NAME}_FOUND 0)
|
|
30
|
+
return()
|
|
31
|
+
endif()
|
|
45
32
|
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES memkind)
|
|
46
33
|
endif()
|
|
47
34
|
|
|
48
35
|
if (GGML_BLAS)
|
|
49
|
-
|
|
36
|
+
find_dependency(BLAS)
|
|
50
37
|
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES ${BLAS_LIBRARIES})
|
|
51
38
|
list(APPEND GGML_CPU_INTERFACE_LINK_OPTIONS ${BLAS_LINKER_FLAGS})
|
|
52
39
|
endif()
|
|
53
40
|
|
|
54
41
|
if (GGML_CUDA)
|
|
55
|
-
|
|
42
|
+
set(GGML_CUDA_INTERFACE_LINK_LIBRARIES "")
|
|
43
|
+
find_dependency(CUDAToolkit)
|
|
44
|
+
if (GGML_STATIC)
|
|
45
|
+
list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:CUDA::cudart_static>)
|
|
46
|
+
if (WIN32)
|
|
47
|
+
list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:CUDA::cublas> $<LINK_ONLY:CUDA::cublasLt>)
|
|
48
|
+
else()
|
|
49
|
+
list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:CUDA::cublas_static> $<LINK_ONLY:CUDA::cublasLt_static>)
|
|
50
|
+
endif()
|
|
51
|
+
endif()
|
|
52
|
+
if (NOT GGML_CUDA_NO_VMM)
|
|
53
|
+
list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:CUDA::cuda_driver>)
|
|
54
|
+
endif()
|
|
56
55
|
endif()
|
|
57
56
|
|
|
58
57
|
if (GGML_METAL)
|
|
59
|
-
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
|
|
60
|
-
find_library(METAL_FRAMEWORK Metal REQUIRED)
|
|
61
|
-
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
|
|
58
|
+
find_library(FOUNDATION_LIBRARY Foundation)
|
|
59
|
+
find_library(METAL_FRAMEWORK Metal)
|
|
60
|
+
find_library(METALKIT_FRAMEWORK MetalKit)
|
|
61
|
+
if(NOT FOUNDATION_LIBRARY OR NOT METAL_FRAMEWORK OR NOT METALKIT_FRAMEWORK)
|
|
62
|
+
set(${CMAKE_FIND_PACKAGE_NAME}_FOUND 0)
|
|
63
|
+
return()
|
|
64
|
+
endif()
|
|
65
|
+
set(GGML_METAL_INTERFACE_LINK_LIBRARIES
|
|
66
|
+
${FOUNDATION_LIBRARY} ${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK})
|
|
67
|
+
endif()
|
|
62
68
|
|
|
63
|
-
|
|
64
|
-
|
|
69
|
+
if (GGML_OPENCL)
|
|
70
|
+
find_dependency(OpenCL)
|
|
71
|
+
set(GGML_OPENCL_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:OpenCL::OpenCL>)
|
|
65
72
|
endif()
|
|
66
73
|
|
|
67
74
|
if (GGML_VULKAN)
|
|
68
|
-
|
|
69
|
-
|
|
75
|
+
find_dependency(Vulkan)
|
|
76
|
+
set(GGML_VULKAN_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:Vulkan::Vulkan>)
|
|
70
77
|
endif()
|
|
71
78
|
|
|
72
79
|
if (GGML_HIP)
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
80
|
+
find_dependency(hip)
|
|
81
|
+
find_dependency(hipblas)
|
|
82
|
+
find_dependency(rocblas)
|
|
83
|
+
set(GGML_HIP_INTERFACE_LINK_LIBRARIES hip::host roc::rocblas roc::hipblas)
|
|
77
84
|
endif()
|
|
78
85
|
|
|
79
86
|
if (GGML_SYCL)
|
|
87
|
+
set(GGML_SYCL_INTERFACE_LINK_LIBRARIES "")
|
|
80
88
|
find_package(DNNL)
|
|
81
89
|
if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
|
|
82
90
|
list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES DNNL::dnnl)
|
|
83
91
|
endif()
|
|
84
92
|
if (WIN32)
|
|
85
|
-
|
|
86
|
-
|
|
93
|
+
find_dependency(IntelSYCL)
|
|
94
|
+
find_dependency(MKL)
|
|
87
95
|
list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
|
|
88
96
|
endif()
|
|
89
97
|
endif()
|
|
90
98
|
endif()
|
|
91
99
|
|
|
100
|
+
set_and_check(GGML_INCLUDE_DIR "@PACKAGE_GGML_INCLUDE_INSTALL_DIR@")
|
|
101
|
+
set_and_check(GGML_LIB_DIR "@PACKAGE_GGML_LIB_INSTALL_DIR@")
|
|
102
|
+
#set_and_check(GGML_BIN_DIR "@PACKAGE_GGML_BIN_INSTALL_DIR@")
|
|
103
|
+
|
|
104
|
+
if(NOT TARGET ggml::ggml)
|
|
105
|
+
|
|
106
|
+
find_package(Threads REQUIRED)
|
|
107
|
+
|
|
108
|
+
find_library(GGML_LIBRARY ggml
|
|
109
|
+
REQUIRED
|
|
110
|
+
HINTS ${GGML_LIB_DIR}
|
|
111
|
+
NO_CMAKE_FIND_ROOT_PATH)
|
|
112
|
+
|
|
113
|
+
add_library(ggml::ggml UNKNOWN IMPORTED)
|
|
114
|
+
set_target_properties(ggml::ggml
|
|
115
|
+
PROPERTIES
|
|
116
|
+
IMPORTED_LOCATION "${GGML_LIBRARY}")
|
|
117
|
+
|
|
118
|
+
find_library(GGML_BASE_LIBRARY ggml-base
|
|
119
|
+
REQUIRED
|
|
120
|
+
HINTS ${GGML_LIB_DIR}
|
|
121
|
+
NO_CMAKE_FIND_ROOT_PATH)
|
|
122
|
+
|
|
123
|
+
add_library(ggml::ggml-base UNKNOWN IMPORTED)
|
|
124
|
+
set_target_properties(ggml::ggml-base
|
|
125
|
+
PROPERTIES
|
|
126
|
+
IMPORTED_LOCATION "${GGML_BASE_LIBRARY}")
|
|
127
|
+
|
|
92
128
|
set(_ggml_all_targets "")
|
|
93
129
|
foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
|
|
94
130
|
string(REPLACE "-" "_" _ggml_backend_pfx "${_ggml_backend}")
|
|
@@ -149,4 +185,6 @@ set_target_properties(ggml::all
|
|
|
149
185
|
PROPERTIES
|
|
150
186
|
INTERFACE_LINK_LIBRARIES "${_ggml_all_targets}")
|
|
151
187
|
|
|
188
|
+
endif() # TARGET ggml::ggml
|
|
189
|
+
|
|
152
190
|
check_required_components(ggml)
|
|
@@ -339,7 +339,7 @@ extern "C" {
|
|
|
339
339
|
typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
|
|
340
340
|
|
|
341
341
|
// Compare the output of two backends
|
|
342
|
-
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
|
|
342
|
+
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node);
|
|
343
343
|
|
|
344
344
|
// Tensor initialization
|
|
345
345
|
GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "ggml.h"
|
|
4
|
+
#include "ggml-backend.h"
|
|
5
|
+
|
|
6
|
+
#ifdef __cplusplus
|
|
7
|
+
extern "C" {
|
|
8
|
+
#endif
|
|
9
|
+
|
|
10
|
+
#define GGML_WEBGPU_NAME "WebGPU"
|
|
11
|
+
|
|
12
|
+
// Needed for examples in ggml
|
|
13
|
+
GGML_BACKEND_API ggml_backend_t ggml_backend_webgpu_init(void);
|
|
14
|
+
|
|
15
|
+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_webgpu_reg(void);
|
|
16
|
+
|
|
17
|
+
#ifdef __cplusplus
|
|
18
|
+
}
|
|
19
|
+
#endif
|
|
@@ -314,6 +314,13 @@
|
|
|
314
314
|
extern "C" {
|
|
315
315
|
#endif
|
|
316
316
|
|
|
317
|
+
// Function type used in fatal error callbacks
|
|
318
|
+
typedef void (*ggml_abort_callback_t)(const char * error_message);
|
|
319
|
+
|
|
320
|
+
// Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
|
|
321
|
+
// Returns the old callback for chaining
|
|
322
|
+
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback);
|
|
323
|
+
|
|
317
324
|
GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
|
|
318
325
|
GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
|
|
319
326
|
|
|
@@ -482,12 +489,13 @@ extern "C" {
|
|
|
482
489
|
GGML_OP_CONV_TRANSPOSE_1D,
|
|
483
490
|
GGML_OP_IM2COL,
|
|
484
491
|
GGML_OP_IM2COL_BACK,
|
|
492
|
+
GGML_OP_CONV_2D,
|
|
485
493
|
GGML_OP_CONV_2D_DW,
|
|
486
494
|
GGML_OP_CONV_TRANSPOSE_2D,
|
|
487
495
|
GGML_OP_POOL_1D,
|
|
488
496
|
GGML_OP_POOL_2D,
|
|
489
497
|
GGML_OP_POOL_2D_BACK,
|
|
490
|
-
GGML_OP_UPSCALE,
|
|
498
|
+
GGML_OP_UPSCALE,
|
|
491
499
|
GGML_OP_PAD,
|
|
492
500
|
GGML_OP_PAD_REFLECT_1D,
|
|
493
501
|
GGML_OP_ROLL,
|
|
@@ -520,6 +528,8 @@ extern "C" {
|
|
|
520
528
|
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
|
521
529
|
GGML_OP_OPT_STEP_ADAMW,
|
|
522
530
|
|
|
531
|
+
GGML_OP_GLU,
|
|
532
|
+
|
|
523
533
|
GGML_OP_COUNT,
|
|
524
534
|
};
|
|
525
535
|
|
|
@@ -543,6 +553,16 @@ extern "C" {
|
|
|
543
553
|
GGML_UNARY_OP_COUNT,
|
|
544
554
|
};
|
|
545
555
|
|
|
556
|
+
enum ggml_glu_op {
|
|
557
|
+
GGML_GLU_OP_REGLU,
|
|
558
|
+
GGML_GLU_OP_GEGLU,
|
|
559
|
+
GGML_GLU_OP_SWIGLU,
|
|
560
|
+
GGML_GLU_OP_GEGLU_ERF,
|
|
561
|
+
GGML_GLU_OP_GEGLU_QUICK,
|
|
562
|
+
|
|
563
|
+
GGML_GLU_OP_COUNT,
|
|
564
|
+
};
|
|
565
|
+
|
|
546
566
|
enum ggml_object_type {
|
|
547
567
|
GGML_OBJECT_TYPE_TENSOR,
|
|
548
568
|
GGML_OBJECT_TYPE_GRAPH,
|
|
@@ -628,6 +648,9 @@ extern "C" {
|
|
|
628
648
|
|
|
629
649
|
// misc
|
|
630
650
|
|
|
651
|
+
GGML_API const char * ggml_version(void);
|
|
652
|
+
GGML_API const char * ggml_commit(void);
|
|
653
|
+
|
|
631
654
|
GGML_API void ggml_time_init(void); // call this once at the beginning of the program
|
|
632
655
|
GGML_API int64_t ggml_time_ms(void);
|
|
633
656
|
GGML_API int64_t ggml_time_us(void);
|
|
@@ -658,6 +681,7 @@ extern "C" {
|
|
|
658
681
|
GGML_API const char * ggml_op_symbol(enum ggml_op op);
|
|
659
682
|
|
|
660
683
|
GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
|
|
684
|
+
GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op);
|
|
661
685
|
GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
|
|
662
686
|
|
|
663
687
|
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
|
|
@@ -762,6 +786,7 @@ extern "C" {
|
|
|
762
786
|
GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
|
|
763
787
|
|
|
764
788
|
GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
|
|
789
|
+
GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor);
|
|
765
790
|
|
|
766
791
|
GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
|
|
767
792
|
GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
|
|
@@ -1090,6 +1115,89 @@ extern "C" {
|
|
|
1090
1115
|
struct ggml_context * ctx,
|
|
1091
1116
|
struct ggml_tensor * a);
|
|
1092
1117
|
|
|
1118
|
+
// gated linear unit ops
|
|
1119
|
+
// A: n columns, r rows,
|
|
1120
|
+
// result is n / 2 columns, r rows,
|
|
1121
|
+
// expects gate in second half of row, unless swapped is true
|
|
1122
|
+
GGML_API struct ggml_tensor * ggml_glu(
|
|
1123
|
+
struct ggml_context * ctx,
|
|
1124
|
+
struct ggml_tensor * a,
|
|
1125
|
+
enum ggml_glu_op op,
|
|
1126
|
+
bool swapped);
|
|
1127
|
+
|
|
1128
|
+
GGML_API struct ggml_tensor * ggml_reglu(
|
|
1129
|
+
struct ggml_context * ctx,
|
|
1130
|
+
struct ggml_tensor * a);
|
|
1131
|
+
|
|
1132
|
+
GGML_API struct ggml_tensor * ggml_reglu_swapped(
|
|
1133
|
+
struct ggml_context * ctx,
|
|
1134
|
+
struct ggml_tensor * a);
|
|
1135
|
+
|
|
1136
|
+
GGML_API struct ggml_tensor * ggml_geglu(
|
|
1137
|
+
struct ggml_context * ctx,
|
|
1138
|
+
struct ggml_tensor * a);
|
|
1139
|
+
|
|
1140
|
+
GGML_API struct ggml_tensor * ggml_geglu_swapped(
|
|
1141
|
+
struct ggml_context * ctx,
|
|
1142
|
+
struct ggml_tensor * a);
|
|
1143
|
+
|
|
1144
|
+
GGML_API struct ggml_tensor * ggml_swiglu(
|
|
1145
|
+
struct ggml_context * ctx,
|
|
1146
|
+
struct ggml_tensor * a);
|
|
1147
|
+
|
|
1148
|
+
GGML_API struct ggml_tensor * ggml_swiglu_swapped(
|
|
1149
|
+
struct ggml_context * ctx,
|
|
1150
|
+
struct ggml_tensor * a);
|
|
1151
|
+
|
|
1152
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf(
|
|
1153
|
+
struct ggml_context * ctx,
|
|
1154
|
+
struct ggml_tensor * a);
|
|
1155
|
+
|
|
1156
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
|
|
1157
|
+
struct ggml_context * ctx,
|
|
1158
|
+
struct ggml_tensor * a);
|
|
1159
|
+
|
|
1160
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick(
|
|
1161
|
+
struct ggml_context * ctx,
|
|
1162
|
+
struct ggml_tensor * a);
|
|
1163
|
+
|
|
1164
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
|
|
1165
|
+
struct ggml_context * ctx,
|
|
1166
|
+
struct ggml_tensor * a);
|
|
1167
|
+
|
|
1168
|
+
// A: n columns, r rows,
|
|
1169
|
+
// B: n columns, r rows,
|
|
1170
|
+
GGML_API struct ggml_tensor * ggml_glu_split(
|
|
1171
|
+
struct ggml_context * ctx,
|
|
1172
|
+
struct ggml_tensor * a,
|
|
1173
|
+
struct ggml_tensor * b,
|
|
1174
|
+
enum ggml_glu_op op);
|
|
1175
|
+
|
|
1176
|
+
GGML_API struct ggml_tensor * ggml_reglu_split(
|
|
1177
|
+
struct ggml_context * ctx,
|
|
1178
|
+
struct ggml_tensor * a,
|
|
1179
|
+
struct ggml_tensor * b);
|
|
1180
|
+
|
|
1181
|
+
GGML_API struct ggml_tensor * ggml_geglu_split(
|
|
1182
|
+
struct ggml_context * ctx,
|
|
1183
|
+
struct ggml_tensor * a,
|
|
1184
|
+
struct ggml_tensor * b);
|
|
1185
|
+
|
|
1186
|
+
GGML_API struct ggml_tensor * ggml_swiglu_split(
|
|
1187
|
+
struct ggml_context * ctx,
|
|
1188
|
+
struct ggml_tensor * a,
|
|
1189
|
+
struct ggml_tensor * b);
|
|
1190
|
+
|
|
1191
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf_split(
|
|
1192
|
+
struct ggml_context * ctx,
|
|
1193
|
+
struct ggml_tensor * a,
|
|
1194
|
+
struct ggml_tensor * b);
|
|
1195
|
+
|
|
1196
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick_split(
|
|
1197
|
+
struct ggml_context * ctx,
|
|
1198
|
+
struct ggml_tensor * a,
|
|
1199
|
+
struct ggml_tensor * b);
|
|
1200
|
+
|
|
1093
1201
|
// normalize along rows
|
|
1094
1202
|
GGML_API struct ggml_tensor * ggml_norm(
|
|
1095
1203
|
struct ggml_context * ctx,
|
|
@@ -1189,6 +1297,19 @@ extern "C" {
|
|
|
1189
1297
|
struct ggml_tensor * a,
|
|
1190
1298
|
float s);
|
|
1191
1299
|
|
|
1300
|
+
// x = s * a + b
|
|
1301
|
+
GGML_API struct ggml_tensor * ggml_scale_bias(
|
|
1302
|
+
struct ggml_context * ctx,
|
|
1303
|
+
struct ggml_tensor * a,
|
|
1304
|
+
float s,
|
|
1305
|
+
float b);
|
|
1306
|
+
|
|
1307
|
+
GGML_API struct ggml_tensor * ggml_scale_bias_inplace(
|
|
1308
|
+
struct ggml_context * ctx,
|
|
1309
|
+
struct ggml_tensor * a,
|
|
1310
|
+
float s,
|
|
1311
|
+
float b);
|
|
1312
|
+
|
|
1192
1313
|
// b -> view(a,offset,nb1,nb2,3), return modified a
|
|
1193
1314
|
GGML_API struct ggml_tensor * ggml_set(
|
|
1194
1315
|
struct ggml_context * ctx,
|
|
@@ -1433,8 +1554,14 @@ extern "C" {
|
|
|
1433
1554
|
struct ggml_context * ctx,
|
|
1434
1555
|
struct ggml_tensor * a);
|
|
1435
1556
|
|
|
1557
|
+
// a [ne0, ne01, ne02, ne03]
|
|
1558
|
+
// mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
|
|
1559
|
+
//
|
|
1560
|
+
// broadcast:
|
|
1561
|
+
// ne02 % ne12 == 0
|
|
1562
|
+
// ne03 % ne13 == 0
|
|
1563
|
+
//
|
|
1436
1564
|
// fused soft_max(a*scale + mask*(ALiBi slope))
|
|
1437
|
-
// mask is optional
|
|
1438
1565
|
// max_bias = 0.0f for no ALiBi
|
|
1439
1566
|
GGML_API struct ggml_tensor * ggml_soft_max_ext(
|
|
1440
1567
|
struct ggml_context * ctx,
|
|
@@ -1744,6 +1871,17 @@ extern "C" {
|
|
|
1744
1871
|
struct ggml_tensor * b,
|
|
1745
1872
|
int stride);
|
|
1746
1873
|
|
|
1874
|
+
GGML_API struct ggml_tensor * ggml_conv_2d_direct(
|
|
1875
|
+
struct ggml_context * ctx,
|
|
1876
|
+
struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC]
|
|
1877
|
+
struct ggml_tensor * b, // input data [W, H, C, N]
|
|
1878
|
+
int s0, // stride dimension 0
|
|
1879
|
+
int s1, // stride dimension 1
|
|
1880
|
+
int p0, // padding dimension 0
|
|
1881
|
+
int p1, // padding dimension 1
|
|
1882
|
+
int d0, // dilation dimension 0
|
|
1883
|
+
int d1); // dilation dimension 1
|
|
1884
|
+
|
|
1747
1885
|
enum ggml_op_pool {
|
|
1748
1886
|
GGML_OP_POOL_MAX,
|
|
1749
1887
|
GGML_OP_POOL_AVG,
|
|
@@ -1786,6 +1924,12 @@ extern "C" {
|
|
|
1786
1924
|
enum ggml_scale_mode {
|
|
1787
1925
|
GGML_SCALE_MODE_NEAREST = 0,
|
|
1788
1926
|
GGML_SCALE_MODE_BILINEAR = 1,
|
|
1927
|
+
|
|
1928
|
+
GGML_SCALE_MODE_COUNT
|
|
1929
|
+
};
|
|
1930
|
+
|
|
1931
|
+
enum ggml_scale_flag {
|
|
1932
|
+
GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
|
|
1789
1933
|
};
|
|
1790
1934
|
|
|
1791
1935
|
// interpolate
|
|
@@ -1798,14 +1942,26 @@ extern "C" {
|
|
|
1798
1942
|
|
|
1799
1943
|
// interpolate
|
|
1800
1944
|
// interpolate scale to specified dimensions
|
|
1801
|
-
GGML_API struct ggml_tensor * ggml_upscale_ext(
|
|
1945
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
|
|
1802
1946
|
struct ggml_context * ctx,
|
|
1803
1947
|
struct ggml_tensor * a,
|
|
1804
1948
|
int ne0,
|
|
1805
1949
|
int ne1,
|
|
1806
1950
|
int ne2,
|
|
1807
1951
|
int ne3,
|
|
1808
|
-
enum ggml_scale_mode mode);
|
|
1952
|
+
enum ggml_scale_mode mode),
|
|
1953
|
+
"use ggml_interpolate instead");
|
|
1954
|
+
|
|
1955
|
+
// Up- or downsamples the input to the specified size.
|
|
1956
|
+
// 2D scale modes (eg. bilinear) are applied to the first two dimensions.
|
|
1957
|
+
GGML_API struct ggml_tensor * ggml_interpolate(
|
|
1958
|
+
struct ggml_context * ctx,
|
|
1959
|
+
struct ggml_tensor * a,
|
|
1960
|
+
int64_t ne0,
|
|
1961
|
+
int64_t ne1,
|
|
1962
|
+
int64_t ne2,
|
|
1963
|
+
int64_t ne3,
|
|
1964
|
+
uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
|
|
1809
1965
|
|
|
1810
1966
|
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
|
1811
1967
|
GGML_API struct ggml_tensor * ggml_pad(
|
|
@@ -1868,11 +2024,17 @@ extern "C" {
|
|
|
1868
2024
|
|
|
1869
2025
|
#define GGML_KQ_MASK_PAD 64
|
|
1870
2026
|
|
|
1871
|
-
// q: [n_embd_k, n_batch, n_head,
|
|
1872
|
-
// k: [n_embd_k, n_kv, n_head_kv,
|
|
1873
|
-
// v: [n_embd_v, n_kv, n_head_kv,
|
|
1874
|
-
// mask: [n_kv, n_batch_pad,
|
|
1875
|
-
// res: [n_embd_v, n_head, n_batch,
|
|
2027
|
+
// q: [n_embd_k, n_batch, n_head, ne3 ]
|
|
2028
|
+
// k: [n_embd_k, n_kv, n_head_kv, ne3 ]
|
|
2029
|
+
// v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
|
|
2030
|
+
// mask: [n_kv, n_batch_pad, ne32, ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
|
|
2031
|
+
// res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
|
|
2032
|
+
//
|
|
2033
|
+
// broadcast:
|
|
2034
|
+
// n_head % n_head_kv == 0
|
|
2035
|
+
// n_head % ne32 == 0
|
|
2036
|
+
// ne3 % ne33 == 0
|
|
2037
|
+
//
|
|
1876
2038
|
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
|
|
1877
2039
|
struct ggml_context * ctx,
|
|
1878
2040
|
struct ggml_tensor * q,
|
|
@@ -1911,7 +2073,8 @@ extern "C" {
|
|
|
1911
2073
|
struct ggml_tensor * dt,
|
|
1912
2074
|
struct ggml_tensor * A,
|
|
1913
2075
|
struct ggml_tensor * B,
|
|
1914
|
-
struct ggml_tensor * C
|
|
2076
|
+
struct ggml_tensor * C,
|
|
2077
|
+
struct ggml_tensor * ids);
|
|
1915
2078
|
|
|
1916
2079
|
// partition into non-overlapping windows with padding if needed
|
|
1917
2080
|
// example:
|
|
@@ -365,12 +365,12 @@ ggml_add_backend(BLAS)
|
|
|
365
365
|
ggml_add_backend(CANN)
|
|
366
366
|
ggml_add_backend(CUDA)
|
|
367
367
|
ggml_add_backend(HIP)
|
|
368
|
-
ggml_add_backend(Kompute)
|
|
369
368
|
ggml_add_backend(METAL)
|
|
370
369
|
ggml_add_backend(MUSA)
|
|
371
370
|
ggml_add_backend(RPC)
|
|
372
371
|
ggml_add_backend(SYCL)
|
|
373
372
|
ggml_add_backend(Vulkan)
|
|
373
|
+
ggml_add_backend(WebGPU)
|
|
374
374
|
ggml_add_backend(OpenCL)
|
|
375
375
|
|
|
376
376
|
foreach (target ggml-base ggml)
|