@novastera-oss/llamarn 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/cpp/include/llama.h +8 -3
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +56 -22
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +1 -2
- package/cpp/llama.cpp/README.md +4 -5
- package/cpp/llama.cpp/build-xcframework.sh +1 -1
- package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
- package/cpp/llama.cpp/common/arg.cpp +24 -0
- package/cpp/llama.cpp/common/chat.cpp +37 -20
- package/cpp/llama.cpp/common/chat.h +2 -0
- package/cpp/llama.cpp/common/common.cpp +3 -0
- package/cpp/llama.cpp/common/common.h +5 -0
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
- package/cpp/llama.cpp/convert_hf_to_gguf.py +860 -23
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +9 -0
- package/cpp/llama.cpp/ggml/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +206 -10
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +17 -1
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +36 -18
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +68 -5
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +16 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +37 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +10 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +109 -108
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +1027 -1038
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +53 -52
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +56 -55
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +42 -41
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +24 -23
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +29 -28
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +30 -29
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +83 -82
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +20 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +9 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +111 -103
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1405 -240
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +25 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +56 -40
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +212 -34
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +35 -11
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +187 -54
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +71 -29
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +8 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +14 -12
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +5 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +15 -10
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +12 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +269 -110
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +2 -8
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +97 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +14 -5
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +125 -183
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +51 -9
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +394 -80
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +616 -239
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +741 -571
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -24
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +99 -166
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +94 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +697 -1098
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +6 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +104 -62
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +132 -201
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +39 -38
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +12 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +767 -292
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +58 -7
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +23 -3
- package/cpp/llama.cpp/ggml/src/ggml.c +449 -72
- package/cpp/llama.cpp/ggml/src/gguf.cpp +13 -2
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +285 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +27 -0
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +137 -21
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +109 -7
- package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
- package/cpp/llama.cpp/include/llama.h +8 -43
- package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +265 -3
- package/cpp/llama.cpp/src/llama-arch.h +36 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +596 -359
- package/cpp/llama.cpp/src/llama-batch.h +105 -70
- package/cpp/llama.cpp/src/llama-chat.cpp +26 -6
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-context.cpp +101 -107
- package/cpp/llama.cpp/src/llama-context.h +13 -13
- package/cpp/llama.cpp/src/llama-graph.cpp +286 -404
- package/cpp/llama.cpp/src/llama-graph.h +78 -79
- package/cpp/llama.cpp/src/llama-hparams.cpp +11 -1
- package/cpp/llama.cpp/src/llama-hparams.h +11 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +74 -66
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +23 -26
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +312 -157
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +79 -46
- package/cpp/llama.cpp/src/llama-kv-cells.h +97 -21
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +73 -69
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +19 -22
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +88 -77
- package/cpp/llama.cpp/src/llama-memory-recurrent.h +15 -20
- package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
- package/cpp/llama.cpp/src/llama-memory.h +21 -22
- package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/cpp/llama.cpp/src/llama-model.cpp +5301 -2922
- package/cpp/llama.cpp/src/llama-model.h +40 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +88 -5
- package/cpp/llama.cpp/src/llama-vocab.cpp +37 -3
- package/cpp/llama.cpp/src/llama-vocab.h +42 -0
- package/cpp/rn-utils.h +3 -0
- package/ios/include/chat.h +2 -0
- package/ios/include/common.h +5 -0
- package/ios/include/llama.h +8 -43
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3891 -3744
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5095 -4900
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5066 -4871
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3919 -3773
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
- package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
|
@@ -128,6 +128,9 @@ models = [
|
|
|
128
128
|
{"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", },
|
|
129
129
|
{"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
|
|
130
130
|
{"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
|
|
131
|
+
{"name": "a.x-4.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/skt/A.X-4.0", },
|
|
132
|
+
{"name": "midm-2.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct", },
|
|
133
|
+
{"name": "lfm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LiquidAI/LFM2-Tokenizer"},
|
|
131
134
|
]
|
|
132
135
|
|
|
133
136
|
# some models are known to be broken upstream, so we will skip them as exceptions
|
|
@@ -137,6 +140,12 @@ pre_computed_hashes = [
|
|
|
137
140
|
{"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516"},
|
|
138
141
|
{"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2"},
|
|
139
142
|
{"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", "chkhsh": "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35"},
|
|
143
|
+
{"name": "hunyuan", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Hunyuan-A13B-Instruct", "chkhsh": "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664"},
|
|
144
|
+
# falcon-h1 series uses 4 different tokenizers across model sizes (0.5b - 34b), hence we need to define 4 different hashes
|
|
145
|
+
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base", "chkhsh": "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6"},
|
|
146
|
+
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-1B-Base", "chkhsh": "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86"},
|
|
147
|
+
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-7B-Base", "chkhsh": "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896"},
|
|
148
|
+
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-34B-Base", "chkhsh": "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b"},
|
|
140
149
|
]
|
|
141
150
|
|
|
142
151
|
|
|
@@ -131,6 +131,7 @@ option(GGML_RVV "ggml: enable rvv" ON)
|
|
|
131
131
|
option(GGML_RV_ZFH "ggml: enable riscv zfh" OFF)
|
|
132
132
|
option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF)
|
|
133
133
|
option(GGML_VXE "ggml: enable vxe" ON)
|
|
134
|
+
option(GGML_NNPA "ggml: enable nnpa" ON)
|
|
134
135
|
|
|
135
136
|
option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
|
|
136
137
|
set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
|
|
@@ -180,7 +181,6 @@ option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug ou
|
|
|
180
181
|
option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
|
|
181
182
|
option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
|
|
182
183
|
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
|
|
183
|
-
option(GGML_KOMPUTE "ggml: use Kompute" OFF)
|
|
184
184
|
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
|
|
185
185
|
option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF)
|
|
186
186
|
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
|
|
@@ -265,7 +265,6 @@ set(GGML_PUBLIC_HEADERS
|
|
|
265
265
|
include/ggml-cann.h
|
|
266
266
|
include/ggml-cpp.h
|
|
267
267
|
include/ggml-cuda.h
|
|
268
|
-
include/ggml-kompute.h
|
|
269
268
|
include/ggml-opt.h
|
|
270
269
|
include/ggml-metal.h
|
|
271
270
|
include/ggml-rpc.h
|
|
@@ -359,6 +358,13 @@ write_basic_package_version_file(
|
|
|
359
358
|
VERSION ${GGML_INSTALL_VERSION}
|
|
360
359
|
COMPATIBILITY SameMajorVersion)
|
|
361
360
|
|
|
361
|
+
target_compile_definitions(ggml-base PRIVATE
|
|
362
|
+
GGML_VERSION="${GGML_INSTALL_VERSION}"
|
|
363
|
+
GGML_COMMIT="${GGML_BUILD_COMMIT}"
|
|
364
|
+
)
|
|
365
|
+
message(STATUS "ggml version: ${GGML_INSTALL_VERSION}")
|
|
366
|
+
message(STATUS "ggml commit: ${GGML_BUILD_COMMIT}")
|
|
367
|
+
|
|
362
368
|
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake
|
|
363
369
|
${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake
|
|
364
370
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml)
|
|
@@ -339,7 +339,7 @@ extern "C" {
|
|
|
339
339
|
typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
|
|
340
340
|
|
|
341
341
|
// Compare the output of two backends
|
|
342
|
-
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
|
|
342
|
+
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node);
|
|
343
343
|
|
|
344
344
|
// Tensor initialization
|
|
345
345
|
GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
|
|
@@ -101,6 +101,7 @@ extern "C" {
|
|
|
101
101
|
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
|
|
102
102
|
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
|
|
103
103
|
GGML_BACKEND_API int ggml_cpu_has_vxe (void);
|
|
104
|
+
GGML_BACKEND_API int ggml_cpu_has_nnpa (void);
|
|
104
105
|
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
|
|
105
106
|
GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
|
|
106
107
|
|
|
@@ -133,6 +134,7 @@ extern "C" {
|
|
|
133
134
|
|
|
134
135
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
|
|
135
136
|
|
|
137
|
+
GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float *, float *, int64_t);
|
|
136
138
|
GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float *, ggml_fp16_t *, int64_t);
|
|
137
139
|
GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t *, float *, int64_t);
|
|
138
140
|
GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float *, ggml_bf16_t *, int64_t);
|
|
@@ -314,6 +314,13 @@
|
|
|
314
314
|
extern "C" {
|
|
315
315
|
#endif
|
|
316
316
|
|
|
317
|
+
// Function type used in fatal error callbacks
|
|
318
|
+
typedef void (*ggml_abort_callback_t)(const char * error_message);
|
|
319
|
+
|
|
320
|
+
// Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
|
|
321
|
+
// Returns the old callback for chaining
|
|
322
|
+
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback);
|
|
323
|
+
|
|
317
324
|
GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
|
|
318
325
|
GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
|
|
319
326
|
|
|
@@ -470,6 +477,7 @@ extern "C" {
|
|
|
470
477
|
GGML_OP_TRANSPOSE,
|
|
471
478
|
GGML_OP_GET_ROWS,
|
|
472
479
|
GGML_OP_GET_ROWS_BACK,
|
|
480
|
+
GGML_OP_SET_ROWS,
|
|
473
481
|
GGML_OP_DIAG,
|
|
474
482
|
GGML_OP_DIAG_MASK_INF,
|
|
475
483
|
GGML_OP_DIAG_MASK_ZERO,
|
|
@@ -481,14 +489,16 @@ extern "C" {
|
|
|
481
489
|
GGML_OP_CONV_TRANSPOSE_1D,
|
|
482
490
|
GGML_OP_IM2COL,
|
|
483
491
|
GGML_OP_IM2COL_BACK,
|
|
492
|
+
GGML_OP_CONV_2D,
|
|
484
493
|
GGML_OP_CONV_2D_DW,
|
|
485
494
|
GGML_OP_CONV_TRANSPOSE_2D,
|
|
486
495
|
GGML_OP_POOL_1D,
|
|
487
496
|
GGML_OP_POOL_2D,
|
|
488
497
|
GGML_OP_POOL_2D_BACK,
|
|
489
|
-
GGML_OP_UPSCALE,
|
|
498
|
+
GGML_OP_UPSCALE,
|
|
490
499
|
GGML_OP_PAD,
|
|
491
500
|
GGML_OP_PAD_REFLECT_1D,
|
|
501
|
+
GGML_OP_ROLL,
|
|
492
502
|
GGML_OP_ARANGE,
|
|
493
503
|
GGML_OP_TIMESTEP_EMBEDDING,
|
|
494
504
|
GGML_OP_ARGSORT,
|
|
@@ -518,6 +528,8 @@ extern "C" {
|
|
|
518
528
|
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
|
519
529
|
GGML_OP_OPT_STEP_ADAMW,
|
|
520
530
|
|
|
531
|
+
GGML_OP_GLU,
|
|
532
|
+
|
|
521
533
|
GGML_OP_COUNT,
|
|
522
534
|
};
|
|
523
535
|
|
|
@@ -541,6 +553,16 @@ extern "C" {
|
|
|
541
553
|
GGML_UNARY_OP_COUNT,
|
|
542
554
|
};
|
|
543
555
|
|
|
556
|
+
enum ggml_glu_op {
|
|
557
|
+
GGML_GLU_OP_REGLU,
|
|
558
|
+
GGML_GLU_OP_GEGLU,
|
|
559
|
+
GGML_GLU_OP_SWIGLU,
|
|
560
|
+
GGML_GLU_OP_GEGLU_ERF,
|
|
561
|
+
GGML_GLU_OP_GEGLU_QUICK,
|
|
562
|
+
|
|
563
|
+
GGML_GLU_OP_COUNT,
|
|
564
|
+
};
|
|
565
|
+
|
|
544
566
|
enum ggml_object_type {
|
|
545
567
|
GGML_OBJECT_TYPE_TENSOR,
|
|
546
568
|
GGML_OBJECT_TYPE_GRAPH,
|
|
@@ -626,6 +648,9 @@ extern "C" {
|
|
|
626
648
|
|
|
627
649
|
// misc
|
|
628
650
|
|
|
651
|
+
GGML_API const char * ggml_version(void);
|
|
652
|
+
GGML_API const char * ggml_commit(void);
|
|
653
|
+
|
|
629
654
|
GGML_API void ggml_time_init(void); // call this once at the beginning of the program
|
|
630
655
|
GGML_API int64_t ggml_time_ms(void);
|
|
631
656
|
GGML_API int64_t ggml_time_us(void);
|
|
@@ -656,6 +681,7 @@ extern "C" {
|
|
|
656
681
|
GGML_API const char * ggml_op_symbol(enum ggml_op op);
|
|
657
682
|
|
|
658
683
|
GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
|
|
684
|
+
GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op);
|
|
659
685
|
GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
|
|
660
686
|
|
|
661
687
|
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
|
|
@@ -686,6 +712,9 @@ extern "C" {
|
|
|
686
712
|
// true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
|
|
687
713
|
GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);
|
|
688
714
|
|
|
715
|
+
// true if the elements in dimension 0 are contiguous, or there is just 1 block of elements
|
|
716
|
+
GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor);
|
|
717
|
+
|
|
689
718
|
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
690
719
|
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
691
720
|
|
|
@@ -757,6 +786,7 @@ extern "C" {
|
|
|
757
786
|
GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
|
|
758
787
|
|
|
759
788
|
GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
|
|
789
|
+
GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor);
|
|
760
790
|
|
|
761
791
|
GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
|
|
762
792
|
GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
|
|
@@ -1085,6 +1115,89 @@ extern "C" {
|
|
|
1085
1115
|
struct ggml_context * ctx,
|
|
1086
1116
|
struct ggml_tensor * a);
|
|
1087
1117
|
|
|
1118
|
+
// gated linear unit ops
|
|
1119
|
+
// A: n columns, r rows,
|
|
1120
|
+
// result is n / 2 columns, r rows,
|
|
1121
|
+
// expects gate in second half of row, unless swapped is true
|
|
1122
|
+
GGML_API struct ggml_tensor * ggml_glu(
|
|
1123
|
+
struct ggml_context * ctx,
|
|
1124
|
+
struct ggml_tensor * a,
|
|
1125
|
+
enum ggml_glu_op op,
|
|
1126
|
+
bool swapped);
|
|
1127
|
+
|
|
1128
|
+
GGML_API struct ggml_tensor * ggml_reglu(
|
|
1129
|
+
struct ggml_context * ctx,
|
|
1130
|
+
struct ggml_tensor * a);
|
|
1131
|
+
|
|
1132
|
+
GGML_API struct ggml_tensor * ggml_reglu_swapped(
|
|
1133
|
+
struct ggml_context * ctx,
|
|
1134
|
+
struct ggml_tensor * a);
|
|
1135
|
+
|
|
1136
|
+
GGML_API struct ggml_tensor * ggml_geglu(
|
|
1137
|
+
struct ggml_context * ctx,
|
|
1138
|
+
struct ggml_tensor * a);
|
|
1139
|
+
|
|
1140
|
+
GGML_API struct ggml_tensor * ggml_geglu_swapped(
|
|
1141
|
+
struct ggml_context * ctx,
|
|
1142
|
+
struct ggml_tensor * a);
|
|
1143
|
+
|
|
1144
|
+
GGML_API struct ggml_tensor * ggml_swiglu(
|
|
1145
|
+
struct ggml_context * ctx,
|
|
1146
|
+
struct ggml_tensor * a);
|
|
1147
|
+
|
|
1148
|
+
GGML_API struct ggml_tensor * ggml_swiglu_swapped(
|
|
1149
|
+
struct ggml_context * ctx,
|
|
1150
|
+
struct ggml_tensor * a);
|
|
1151
|
+
|
|
1152
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf(
|
|
1153
|
+
struct ggml_context * ctx,
|
|
1154
|
+
struct ggml_tensor * a);
|
|
1155
|
+
|
|
1156
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
|
|
1157
|
+
struct ggml_context * ctx,
|
|
1158
|
+
struct ggml_tensor * a);
|
|
1159
|
+
|
|
1160
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick(
|
|
1161
|
+
struct ggml_context * ctx,
|
|
1162
|
+
struct ggml_tensor * a);
|
|
1163
|
+
|
|
1164
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
|
|
1165
|
+
struct ggml_context * ctx,
|
|
1166
|
+
struct ggml_tensor * a);
|
|
1167
|
+
|
|
1168
|
+
// A: n columns, r rows,
|
|
1169
|
+
// B: n columns, r rows,
|
|
1170
|
+
GGML_API struct ggml_tensor * ggml_glu_split(
|
|
1171
|
+
struct ggml_context * ctx,
|
|
1172
|
+
struct ggml_tensor * a,
|
|
1173
|
+
struct ggml_tensor * b,
|
|
1174
|
+
enum ggml_glu_op op);
|
|
1175
|
+
|
|
1176
|
+
GGML_API struct ggml_tensor * ggml_reglu_split(
|
|
1177
|
+
struct ggml_context * ctx,
|
|
1178
|
+
struct ggml_tensor * a,
|
|
1179
|
+
struct ggml_tensor * b);
|
|
1180
|
+
|
|
1181
|
+
GGML_API struct ggml_tensor * ggml_geglu_split(
|
|
1182
|
+
struct ggml_context * ctx,
|
|
1183
|
+
struct ggml_tensor * a,
|
|
1184
|
+
struct ggml_tensor * b);
|
|
1185
|
+
|
|
1186
|
+
GGML_API struct ggml_tensor * ggml_swiglu_split(
|
|
1187
|
+
struct ggml_context * ctx,
|
|
1188
|
+
struct ggml_tensor * a,
|
|
1189
|
+
struct ggml_tensor * b);
|
|
1190
|
+
|
|
1191
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf_split(
|
|
1192
|
+
struct ggml_context * ctx,
|
|
1193
|
+
struct ggml_tensor * a,
|
|
1194
|
+
struct ggml_tensor * b);
|
|
1195
|
+
|
|
1196
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick_split(
|
|
1197
|
+
struct ggml_context * ctx,
|
|
1198
|
+
struct ggml_tensor * a,
|
|
1199
|
+
struct ggml_tensor * b);
|
|
1200
|
+
|
|
1088
1201
|
// normalize along rows
|
|
1089
1202
|
GGML_API struct ggml_tensor * ggml_norm(
|
|
1090
1203
|
struct ggml_context * ctx,
|
|
@@ -1184,6 +1297,19 @@ extern "C" {
|
|
|
1184
1297
|
struct ggml_tensor * a,
|
|
1185
1298
|
float s);
|
|
1186
1299
|
|
|
1300
|
+
// x = s * a + b
|
|
1301
|
+
GGML_API struct ggml_tensor * ggml_scale_bias(
|
|
1302
|
+
struct ggml_context * ctx,
|
|
1303
|
+
struct ggml_tensor * a,
|
|
1304
|
+
float s,
|
|
1305
|
+
float b);
|
|
1306
|
+
|
|
1307
|
+
GGML_API struct ggml_tensor * ggml_scale_bias_inplace(
|
|
1308
|
+
struct ggml_context * ctx,
|
|
1309
|
+
struct ggml_tensor * a,
|
|
1310
|
+
float s,
|
|
1311
|
+
float b);
|
|
1312
|
+
|
|
1187
1313
|
// b -> view(a,offset,nb1,nb2,3), return modified a
|
|
1188
1314
|
GGML_API struct ggml_tensor * ggml_set(
|
|
1189
1315
|
struct ggml_context * ctx,
|
|
@@ -1374,6 +1500,23 @@ extern "C" {
|
|
|
1374
1500
|
struct ggml_tensor * b, // row indices
|
|
1375
1501
|
struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
|
|
1376
1502
|
|
|
1503
|
+
// a TD [n_embd, ne1, ne2, ne3]
|
|
1504
|
+
// b TS [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3
|
|
1505
|
+
// c I64 [n_rows, ne11, ne12, 1] | c[i] in [0, ne1)
|
|
1506
|
+
//
|
|
1507
|
+
// undefined behavior if destination rows overlap
|
|
1508
|
+
//
|
|
1509
|
+
// broadcast:
|
|
1510
|
+
// ne2 % ne11 == 0
|
|
1511
|
+
// ne3 % ne12 == 0
|
|
1512
|
+
//
|
|
1513
|
+
// return view(a)
|
|
1514
|
+
GGML_API struct ggml_tensor * ggml_set_rows(
|
|
1515
|
+
struct ggml_context * ctx,
|
|
1516
|
+
struct ggml_tensor * a, // destination
|
|
1517
|
+
struct ggml_tensor * b, // source
|
|
1518
|
+
struct ggml_tensor * c); // row indices
|
|
1519
|
+
|
|
1377
1520
|
GGML_API struct ggml_tensor * ggml_diag(
|
|
1378
1521
|
struct ggml_context * ctx,
|
|
1379
1522
|
struct ggml_tensor * a);
|
|
@@ -1411,8 +1554,14 @@ extern "C" {
|
|
|
1411
1554
|
struct ggml_context * ctx,
|
|
1412
1555
|
struct ggml_tensor * a);
|
|
1413
1556
|
|
|
1557
|
+
// a [ne0, ne01, ne02, ne03]
|
|
1558
|
+
// mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
|
|
1559
|
+
//
|
|
1560
|
+
// broadcast:
|
|
1561
|
+
// ne02 % ne12 == 0
|
|
1562
|
+
// ne03 % ne13 == 0
|
|
1563
|
+
//
|
|
1414
1564
|
// fused soft_max(a*scale + mask*(ALiBi slope))
|
|
1415
|
-
// mask is optional
|
|
1416
1565
|
// max_bias = 0.0f for no ALiBi
|
|
1417
1566
|
GGML_API struct ggml_tensor * ggml_soft_max_ext(
|
|
1418
1567
|
struct ggml_context * ctx,
|
|
@@ -1722,6 +1871,17 @@ extern "C" {
|
|
|
1722
1871
|
struct ggml_tensor * b,
|
|
1723
1872
|
int stride);
|
|
1724
1873
|
|
|
1874
|
+
GGML_API struct ggml_tensor * ggml_conv_2d_direct(
|
|
1875
|
+
struct ggml_context * ctx,
|
|
1876
|
+
struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC]
|
|
1877
|
+
struct ggml_tensor * b, // input data [W, H, C, N]
|
|
1878
|
+
int s0, // stride dimension 0
|
|
1879
|
+
int s1, // stride dimension 1
|
|
1880
|
+
int p0, // padding dimension 0
|
|
1881
|
+
int p1, // padding dimension 1
|
|
1882
|
+
int d0, // dilation dimension 0
|
|
1883
|
+
int d1); // dilation dimension 1
|
|
1884
|
+
|
|
1725
1885
|
enum ggml_op_pool {
|
|
1726
1886
|
GGML_OP_POOL_MAX,
|
|
1727
1887
|
GGML_OP_POOL_AVG,
|
|
@@ -1764,6 +1924,12 @@ extern "C" {
|
|
|
1764
1924
|
enum ggml_scale_mode {
|
|
1765
1925
|
GGML_SCALE_MODE_NEAREST = 0,
|
|
1766
1926
|
GGML_SCALE_MODE_BILINEAR = 1,
|
|
1927
|
+
|
|
1928
|
+
GGML_SCALE_MODE_COUNT
|
|
1929
|
+
};
|
|
1930
|
+
|
|
1931
|
+
enum ggml_scale_flag {
|
|
1932
|
+
GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
|
|
1767
1933
|
};
|
|
1768
1934
|
|
|
1769
1935
|
// interpolate
|
|
@@ -1776,14 +1942,26 @@ extern "C" {
|
|
|
1776
1942
|
|
|
1777
1943
|
// interpolate
|
|
1778
1944
|
// interpolate scale to specified dimensions
|
|
1779
|
-
GGML_API struct ggml_tensor * ggml_upscale_ext(
|
|
1945
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
|
|
1780
1946
|
struct ggml_context * ctx,
|
|
1781
1947
|
struct ggml_tensor * a,
|
|
1782
1948
|
int ne0,
|
|
1783
1949
|
int ne1,
|
|
1784
1950
|
int ne2,
|
|
1785
1951
|
int ne3,
|
|
1786
|
-
enum ggml_scale_mode mode)
|
|
1952
|
+
enum ggml_scale_mode mode),
|
|
1953
|
+
"use ggml_interpolate instead");
|
|
1954
|
+
|
|
1955
|
+
// Up- or downsamples the input to the specified size.
|
|
1956
|
+
// 2D scale modes (eg. bilinear) are applied to the first two dimensions.
|
|
1957
|
+
GGML_API struct ggml_tensor * ggml_interpolate(
|
|
1958
|
+
struct ggml_context * ctx,
|
|
1959
|
+
struct ggml_tensor * a,
|
|
1960
|
+
int64_t ne0,
|
|
1961
|
+
int64_t ne1,
|
|
1962
|
+
int64_t ne2,
|
|
1963
|
+
int64_t ne3,
|
|
1964
|
+
uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
|
|
1787
1965
|
|
|
1788
1966
|
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
|
1789
1967
|
GGML_API struct ggml_tensor * ggml_pad(
|
|
@@ -1801,6 +1979,17 @@ extern "C" {
|
|
|
1801
1979
|
int p0,
|
|
1802
1980
|
int p1);
|
|
1803
1981
|
|
|
1982
|
+
// Move tensor elements by an offset given for each dimension. Elements that
|
|
1983
|
+
// are shifted beyond the last position are wrapped around to the beginning.
|
|
1984
|
+
GGML_API struct ggml_tensor * ggml_roll(
|
|
1985
|
+
struct ggml_context * ctx,
|
|
1986
|
+
struct ggml_tensor * a,
|
|
1987
|
+
int shift0,
|
|
1988
|
+
int shift1,
|
|
1989
|
+
int shift2,
|
|
1990
|
+
int shift3);
|
|
1991
|
+
|
|
1992
|
+
|
|
1804
1993
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
|
1805
1994
|
// timesteps: [N,]
|
|
1806
1995
|
// return: [N, dim]
|
|
@@ -1835,11 +2024,17 @@ extern "C" {
|
|
|
1835
2024
|
|
|
1836
2025
|
#define GGML_KQ_MASK_PAD 64
|
|
1837
2026
|
|
|
1838
|
-
// q: [n_embd_k, n_batch, n_head,
|
|
1839
|
-
// k: [n_embd_k, n_kv, n_head_kv,
|
|
1840
|
-
// v: [n_embd_v, n_kv, n_head_kv,
|
|
1841
|
-
// mask: [n_kv, n_batch_pad,
|
|
1842
|
-
// res: [n_embd_v, n_head, n_batch,
|
|
2027
|
+
// q: [n_embd_k, n_batch, n_head, ne3 ]
|
|
2028
|
+
// k: [n_embd_k, n_kv, n_head_kv, ne3 ]
|
|
2029
|
+
// v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
|
|
2030
|
+
// mask: [n_kv, n_batch_pad, ne32, ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
|
|
2031
|
+
// res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
|
|
2032
|
+
//
|
|
2033
|
+
// broadcast:
|
|
2034
|
+
// n_head % n_head_kv == 0
|
|
2035
|
+
// n_head % ne32 == 0
|
|
2036
|
+
// ne3 % ne33 == 0
|
|
2037
|
+
//
|
|
1843
2038
|
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
|
|
1844
2039
|
struct ggml_context * ctx,
|
|
1845
2040
|
struct ggml_tensor * q,
|
|
@@ -1878,7 +2073,8 @@ extern "C" {
|
|
|
1878
2073
|
struct ggml_tensor * dt,
|
|
1879
2074
|
struct ggml_tensor * A,
|
|
1880
2075
|
struct ggml_tensor * B,
|
|
1881
|
-
struct ggml_tensor * C
|
|
2076
|
+
struct ggml_tensor * C,
|
|
2077
|
+
struct ggml_tensor * ids);
|
|
1882
2078
|
|
|
1883
2079
|
// partition into non-overlapping windows with padding if needed
|
|
1884
2080
|
// example:
|
|
@@ -286,6 +286,10 @@ function(ggml_add_cpu_backend_variant tag_name)
|
|
|
286
286
|
foreach (feat ${ARGN})
|
|
287
287
|
set(GGML_INTERNAL_${feat} ON)
|
|
288
288
|
endforeach()
|
|
289
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
|
|
290
|
+
foreach (feat ${ARGN})
|
|
291
|
+
set(GGML_INTERNAL_${feat} ON)
|
|
292
|
+
endforeach()
|
|
289
293
|
endif()
|
|
290
294
|
|
|
291
295
|
ggml_add_cpu_backend_variant_impl(${tag_name})
|
|
@@ -337,6 +341,19 @@ if (GGML_CPU_ALL_VARIANTS)
|
|
|
337
341
|
else()
|
|
338
342
|
message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
|
|
339
343
|
endif()
|
|
344
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
|
|
345
|
+
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
|
346
|
+
ggml_add_cpu_backend_variant(power0)
|
|
347
|
+
ggml_add_cpu_backend_variant(power7_1 POWER7)
|
|
348
|
+
ggml_add_cpu_backend_variant(power7_2 POWER7 VSX)
|
|
349
|
+
ggml_add_cpu_backend_variant(power8_1 POWER8)
|
|
350
|
+
ggml_add_cpu_backend_variant(power8_2 POWER8 VSX)
|
|
351
|
+
ggml_add_cpu_backend_variant(power9 POWER9 VSX)
|
|
352
|
+
ggml_add_cpu_backend_variant(power10 POWER10 VSX)
|
|
353
|
+
ggml_add_cpu_backend_variant(power11 POWER11 VSX)
|
|
354
|
+
else()
|
|
355
|
+
message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
|
|
356
|
+
endif()
|
|
340
357
|
else()
|
|
341
358
|
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
|
|
342
359
|
endif()
|
|
@@ -348,7 +365,6 @@ ggml_add_backend(BLAS)
|
|
|
348
365
|
ggml_add_backend(CANN)
|
|
349
366
|
ggml_add_backend(CUDA)
|
|
350
367
|
ggml_add_backend(HIP)
|
|
351
|
-
ggml_add_backend(Kompute)
|
|
352
368
|
ggml_add_backend(METAL)
|
|
353
369
|
ggml_add_backend(MUSA)
|
|
354
370
|
ggml_add_backend(RPC)
|
|
@@ -61,10 +61,6 @@
|
|
|
61
61
|
#include "ggml-cann.h"
|
|
62
62
|
#endif
|
|
63
63
|
|
|
64
|
-
#ifdef GGML_USE_KOMPUTE
|
|
65
|
-
#include "ggml-kompute.h"
|
|
66
|
-
#endif
|
|
67
|
-
|
|
68
64
|
// disable C++17 deprecation warning for std::codecvt_utf8
|
|
69
65
|
#if defined(__clang__)
|
|
70
66
|
# pragma clang diagnostic push
|
|
@@ -189,9 +185,6 @@ struct ggml_backend_registry {
|
|
|
189
185
|
#ifdef GGML_USE_RPC
|
|
190
186
|
register_backend(ggml_backend_rpc_reg());
|
|
191
187
|
#endif
|
|
192
|
-
#ifdef GGML_USE_KOMPUTE
|
|
193
|
-
register_backend(ggml_backend_kompute_reg());
|
|
194
|
-
#endif
|
|
195
188
|
#ifdef GGML_USE_CPU
|
|
196
189
|
register_backend(ggml_backend_cpu_reg());
|
|
197
190
|
#endif
|
|
@@ -575,7 +568,6 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
|
|
|
575
568
|
ggml_backend_load_best("cann", silent, dir_path);
|
|
576
569
|
ggml_backend_load_best("cuda", silent, dir_path);
|
|
577
570
|
ggml_backend_load_best("hip", silent, dir_path);
|
|
578
|
-
ggml_backend_load_best("kompute", silent, dir_path);
|
|
579
571
|
ggml_backend_load_best("metal", silent, dir_path);
|
|
580
572
|
ggml_backend_load_best("rpc", silent, dir_path);
|
|
581
573
|
ggml_backend_load_best("sycl", silent, dir_path);
|
|
@@ -817,8 +817,9 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
|
|
|
817
817
|
}
|
|
818
818
|
if (sched->debug > 1) {
|
|
819
819
|
ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node);
|
|
820
|
-
GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, ggml_op_name(node->op), node->name,
|
|
821
|
-
fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node)
|
|
820
|
+
GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s] use=%d:", i, ggml_op_name(node->op), node->name,
|
|
821
|
+
fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node),
|
|
822
|
+
graph->use_counts[ggml_hash_find(&graph->visited_hash_set, node)]);
|
|
822
823
|
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
|
823
824
|
struct ggml_tensor * src = node->src[j];
|
|
824
825
|
if (src == NULL) {
|
|
@@ -1826,7 +1827,7 @@ void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy) {
|
|
|
1826
1827
|
ggml_free(copy.ctx_unallocated);
|
|
1827
1828
|
}
|
|
1828
1829
|
|
|
1829
|
-
bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data) {
|
|
1830
|
+
bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node) {
|
|
1830
1831
|
struct ggml_backend_graph_copy copy = ggml_backend_graph_copy(backend2, graph);
|
|
1831
1832
|
if (copy.buffer == NULL) {
|
|
1832
1833
|
return false;
|
|
@@ -1837,28 +1838,45 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
|
|
|
1837
1838
|
|
|
1838
1839
|
assert(g1->n_nodes == g2->n_nodes);
|
|
1839
1840
|
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1841
|
+
if (test_node != nullptr) {
|
|
1842
|
+
// Compute the whole graph and only test the output for a specific tensor
|
|
1843
|
+
ggml_backend_graph_compute(backend1, g1);
|
|
1844
|
+
ggml_backend_graph_compute(backend2, g2);
|
|
1843
1845
|
|
|
1844
|
-
|
|
1846
|
+
int test_node_idx = -1;
|
|
1847
|
+
for (int i = 0; i < g1->n_nodes; i++) {
|
|
1848
|
+
struct ggml_tensor * t1 = g1->nodes[i];
|
|
1849
|
+
if (t1 == test_node) {
|
|
1850
|
+
test_node_idx = i;
|
|
1851
|
+
break;
|
|
1852
|
+
}
|
|
1853
|
+
}
|
|
1854
|
+
GGML_ASSERT(test_node_idx != -1);
|
|
1845
1855
|
|
|
1846
|
-
|
|
1847
|
-
|
|
1856
|
+
callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data);
|
|
1857
|
+
} else {
|
|
1858
|
+
for (int i = 0; i < g1->n_nodes; i++) {
|
|
1859
|
+
struct ggml_tensor * t1 = g1->nodes[i];
|
|
1860
|
+
struct ggml_tensor * t2 = g2->nodes[i];
|
|
1848
1861
|
|
|
1849
|
-
|
|
1850
|
-
ggml_backend_graph_compute(backend2, &g2v);
|
|
1862
|
+
assert(t1->op == t2->op && ggml_are_same_layout(t1, t2));
|
|
1851
1863
|
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
}
|
|
1864
|
+
struct ggml_cgraph g1v = ggml_graph_view(g1, i, i + 1);
|
|
1865
|
+
struct ggml_cgraph g2v = ggml_graph_view(g2, i, i + 1);
|
|
1855
1866
|
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1867
|
+
ggml_backend_graph_compute(backend1, &g1v);
|
|
1868
|
+
ggml_backend_graph_compute(backend2, &g2v);
|
|
1869
|
+
|
|
1870
|
+
if (ggml_is_view_op(t1->op)) {
|
|
1871
|
+
continue;
|
|
1872
|
+
}
|
|
1873
|
+
|
|
1874
|
+
// compare results, calculate rms etc
|
|
1875
|
+
if (!callback(i, t1, t2, user_data)) {
|
|
1876
|
+
break;
|
|
1877
|
+
}
|
|
1859
1878
|
}
|
|
1860
1879
|
}
|
|
1861
|
-
|
|
1862
1880
|
ggml_backend_graph_copy_free(copy);
|
|
1863
1881
|
|
|
1864
1882
|
return true;
|