@novastera-oss/llamarn 0.2.9 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +0 -1
- package/cpp/llama.cpp/README.md +4 -5
- package/cpp/llama.cpp/build-xcframework.sh +1 -1
- package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
- package/cpp/llama.cpp/common/arg.cpp +17 -0
- package/cpp/llama.cpp/common/chat.cpp +37 -20
- package/cpp/llama.cpp/common/chat.h +2 -0
- package/cpp/llama.cpp/common/common.h +4 -0
- package/cpp/llama.cpp/convert_hf_to_gguf.py +745 -6
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +9 -0
- package/cpp/llama.cpp/ggml/CMakeLists.txt +7 -2
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml.h +173 -10
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +36 -18
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +68 -5
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +16 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +6 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +28 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1203 -163
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +33 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +142 -9
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +8 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +14 -12
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +5 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +15 -10
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +185 -79
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +2 -8
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +97 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +14 -5
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +64 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +35 -9
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +167 -39
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +254 -57
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +505 -40
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +693 -1034
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +60 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +15 -18
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +711 -292
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +58 -7
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +23 -3
- package/cpp/llama.cpp/ggml/src/ggml.c +382 -61
- package/cpp/llama.cpp/ggml/src/gguf.cpp +8 -1
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +209 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +8 -2
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +73 -21
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +12 -3
- package/cpp/llama.cpp/include/llama.h +0 -40
- package/cpp/llama.cpp/src/llama-arch.cpp +210 -3
- package/cpp/llama.cpp/src/llama-arch.h +18 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +27 -1
- package/cpp/llama.cpp/src/llama-batch.h +8 -1
- package/cpp/llama.cpp/src/llama-chat.cpp +15 -0
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-graph.cpp +119 -184
- package/cpp/llama.cpp/src/llama-graph.h +47 -60
- package/cpp/llama.cpp/src/llama-hparams.cpp +7 -1
- package/cpp/llama.cpp/src/llama-hparams.h +3 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +28 -18
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +4 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +214 -65
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +62 -24
- package/cpp/llama.cpp/src/llama-kv-cells.h +62 -10
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +9 -4
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +3 -1
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +20 -10
- package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
- package/cpp/llama.cpp/src/llama-memory.h +3 -0
- package/cpp/llama.cpp/src/llama-model.cpp +2530 -685
- package/cpp/llama.cpp/src/llama-model.h +18 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +1 -0
- package/cpp/llama.cpp/src/llama-vocab.cpp +13 -2
- package/cpp/llama.cpp/src/llama-vocab.h +41 -0
- package/ios/include/chat.h +2 -0
- package/ios/include/common.h +4 -0
- package/ios/include/llama.h +0 -40
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5055 -4886
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4861
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3764
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4861
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3891 -3766
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4890
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4861
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3764
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5091 -4922
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5066 -4897
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3919 -3794
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
- package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
|
@@ -128,6 +128,9 @@ models = [
|
|
|
128
128
|
{"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", },
|
|
129
129
|
{"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
|
|
130
130
|
{"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
|
|
131
|
+
{"name": "a.x-4.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/skt/A.X-4.0", },
|
|
132
|
+
{"name": "midm-2.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct", },
|
|
133
|
+
{"name": "lfm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LiquidAI/LFM2-Tokenizer"},
|
|
131
134
|
]
|
|
132
135
|
|
|
133
136
|
# some models are known to be broken upstream, so we will skip them as exceptions
|
|
@@ -137,6 +140,12 @@ pre_computed_hashes = [
|
|
|
137
140
|
{"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516"},
|
|
138
141
|
{"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2"},
|
|
139
142
|
{"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", "chkhsh": "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35"},
|
|
143
|
+
{"name": "hunyuan", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Hunyuan-A13B-Instruct", "chkhsh": "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664"},
|
|
144
|
+
# falcon-h1 series uses 4 different tokenizers across model sizes (0.5b - 34b), hence we need to define 4 different hashes
|
|
145
|
+
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base", "chkhsh": "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6"},
|
|
146
|
+
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-1B-Base", "chkhsh": "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86"},
|
|
147
|
+
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-7B-Base", "chkhsh": "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896"},
|
|
148
|
+
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-34B-Base", "chkhsh": "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b"},
|
|
140
149
|
]
|
|
141
150
|
|
|
142
151
|
|
|
@@ -181,7 +181,6 @@ option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug ou
|
|
|
181
181
|
option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
|
|
182
182
|
option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
|
|
183
183
|
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
|
|
184
|
-
option(GGML_KOMPUTE "ggml: use Kompute" OFF)
|
|
185
184
|
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
|
|
186
185
|
option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF)
|
|
187
186
|
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
|
|
@@ -266,7 +265,6 @@ set(GGML_PUBLIC_HEADERS
|
|
|
266
265
|
include/ggml-cann.h
|
|
267
266
|
include/ggml-cpp.h
|
|
268
267
|
include/ggml-cuda.h
|
|
269
|
-
include/ggml-kompute.h
|
|
270
268
|
include/ggml-opt.h
|
|
271
269
|
include/ggml-metal.h
|
|
272
270
|
include/ggml-rpc.h
|
|
@@ -360,6 +358,13 @@ write_basic_package_version_file(
|
|
|
360
358
|
VERSION ${GGML_INSTALL_VERSION}
|
|
361
359
|
COMPATIBILITY SameMajorVersion)
|
|
362
360
|
|
|
361
|
+
target_compile_definitions(ggml-base PRIVATE
|
|
362
|
+
GGML_VERSION="${GGML_INSTALL_VERSION}"
|
|
363
|
+
GGML_COMMIT="${GGML_BUILD_COMMIT}"
|
|
364
|
+
)
|
|
365
|
+
message(STATUS "ggml version: ${GGML_INSTALL_VERSION}")
|
|
366
|
+
message(STATUS "ggml commit: ${GGML_BUILD_COMMIT}")
|
|
367
|
+
|
|
363
368
|
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake
|
|
364
369
|
${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake
|
|
365
370
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml)
|
|
@@ -339,7 +339,7 @@ extern "C" {
|
|
|
339
339
|
typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
|
|
340
340
|
|
|
341
341
|
// Compare the output of two backends
|
|
342
|
-
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
|
|
342
|
+
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node);
|
|
343
343
|
|
|
344
344
|
// Tensor initialization
|
|
345
345
|
GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
|
|
@@ -314,6 +314,13 @@
|
|
|
314
314
|
extern "C" {
|
|
315
315
|
#endif
|
|
316
316
|
|
|
317
|
+
// Function type used in fatal error callbacks
|
|
318
|
+
typedef void (*ggml_abort_callback_t)(const char * error_message);
|
|
319
|
+
|
|
320
|
+
// Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
|
|
321
|
+
// Returns the old callback for chaining
|
|
322
|
+
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback);
|
|
323
|
+
|
|
317
324
|
GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
|
|
318
325
|
GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
|
|
319
326
|
|
|
@@ -482,12 +489,13 @@ extern "C" {
|
|
|
482
489
|
GGML_OP_CONV_TRANSPOSE_1D,
|
|
483
490
|
GGML_OP_IM2COL,
|
|
484
491
|
GGML_OP_IM2COL_BACK,
|
|
492
|
+
GGML_OP_CONV_2D,
|
|
485
493
|
GGML_OP_CONV_2D_DW,
|
|
486
494
|
GGML_OP_CONV_TRANSPOSE_2D,
|
|
487
495
|
GGML_OP_POOL_1D,
|
|
488
496
|
GGML_OP_POOL_2D,
|
|
489
497
|
GGML_OP_POOL_2D_BACK,
|
|
490
|
-
GGML_OP_UPSCALE,
|
|
498
|
+
GGML_OP_UPSCALE,
|
|
491
499
|
GGML_OP_PAD,
|
|
492
500
|
GGML_OP_PAD_REFLECT_1D,
|
|
493
501
|
GGML_OP_ROLL,
|
|
@@ -520,6 +528,8 @@ extern "C" {
|
|
|
520
528
|
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
|
521
529
|
GGML_OP_OPT_STEP_ADAMW,
|
|
522
530
|
|
|
531
|
+
GGML_OP_GLU,
|
|
532
|
+
|
|
523
533
|
GGML_OP_COUNT,
|
|
524
534
|
};
|
|
525
535
|
|
|
@@ -543,6 +553,16 @@ extern "C" {
|
|
|
543
553
|
GGML_UNARY_OP_COUNT,
|
|
544
554
|
};
|
|
545
555
|
|
|
556
|
+
enum ggml_glu_op {
|
|
557
|
+
GGML_GLU_OP_REGLU,
|
|
558
|
+
GGML_GLU_OP_GEGLU,
|
|
559
|
+
GGML_GLU_OP_SWIGLU,
|
|
560
|
+
GGML_GLU_OP_GEGLU_ERF,
|
|
561
|
+
GGML_GLU_OP_GEGLU_QUICK,
|
|
562
|
+
|
|
563
|
+
GGML_GLU_OP_COUNT,
|
|
564
|
+
};
|
|
565
|
+
|
|
546
566
|
enum ggml_object_type {
|
|
547
567
|
GGML_OBJECT_TYPE_TENSOR,
|
|
548
568
|
GGML_OBJECT_TYPE_GRAPH,
|
|
@@ -628,6 +648,9 @@ extern "C" {
|
|
|
628
648
|
|
|
629
649
|
// misc
|
|
630
650
|
|
|
651
|
+
GGML_API const char * ggml_version(void);
|
|
652
|
+
GGML_API const char * ggml_commit(void);
|
|
653
|
+
|
|
631
654
|
GGML_API void ggml_time_init(void); // call this once at the beginning of the program
|
|
632
655
|
GGML_API int64_t ggml_time_ms(void);
|
|
633
656
|
GGML_API int64_t ggml_time_us(void);
|
|
@@ -658,6 +681,7 @@ extern "C" {
|
|
|
658
681
|
GGML_API const char * ggml_op_symbol(enum ggml_op op);
|
|
659
682
|
|
|
660
683
|
GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
|
|
684
|
+
GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op);
|
|
661
685
|
GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
|
|
662
686
|
|
|
663
687
|
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
|
|
@@ -762,6 +786,7 @@ extern "C" {
|
|
|
762
786
|
GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
|
|
763
787
|
|
|
764
788
|
GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
|
|
789
|
+
GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor);
|
|
765
790
|
|
|
766
791
|
GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
|
|
767
792
|
GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
|
|
@@ -1090,6 +1115,89 @@ extern "C" {
|
|
|
1090
1115
|
struct ggml_context * ctx,
|
|
1091
1116
|
struct ggml_tensor * a);
|
|
1092
1117
|
|
|
1118
|
+
// gated linear unit ops
|
|
1119
|
+
// A: n columns, r rows,
|
|
1120
|
+
// result is n / 2 columns, r rows,
|
|
1121
|
+
// expects gate in second half of row, unless swapped is true
|
|
1122
|
+
GGML_API struct ggml_tensor * ggml_glu(
|
|
1123
|
+
struct ggml_context * ctx,
|
|
1124
|
+
struct ggml_tensor * a,
|
|
1125
|
+
enum ggml_glu_op op,
|
|
1126
|
+
bool swapped);
|
|
1127
|
+
|
|
1128
|
+
GGML_API struct ggml_tensor * ggml_reglu(
|
|
1129
|
+
struct ggml_context * ctx,
|
|
1130
|
+
struct ggml_tensor * a);
|
|
1131
|
+
|
|
1132
|
+
GGML_API struct ggml_tensor * ggml_reglu_swapped(
|
|
1133
|
+
struct ggml_context * ctx,
|
|
1134
|
+
struct ggml_tensor * a);
|
|
1135
|
+
|
|
1136
|
+
GGML_API struct ggml_tensor * ggml_geglu(
|
|
1137
|
+
struct ggml_context * ctx,
|
|
1138
|
+
struct ggml_tensor * a);
|
|
1139
|
+
|
|
1140
|
+
GGML_API struct ggml_tensor * ggml_geglu_swapped(
|
|
1141
|
+
struct ggml_context * ctx,
|
|
1142
|
+
struct ggml_tensor * a);
|
|
1143
|
+
|
|
1144
|
+
GGML_API struct ggml_tensor * ggml_swiglu(
|
|
1145
|
+
struct ggml_context * ctx,
|
|
1146
|
+
struct ggml_tensor * a);
|
|
1147
|
+
|
|
1148
|
+
GGML_API struct ggml_tensor * ggml_swiglu_swapped(
|
|
1149
|
+
struct ggml_context * ctx,
|
|
1150
|
+
struct ggml_tensor * a);
|
|
1151
|
+
|
|
1152
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf(
|
|
1153
|
+
struct ggml_context * ctx,
|
|
1154
|
+
struct ggml_tensor * a);
|
|
1155
|
+
|
|
1156
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
|
|
1157
|
+
struct ggml_context * ctx,
|
|
1158
|
+
struct ggml_tensor * a);
|
|
1159
|
+
|
|
1160
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick(
|
|
1161
|
+
struct ggml_context * ctx,
|
|
1162
|
+
struct ggml_tensor * a);
|
|
1163
|
+
|
|
1164
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
|
|
1165
|
+
struct ggml_context * ctx,
|
|
1166
|
+
struct ggml_tensor * a);
|
|
1167
|
+
|
|
1168
|
+
// A: n columns, r rows,
|
|
1169
|
+
// B: n columns, r rows,
|
|
1170
|
+
GGML_API struct ggml_tensor * ggml_glu_split(
|
|
1171
|
+
struct ggml_context * ctx,
|
|
1172
|
+
struct ggml_tensor * a,
|
|
1173
|
+
struct ggml_tensor * b,
|
|
1174
|
+
enum ggml_glu_op op);
|
|
1175
|
+
|
|
1176
|
+
GGML_API struct ggml_tensor * ggml_reglu_split(
|
|
1177
|
+
struct ggml_context * ctx,
|
|
1178
|
+
struct ggml_tensor * a,
|
|
1179
|
+
struct ggml_tensor * b);
|
|
1180
|
+
|
|
1181
|
+
GGML_API struct ggml_tensor * ggml_geglu_split(
|
|
1182
|
+
struct ggml_context * ctx,
|
|
1183
|
+
struct ggml_tensor * a,
|
|
1184
|
+
struct ggml_tensor * b);
|
|
1185
|
+
|
|
1186
|
+
GGML_API struct ggml_tensor * ggml_swiglu_split(
|
|
1187
|
+
struct ggml_context * ctx,
|
|
1188
|
+
struct ggml_tensor * a,
|
|
1189
|
+
struct ggml_tensor * b);
|
|
1190
|
+
|
|
1191
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf_split(
|
|
1192
|
+
struct ggml_context * ctx,
|
|
1193
|
+
struct ggml_tensor * a,
|
|
1194
|
+
struct ggml_tensor * b);
|
|
1195
|
+
|
|
1196
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick_split(
|
|
1197
|
+
struct ggml_context * ctx,
|
|
1198
|
+
struct ggml_tensor * a,
|
|
1199
|
+
struct ggml_tensor * b);
|
|
1200
|
+
|
|
1093
1201
|
// normalize along rows
|
|
1094
1202
|
GGML_API struct ggml_tensor * ggml_norm(
|
|
1095
1203
|
struct ggml_context * ctx,
|
|
@@ -1189,6 +1297,19 @@ extern "C" {
|
|
|
1189
1297
|
struct ggml_tensor * a,
|
|
1190
1298
|
float s);
|
|
1191
1299
|
|
|
1300
|
+
// x = s * a + b
|
|
1301
|
+
GGML_API struct ggml_tensor * ggml_scale_bias(
|
|
1302
|
+
struct ggml_context * ctx,
|
|
1303
|
+
struct ggml_tensor * a,
|
|
1304
|
+
float s,
|
|
1305
|
+
float b);
|
|
1306
|
+
|
|
1307
|
+
GGML_API struct ggml_tensor * ggml_scale_bias_inplace(
|
|
1308
|
+
struct ggml_context * ctx,
|
|
1309
|
+
struct ggml_tensor * a,
|
|
1310
|
+
float s,
|
|
1311
|
+
float b);
|
|
1312
|
+
|
|
1192
1313
|
// b -> view(a,offset,nb1,nb2,3), return modified a
|
|
1193
1314
|
GGML_API struct ggml_tensor * ggml_set(
|
|
1194
1315
|
struct ggml_context * ctx,
|
|
@@ -1433,8 +1554,14 @@ extern "C" {
|
|
|
1433
1554
|
struct ggml_context * ctx,
|
|
1434
1555
|
struct ggml_tensor * a);
|
|
1435
1556
|
|
|
1557
|
+
// a [ne0, ne01, ne02, ne03]
|
|
1558
|
+
// mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
|
|
1559
|
+
//
|
|
1560
|
+
// broadcast:
|
|
1561
|
+
// ne02 % ne12 == 0
|
|
1562
|
+
// ne03 % ne13 == 0
|
|
1563
|
+
//
|
|
1436
1564
|
// fused soft_max(a*scale + mask*(ALiBi slope))
|
|
1437
|
-
// mask is optional
|
|
1438
1565
|
// max_bias = 0.0f for no ALiBi
|
|
1439
1566
|
GGML_API struct ggml_tensor * ggml_soft_max_ext(
|
|
1440
1567
|
struct ggml_context * ctx,
|
|
@@ -1744,6 +1871,17 @@ extern "C" {
|
|
|
1744
1871
|
struct ggml_tensor * b,
|
|
1745
1872
|
int stride);
|
|
1746
1873
|
|
|
1874
|
+
GGML_API struct ggml_tensor * ggml_conv_2d_direct(
|
|
1875
|
+
struct ggml_context * ctx,
|
|
1876
|
+
struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC]
|
|
1877
|
+
struct ggml_tensor * b, // input data [W, H, C, N]
|
|
1878
|
+
int s0, // stride dimension 0
|
|
1879
|
+
int s1, // stride dimension 1
|
|
1880
|
+
int p0, // padding dimension 0
|
|
1881
|
+
int p1, // padding dimension 1
|
|
1882
|
+
int d0, // dilation dimension 0
|
|
1883
|
+
int d1); // dilation dimension 1
|
|
1884
|
+
|
|
1747
1885
|
enum ggml_op_pool {
|
|
1748
1886
|
GGML_OP_POOL_MAX,
|
|
1749
1887
|
GGML_OP_POOL_AVG,
|
|
@@ -1786,6 +1924,12 @@ extern "C" {
|
|
|
1786
1924
|
enum ggml_scale_mode {
|
|
1787
1925
|
GGML_SCALE_MODE_NEAREST = 0,
|
|
1788
1926
|
GGML_SCALE_MODE_BILINEAR = 1,
|
|
1927
|
+
|
|
1928
|
+
GGML_SCALE_MODE_COUNT
|
|
1929
|
+
};
|
|
1930
|
+
|
|
1931
|
+
enum ggml_scale_flag {
|
|
1932
|
+
GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
|
|
1789
1933
|
};
|
|
1790
1934
|
|
|
1791
1935
|
// interpolate
|
|
@@ -1798,14 +1942,26 @@ extern "C" {
|
|
|
1798
1942
|
|
|
1799
1943
|
// interpolate
|
|
1800
1944
|
// interpolate scale to specified dimensions
|
|
1801
|
-
GGML_API struct ggml_tensor * ggml_upscale_ext(
|
|
1945
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
|
|
1802
1946
|
struct ggml_context * ctx,
|
|
1803
1947
|
struct ggml_tensor * a,
|
|
1804
1948
|
int ne0,
|
|
1805
1949
|
int ne1,
|
|
1806
1950
|
int ne2,
|
|
1807
1951
|
int ne3,
|
|
1808
|
-
enum ggml_scale_mode mode)
|
|
1952
|
+
enum ggml_scale_mode mode),
|
|
1953
|
+
"use ggml_interpolate instead");
|
|
1954
|
+
|
|
1955
|
+
// Up- or downsamples the input to the specified size.
|
|
1956
|
+
// 2D scale modes (eg. bilinear) are applied to the first two dimensions.
|
|
1957
|
+
GGML_API struct ggml_tensor * ggml_interpolate(
|
|
1958
|
+
struct ggml_context * ctx,
|
|
1959
|
+
struct ggml_tensor * a,
|
|
1960
|
+
int64_t ne0,
|
|
1961
|
+
int64_t ne1,
|
|
1962
|
+
int64_t ne2,
|
|
1963
|
+
int64_t ne3,
|
|
1964
|
+
uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
|
|
1809
1965
|
|
|
1810
1966
|
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
|
1811
1967
|
GGML_API struct ggml_tensor * ggml_pad(
|
|
@@ -1868,11 +2024,17 @@ extern "C" {
|
|
|
1868
2024
|
|
|
1869
2025
|
#define GGML_KQ_MASK_PAD 64
|
|
1870
2026
|
|
|
1871
|
-
// q: [n_embd_k, n_batch, n_head,
|
|
1872
|
-
// k: [n_embd_k, n_kv, n_head_kv,
|
|
1873
|
-
// v: [n_embd_v, n_kv, n_head_kv,
|
|
1874
|
-
// mask: [n_kv, n_batch_pad,
|
|
1875
|
-
// res: [n_embd_v, n_head, n_batch,
|
|
2027
|
+
// q: [n_embd_k, n_batch, n_head, ne3 ]
|
|
2028
|
+
// k: [n_embd_k, n_kv, n_head_kv, ne3 ]
|
|
2029
|
+
// v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
|
|
2030
|
+
// mask: [n_kv, n_batch_pad, ne32, ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
|
|
2031
|
+
// res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
|
|
2032
|
+
//
|
|
2033
|
+
// broadcast:
|
|
2034
|
+
// n_head % n_head_kv == 0
|
|
2035
|
+
// n_head % ne32 == 0
|
|
2036
|
+
// ne3 % ne33 == 0
|
|
2037
|
+
//
|
|
1876
2038
|
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
|
|
1877
2039
|
struct ggml_context * ctx,
|
|
1878
2040
|
struct ggml_tensor * q,
|
|
@@ -1911,7 +2073,8 @@ extern "C" {
|
|
|
1911
2073
|
struct ggml_tensor * dt,
|
|
1912
2074
|
struct ggml_tensor * A,
|
|
1913
2075
|
struct ggml_tensor * B,
|
|
1914
|
-
struct ggml_tensor * C
|
|
2076
|
+
struct ggml_tensor * C,
|
|
2077
|
+
struct ggml_tensor * ids);
|
|
1915
2078
|
|
|
1916
2079
|
// partition into non-overlapping windows with padding if needed
|
|
1917
2080
|
// example:
|
|
@@ -61,10 +61,6 @@
|
|
|
61
61
|
#include "ggml-cann.h"
|
|
62
62
|
#endif
|
|
63
63
|
|
|
64
|
-
#ifdef GGML_USE_KOMPUTE
|
|
65
|
-
#include "ggml-kompute.h"
|
|
66
|
-
#endif
|
|
67
|
-
|
|
68
64
|
// disable C++17 deprecation warning for std::codecvt_utf8
|
|
69
65
|
#if defined(__clang__)
|
|
70
66
|
# pragma clang diagnostic push
|
|
@@ -189,9 +185,6 @@ struct ggml_backend_registry {
|
|
|
189
185
|
#ifdef GGML_USE_RPC
|
|
190
186
|
register_backend(ggml_backend_rpc_reg());
|
|
191
187
|
#endif
|
|
192
|
-
#ifdef GGML_USE_KOMPUTE
|
|
193
|
-
register_backend(ggml_backend_kompute_reg());
|
|
194
|
-
#endif
|
|
195
188
|
#ifdef GGML_USE_CPU
|
|
196
189
|
register_backend(ggml_backend_cpu_reg());
|
|
197
190
|
#endif
|
|
@@ -575,7 +568,6 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
|
|
|
575
568
|
ggml_backend_load_best("cann", silent, dir_path);
|
|
576
569
|
ggml_backend_load_best("cuda", silent, dir_path);
|
|
577
570
|
ggml_backend_load_best("hip", silent, dir_path);
|
|
578
|
-
ggml_backend_load_best("kompute", silent, dir_path);
|
|
579
571
|
ggml_backend_load_best("metal", silent, dir_path);
|
|
580
572
|
ggml_backend_load_best("rpc", silent, dir_path);
|
|
581
573
|
ggml_backend_load_best("sycl", silent, dir_path);
|
|
@@ -817,8 +817,9 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
|
|
|
817
817
|
}
|
|
818
818
|
if (sched->debug > 1) {
|
|
819
819
|
ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node);
|
|
820
|
-
GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, ggml_op_name(node->op), node->name,
|
|
821
|
-
fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node)
|
|
820
|
+
GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s] use=%d:", i, ggml_op_name(node->op), node->name,
|
|
821
|
+
fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node),
|
|
822
|
+
graph->use_counts[ggml_hash_find(&graph->visited_hash_set, node)]);
|
|
822
823
|
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
|
823
824
|
struct ggml_tensor * src = node->src[j];
|
|
824
825
|
if (src == NULL) {
|
|
@@ -1826,7 +1827,7 @@ void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy) {
|
|
|
1826
1827
|
ggml_free(copy.ctx_unallocated);
|
|
1827
1828
|
}
|
|
1828
1829
|
|
|
1829
|
-
bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data) {
|
|
1830
|
+
bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node) {
|
|
1830
1831
|
struct ggml_backend_graph_copy copy = ggml_backend_graph_copy(backend2, graph);
|
|
1831
1832
|
if (copy.buffer == NULL) {
|
|
1832
1833
|
return false;
|
|
@@ -1837,28 +1838,45 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
|
|
|
1837
1838
|
|
|
1838
1839
|
assert(g1->n_nodes == g2->n_nodes);
|
|
1839
1840
|
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1841
|
+
if (test_node != nullptr) {
|
|
1842
|
+
// Compute the whole graph and only test the output for a specific tensor
|
|
1843
|
+
ggml_backend_graph_compute(backend1, g1);
|
|
1844
|
+
ggml_backend_graph_compute(backend2, g2);
|
|
1843
1845
|
|
|
1844
|
-
|
|
1846
|
+
int test_node_idx = -1;
|
|
1847
|
+
for (int i = 0; i < g1->n_nodes; i++) {
|
|
1848
|
+
struct ggml_tensor * t1 = g1->nodes[i];
|
|
1849
|
+
if (t1 == test_node) {
|
|
1850
|
+
test_node_idx = i;
|
|
1851
|
+
break;
|
|
1852
|
+
}
|
|
1853
|
+
}
|
|
1854
|
+
GGML_ASSERT(test_node_idx != -1);
|
|
1845
1855
|
|
|
1846
|
-
|
|
1847
|
-
|
|
1856
|
+
callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data);
|
|
1857
|
+
} else {
|
|
1858
|
+
for (int i = 0; i < g1->n_nodes; i++) {
|
|
1859
|
+
struct ggml_tensor * t1 = g1->nodes[i];
|
|
1860
|
+
struct ggml_tensor * t2 = g2->nodes[i];
|
|
1848
1861
|
|
|
1849
|
-
|
|
1850
|
-
ggml_backend_graph_compute(backend2, &g2v);
|
|
1862
|
+
assert(t1->op == t2->op && ggml_are_same_layout(t1, t2));
|
|
1851
1863
|
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
}
|
|
1864
|
+
struct ggml_cgraph g1v = ggml_graph_view(g1, i, i + 1);
|
|
1865
|
+
struct ggml_cgraph g2v = ggml_graph_view(g2, i, i + 1);
|
|
1855
1866
|
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1867
|
+
ggml_backend_graph_compute(backend1, &g1v);
|
|
1868
|
+
ggml_backend_graph_compute(backend2, &g2v);
|
|
1869
|
+
|
|
1870
|
+
if (ggml_is_view_op(t1->op)) {
|
|
1871
|
+
continue;
|
|
1872
|
+
}
|
|
1873
|
+
|
|
1874
|
+
// compare results, calculate rms etc
|
|
1875
|
+
if (!callback(i, t1, t2, user_data)) {
|
|
1876
|
+
break;
|
|
1877
|
+
}
|
|
1859
1878
|
}
|
|
1860
1879
|
}
|
|
1861
|
-
|
|
1862
1880
|
ggml_backend_graph_copy_free(copy);
|
|
1863
1881
|
|
|
1864
1882
|
return true;
|
|
@@ -65,8 +65,9 @@
|
|
|
65
65
|
#include <aclnnop/aclnn_eq_tensor.h>
|
|
66
66
|
#include <aclnnop/aclnn_gt_scalar.h>
|
|
67
67
|
#include <aclnnop/aclnn_pow.h>
|
|
68
|
-
#include <aclnnop/
|
|
68
|
+
#include <aclnnop/aclnn_grouped_matmul_v3.h>
|
|
69
69
|
#include <aclnnop/aclnn_fused_infer_attention_score_v2.h>
|
|
70
|
+
#include <aclnnop/aclnn_zero.h>
|
|
70
71
|
#include <float.h>
|
|
71
72
|
|
|
72
73
|
#include <cmath>
|
|
@@ -804,10 +805,11 @@ static aclTensor* aclnn_zero(ggml_backend_cann_context& ctx, void* buffer,
|
|
|
804
805
|
nb[i] = nb[i - 1] * ne[i - 1];
|
|
805
806
|
}
|
|
806
807
|
|
|
807
|
-
ggml_cann_async_memset(ctx, buffer, n_bytes, 0);
|
|
808
808
|
aclTensor* zero =
|
|
809
809
|
ggml_cann_create_tensor(buffer, type, type_size, ne, nb, dims);
|
|
810
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceZero, zero);
|
|
810
811
|
return zero;
|
|
812
|
+
GGML_UNUSED(n_bytes);
|
|
811
813
|
}
|
|
812
814
|
|
|
813
815
|
/**
|
|
@@ -2654,6 +2656,67 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor*
|
|
|
2654
2656
|
memcpy(ori_src0_nb, cast_nb, sizeof(ori_src0_nb));
|
|
2655
2657
|
}
|
|
2656
2658
|
|
|
2659
|
+
#ifdef ASCEND_310P
|
|
2660
|
+
ggml_tensor src0_row = *src0;
|
|
2661
|
+
ggml_tensor src1_row = *src1;
|
|
2662
|
+
ggml_tensor dst_row = *dst;
|
|
2663
|
+
|
|
2664
|
+
if (src0->type == GGML_TYPE_F16) {
|
|
2665
|
+
src0_row.type = GGML_TYPE_F32;
|
|
2666
|
+
}
|
|
2667
|
+
|
|
2668
|
+
// src0_row [D, M, 1, 1] weight without permute
|
|
2669
|
+
src0_row.ne[2] = 1;
|
|
2670
|
+
src0_row.ne[3] = 1;
|
|
2671
|
+
src0_row.nb[0] = ori_src0_nb[0];
|
|
2672
|
+
src0_row.nb[1] = ori_src0_nb[1];
|
|
2673
|
+
src0_row.nb[2] = ori_src0_nb[1];
|
|
2674
|
+
src0_row.nb[3] = ori_src0_nb[1];
|
|
2675
|
+
|
|
2676
|
+
// src1_row [D, 1, 1, 1] -> input
|
|
2677
|
+
src1_row.ne[1] = 1;
|
|
2678
|
+
src1_row.ne[2] = 1;
|
|
2679
|
+
src1_row.ne[3] = 1;
|
|
2680
|
+
src1_row.nb[2] = nb11;
|
|
2681
|
+
src1_row.nb[3] = nb11;
|
|
2682
|
+
|
|
2683
|
+
// dst_row [M, 1, 1, 1] -> out
|
|
2684
|
+
dst_row.ne[1] = 1;
|
|
2685
|
+
dst_row.ne[2] = 1;
|
|
2686
|
+
dst_row.ne[3] = 1;
|
|
2687
|
+
dst_row.nb[2] = nb1;
|
|
2688
|
+
dst_row.nb[3] = nb1;
|
|
2689
|
+
|
|
2690
|
+
//create weight for one row
|
|
2691
|
+
for (int64_t iid1 = 0; iid1 < ids->ne[1]; iid1++) {
|
|
2692
|
+
for (int64_t id = 0; id < n_ids; id++) {
|
|
2693
|
+
// expert index
|
|
2694
|
+
int32_t i02 = *(int32_t *) (ids_host.data() + iid1*ids->nb[1] + id*ids->nb[0]);
|
|
2695
|
+
GGML_ASSERT(i02 >= 0 && i02 < n_as);
|
|
2696
|
+
|
|
2697
|
+
// If B = 1 (broadcast), always use 0; otherwise, use id.
|
|
2698
|
+
int64_t i11 = (ne11 == 1 ? 0 : id);
|
|
2699
|
+
int64_t i12 = iid1;
|
|
2700
|
+
|
|
2701
|
+
int64_t i1 = id;
|
|
2702
|
+
int64_t i2 = i12;
|
|
2703
|
+
|
|
2704
|
+
void* src0_tmp_ptr = src0_original + i02*ori_src0_nb[2];
|
|
2705
|
+
void* src1_tmp_ptr = src1_original + i11*nb11 + i12*nb12;
|
|
2706
|
+
void* dst_tmp_ptr = dst_original + i1*nb1 + i2*nb2;
|
|
2707
|
+
|
|
2708
|
+
src0_row.data = src0_tmp_ptr;
|
|
2709
|
+
src1_row.data = src1_tmp_ptr;
|
|
2710
|
+
dst_row.data = dst_tmp_ptr;
|
|
2711
|
+
dst_row.src[0] = &src0_row;
|
|
2712
|
+
dst_row.src[1] = &src1_row;
|
|
2713
|
+
|
|
2714
|
+
ggml_cann_mul_mat(ctx, &dst_row);
|
|
2715
|
+
}
|
|
2716
|
+
}
|
|
2717
|
+
return;
|
|
2718
|
+
#endif
|
|
2719
|
+
|
|
2657
2720
|
std::vector<aclTensor*> src0_tensor_vec;
|
|
2658
2721
|
std::vector<aclTensor*> src1_tensor_vec;
|
|
2659
2722
|
std::vector<aclTensor*> dst_tensor_vec;
|
|
@@ -2701,9 +2764,9 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor*
|
|
|
2701
2764
|
}
|
|
2702
2765
|
|
|
2703
2766
|
size_t GROUP_SIZE = 128;
|
|
2704
|
-
//
|
|
2767
|
+
// GroupedMatmulV3 required tensor_list.size < 128
|
|
2705
2768
|
for (size_t i = 0; i < src0_tensor_vec.size(); i += GROUP_SIZE) {
|
|
2706
|
-
// split and call
|
|
2769
|
+
// split and call GroupedMatmulV3
|
|
2707
2770
|
size_t end = std::min(i + GROUP_SIZE, src0_tensor_vec.size());
|
|
2708
2771
|
std::vector<aclTensor*> src0_tensor_vec_split(src0_tensor_vec.begin() + i, src0_tensor_vec.begin() + end);
|
|
2709
2772
|
std::vector<aclTensor*> src1_tensor_vec_split(src1_tensor_vec.begin() + i, src1_tensor_vec.begin() + end);
|
|
@@ -2713,7 +2776,7 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor*
|
|
|
2713
2776
|
aclTensorList* src1_tensor_list = aclCreateTensorList(src1_tensor_vec_split.data(), src1_tensor_vec_split.size());
|
|
2714
2777
|
aclTensorList* dst_tensor_list = aclCreateTensorList(dst_tensor_vec_split.data(), dst_tensor_vec_split.size());
|
|
2715
2778
|
|
|
2716
|
-
GGML_CANN_CALL_ACLNN_OP(ctx,
|
|
2779
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, GroupedMatmulV3, src1_tensor_list, src0_tensor_list,
|
|
2717
2780
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0, -1, dst_tensor_list);
|
|
2718
2781
|
|
|
2719
2782
|
ggml_cann_release_resources(ctx, src0_tensor_list, src1_tensor_list, dst_tensor_list);
|
|
@@ -2086,6 +2086,12 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
|
2086
2086
|
return false;
|
|
2087
2087
|
}
|
|
2088
2088
|
} break;
|
|
2089
|
+
case GGML_OP_SET_ROWS:
|
|
2090
|
+
{
|
|
2091
|
+
// TODO: add support
|
|
2092
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/14274
|
|
2093
|
+
return false;
|
|
2094
|
+
} break;
|
|
2089
2095
|
case GGML_OP_CPY: {
|
|
2090
2096
|
ggml_tensor *src = op->src[0];
|
|
2091
2097
|
if ((op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_F16) ||
|
|
@@ -2182,12 +2188,10 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
|
2182
2188
|
case GGML_OP_MUL:
|
|
2183
2189
|
case GGML_OP_DIV:
|
|
2184
2190
|
case GGML_OP_RMS_NORM:
|
|
2185
|
-
case GGML_OP_SCALE:
|
|
2186
2191
|
case GGML_OP_SQR:
|
|
2187
2192
|
case GGML_OP_SQRT:
|
|
2188
2193
|
case GGML_OP_CLAMP:
|
|
2189
2194
|
case GGML_OP_DIAG_MASK_INF:
|
|
2190
|
-
case GGML_OP_SOFT_MAX:
|
|
2191
2195
|
case GGML_OP_SUM_ROWS:
|
|
2192
2196
|
case GGML_OP_ARGSORT:
|
|
2193
2197
|
case GGML_OP_ACC:
|
|
@@ -2205,6 +2209,14 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
|
2205
2209
|
case GGML_OP_PAD_REFLECT_1D:
|
|
2206
2210
|
case GGML_OP_COUNT_EQUAL:
|
|
2207
2211
|
return true;
|
|
2212
|
+
case GGML_OP_SCALE:
|
|
2213
|
+
float bias;
|
|
2214
|
+
memcpy(&bias, (float*)op->op_params + 1, sizeof(float));
|
|
2215
|
+
return bias == 0.0f; // TODO: support bias != 0.0f
|
|
2216
|
+
case GGML_OP_SOFT_MAX:
|
|
2217
|
+
// TODO: support broadcast
|
|
2218
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/14435
|
|
2219
|
+
return !op->src[1] || (op->src[1]->ne[2] == 1 && op->src[1]->ne[3] == 1);
|
|
2208
2220
|
case GGML_OP_FLASH_ATTN_EXT:{
|
|
2209
2221
|
// derived from [ggml-cuda.cu]
|
|
2210
2222
|
if(op->src[1]->type != GGML_TYPE_F16 || op->src[2]->type != GGML_TYPE_F16){
|
|
@@ -2227,6 +2239,8 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
|
2227
2239
|
// DeepSeek MLA
|
|
2228
2240
|
return false;
|
|
2229
2241
|
}
|
|
2242
|
+
// TODO: support broadcast
|
|
2243
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/14435
|
|
2230
2244
|
if (op->src[0]->ne[3] != 1) {
|
|
2231
2245
|
return false;
|
|
2232
2246
|
}
|
|
@@ -5,7 +5,7 @@ function(ggml_add_cpu_backend_features cpu_name arch)
|
|
|
5
5
|
# build, using set_source_files_properties() to set the arch flags is not possible
|
|
6
6
|
set(GGML_CPU_FEATS_NAME ${cpu_name}-feats)
|
|
7
7
|
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp)
|
|
8
|
-
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE .
|
|
8
|
+
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . ../include)
|
|
9
9
|
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARGN})
|
|
10
10
|
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
|
|
11
11
|
set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
@@ -589,4 +589,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
589
589
|
if (EMSCRIPTEN)
|
|
590
590
|
set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
|
|
591
591
|
endif()
|
|
592
|
+
|
|
593
|
+
if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
|
|
594
|
+
# The compiler automatically enables "-ffast-math" which can cause NaNs in tests due to "-fassociative-math"
|
|
595
|
+
target_compile_options(${GGML_CPU_NAME} PRIVATE "-fno-associative-math")
|
|
596
|
+
endif()
|
|
592
597
|
endfunction()
|