@novastera-oss/llamarn 0.4.0 → 0.4.3-beta4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/RNLlamaCpp.podspec +4 -1
- package/android/CMakeLists.txt +13 -3
- package/android/src/main/cpp/include/llama.h +44 -21
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +2 -10
- package/cpp/SystemUtils.cpp +3 -7
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +12 -0
- package/cpp/llama.cpp/CODEOWNERS +116 -10
- package/cpp/llama.cpp/CONTRIBUTING.md +30 -3
- package/cpp/llama.cpp/README.md +13 -5
- package/cpp/llama.cpp/build-xcframework.sh +5 -0
- package/cpp/llama.cpp/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- package/cpp/llama.cpp/common/CMakeLists.txt +12 -2
- package/cpp/llama.cpp/common/arg.cpp +303 -795
- package/cpp/llama.cpp/common/arg.h +2 -3
- package/cpp/llama.cpp/common/chat-parser-xml-toolcall.cpp +861 -0
- package/cpp/llama.cpp/common/chat-parser-xml-toolcall.h +45 -0
- package/cpp/llama.cpp/common/chat-parser.cpp +156 -15
- package/cpp/llama.cpp/common/chat-parser.h +13 -0
- package/cpp/llama.cpp/common/chat.cpp +1147 -88
- package/cpp/llama.cpp/common/chat.h +16 -3
- package/cpp/llama.cpp/common/common.cpp +70 -15
- package/cpp/llama.cpp/common/common.h +57 -19
- package/cpp/llama.cpp/common/download.cpp +1072 -0
- package/cpp/llama.cpp/common/download.h +55 -0
- package/cpp/llama.cpp/common/http.h +73 -0
- package/cpp/llama.cpp/common/json-partial.cpp +70 -2
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +61 -22
- package/cpp/llama.cpp/common/json-schema-to-grammar.h +2 -0
- package/cpp/llama.cpp/common/log.cpp +59 -2
- package/cpp/llama.cpp/common/log.h +12 -4
- package/cpp/llama.cpp/common/sampling.cpp +84 -8
- package/cpp/llama.cpp/common/sampling.h +3 -1
- package/cpp/llama.cpp/common/speculative.cpp +1 -1
- package/cpp/llama.cpp/convert_hf_to_gguf.py +1608 -233
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +6 -1
- package/cpp/llama.cpp/convert_lora_to_gguf.py +37 -5
- package/cpp/llama.cpp/ggml/CMakeLists.txt +47 -28
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +19 -1
- package/cpp/llama.cpp/ggml/include/ggml-cpu.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-hexagon.h +19 -0
- package/cpp/llama.cpp/ggml/include/ggml-metal.h +1 -6
- package/cpp/llama.cpp/ggml/include/ggml-rpc.h +7 -9
- package/cpp/llama.cpp/ggml/include/ggml-zdnn.h +2 -1
- package/cpp/llama.cpp/ggml/include/ggml.h +199 -6
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +38 -0
- package/cpp/llama.cpp/ggml/src/ggml-alloc.c +299 -130
- package/cpp/llama.cpp/ggml/src/ggml-backend-impl.h +4 -4
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +21 -5
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +99 -2
- package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +57 -45
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +138 -47
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +1584 -1773
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +201 -317
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +146 -187
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +771 -713
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +135 -77
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +5 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +16 -17
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +318 -145
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +155 -60
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +8 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +14 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +10 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +108 -64
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +14 -4
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +530 -87
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +37 -45
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +349 -127
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +947 -1218
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +5 -4
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +143 -29
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +82 -76
- package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +233 -28
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +326 -66
- package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +12 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/argsort.cu +102 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/binbcast.cu +110 -76
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +167 -38
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d.cu +6 -11
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +12 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +245 -151
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cuh +1 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +341 -289
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile.cu +49 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile.cuh +1233 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec.cuh +586 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +123 -220
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +41 -39
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +715 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +150 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cuh +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +321 -24
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmf.cu +93 -351
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmf.cuh +828 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmid.cu +164 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmid.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +3 -166
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvf.cu +371 -78
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvf.cuh +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cu +279 -147
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +97 -85
- package/cpp/llama.cpp/ggml/src/ggml-cuda/pad.cu +46 -23
- package/cpp/llama.cpp/ggml/src/ggml-cuda/pad_reflect_1d.cu +63 -54
- package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +12 -10
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +192 -77
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +10 -9
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cu +137 -75
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set.cu +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set.cuh +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +40 -19
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/topk-moe.cu +336 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/topk-moe.cuh +16 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/tsembd.cu +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +105 -11
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +36 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +87 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +28 -12
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/CMakeLists.txt +68 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3807 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +40 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/act-ops.c +442 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +40 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-dma.c +69 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-dma.h +119 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +156 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +64 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-exp.c +93 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.c +60 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-utils.c +960 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +1032 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/main.c +829 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2223 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +418 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +255 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp-utils.c +448 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp-utils.h +220 -0
- package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +8 -13
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +110 -12
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +6 -5
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +446 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-context.h +33 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-context.m +599 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1662 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.h +251 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.m +1527 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +244 -39
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +3844 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.h +90 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.cpp +723 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +3453 -1907
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +10 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1331 -109
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/cvt.cl +126 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +31 -4
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +35 -7
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +31 -4
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +123 -10
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +341 -161
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +6 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +74 -15
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +50 -30
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +10 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +166 -99
- package/cpp/llama.cpp/ggml/src/ggml-sycl/count-equal.cpp +79 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/count-equal.hpp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +72 -94
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +67 -49
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +21 -31
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +252 -316
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +6 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +9 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +359 -142
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +80 -60
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +201 -132
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +230 -55
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.hpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/pad.cpp +97 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/pad.hpp +24 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/pad_reflect_1d.cpp +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/pad_reflect_1d.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/roll.cpp +122 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/roll.hpp +20 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +50 -41
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set.cpp +73 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set.hpp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +45 -36
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +330 -165
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +12 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +16 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +4184 -2159
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +53 -30
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +13 -6
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +138 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +7 -6
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +5 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +52 -14
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +50 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +61 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +54 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +5 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +21 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +28 -18
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +15 -7
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +125 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +229 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +33 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +3 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +3 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +3 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +140 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +106 -634
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +118 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +556 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +70 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +77 -214
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +589 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +25 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +55 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +45 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +227 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +5 -52
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +5 -35
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +5 -35
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +5 -41
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +6 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +140 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +5 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +171 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +79 -29
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +471 -196
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1690 -383
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +45 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +930 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +57 -10
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +874 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +25 -912
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/{set_rows.wgsl → set_rows.tmpl.wgsl} +38 -8
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- package/cpp/llama.cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
- package/cpp/llama.cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +96 -314
- package/cpp/llama.cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- package/cpp/llama.cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- package/cpp/llama.cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
- package/cpp/llama.cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +440 -17
- package/cpp/llama.cpp/ggml/src/gguf.cpp +104 -29
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +363 -13
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +64 -0
- package/cpp/llama.cpp/gguf-py/gguf/lazy.py +8 -3
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_convert_endian.py +6 -0
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +156 -18
- package/cpp/llama.cpp/gguf-py/gguf/utility.py +80 -0
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +4 -4
- package/cpp/llama.cpp/include/llama.h +44 -21
- package/cpp/llama.cpp/media/llama1-icon-transparent.png +0 -0
- package/cpp/llama.cpp/media/llama1-icon-transparent.svg +77 -0
- package/cpp/llama.cpp/media/llama1-icon.png +0 -0
- package/cpp/llama.cpp/media/llama1-icon.svg +87 -0
- package/cpp/llama.cpp/requirements/requirements-all.txt +2 -0
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -3
- package/cpp/llama.cpp/requirements/requirements-convert_legacy_llama.txt +3 -1
- package/cpp/llama.cpp/requirements/requirements-tool_bench.txt +1 -1
- package/cpp/llama.cpp/src/CMakeLists.txt +101 -0
- package/cpp/llama.cpp/src/llama-adapter.cpp +33 -0
- package/cpp/llama.cpp/src/llama-adapter.h +3 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +344 -14
- package/cpp/llama.cpp/src/llama-arch.h +50 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +63 -31
- package/cpp/llama.cpp/src/llama-batch.h +13 -2
- package/cpp/llama.cpp/src/llama-chat.cpp +85 -3
- package/cpp/llama.cpp/src/llama-chat.h +4 -0
- package/cpp/llama.cpp/src/llama-context.cpp +300 -45
- package/cpp/llama.cpp/src/llama-context.h +16 -6
- package/cpp/llama.cpp/src/llama-cparams.h +2 -1
- package/cpp/llama.cpp/src/llama-grammar.cpp +17 -9
- package/cpp/llama.cpp/src/llama-graph.cpp +226 -64
- package/cpp/llama.cpp/src/llama-graph.h +27 -5
- package/cpp/llama.cpp/src/llama-hparams.cpp +53 -2
- package/cpp/llama.cpp/src/llama-hparams.h +48 -8
- package/cpp/llama.cpp/src/llama-impl.cpp +3 -3
- package/cpp/llama.cpp/src/llama-impl.h +2 -0
- package/cpp/llama.cpp/src/llama-kv-cache-iswa.cpp +13 -3
- package/cpp/llama.cpp/src/llama-kv-cache-iswa.h +2 -0
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +120 -62
- package/cpp/llama.cpp/src/llama-kv-cache.h +13 -4
- package/cpp/llama.cpp/src/llama-kv-cells.h +44 -2
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +19 -9
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +2 -0
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +38 -17
- package/cpp/llama.cpp/src/llama-memory-recurrent.h +5 -2
- package/cpp/llama.cpp/src/llama-memory.h +3 -0
- package/cpp/llama.cpp/src/llama-model-loader.cpp +2 -0
- package/cpp/llama.cpp/src/llama-model.cpp +1070 -12614
- package/cpp/llama.cpp/src/llama-model.h +40 -4
- package/cpp/llama.cpp/src/llama-quant.cpp +14 -6
- package/cpp/llama.cpp/src/llama-sampling.cpp +243 -136
- package/cpp/llama.cpp/src/llama-vocab.cpp +43 -3
- package/cpp/llama.cpp/src/llama-vocab.h +43 -39
- package/cpp/llama.cpp/src/llama.cpp +69 -10
- package/cpp/llama.cpp/src/models/afmoe.cpp +187 -0
- package/cpp/llama.cpp/src/models/apertus.cpp +125 -0
- package/cpp/llama.cpp/src/models/arcee.cpp +135 -0
- package/cpp/llama.cpp/src/models/arctic.cpp +138 -0
- package/cpp/llama.cpp/src/models/arwkv7.cpp +86 -0
- package/cpp/llama.cpp/src/models/baichuan.cpp +122 -0
- package/cpp/llama.cpp/src/models/bailingmoe.cpp +144 -0
- package/cpp/llama.cpp/src/models/bailingmoe2.cpp +135 -0
- package/cpp/llama.cpp/src/models/bert.cpp +176 -0
- package/cpp/llama.cpp/src/models/bitnet.cpp +160 -0
- package/cpp/llama.cpp/src/models/bloom.cpp +101 -0
- package/cpp/llama.cpp/src/models/chameleon.cpp +178 -0
- package/cpp/llama.cpp/src/models/chatglm.cpp +132 -0
- package/cpp/llama.cpp/src/models/codeshell.cpp +111 -0
- package/cpp/llama.cpp/src/models/cogvlm.cpp +100 -0
- package/cpp/llama.cpp/src/models/cohere2-iswa.cpp +131 -0
- package/cpp/llama.cpp/src/models/command-r.cpp +122 -0
- package/cpp/llama.cpp/src/models/dbrx.cpp +123 -0
- package/cpp/llama.cpp/src/models/deci.cpp +135 -0
- package/cpp/llama.cpp/src/models/deepseek.cpp +144 -0
- package/cpp/llama.cpp/src/models/deepseek2.cpp +237 -0
- package/cpp/llama.cpp/src/models/dots1.cpp +134 -0
- package/cpp/llama.cpp/src/models/dream.cpp +105 -0
- package/cpp/llama.cpp/src/models/ernie4-5-moe.cpp +150 -0
- package/cpp/llama.cpp/src/models/ernie4-5.cpp +110 -0
- package/cpp/llama.cpp/src/models/exaone.cpp +114 -0
- package/cpp/llama.cpp/src/models/exaone4.cpp +123 -0
- package/cpp/llama.cpp/src/models/falcon-h1.cpp +113 -0
- package/cpp/llama.cpp/src/models/falcon.cpp +120 -0
- package/cpp/llama.cpp/src/models/gemma-embedding.cpp +120 -0
- package/cpp/llama.cpp/src/models/gemma.cpp +112 -0
- package/cpp/llama.cpp/src/models/gemma2-iswa.cpp +125 -0
- package/cpp/llama.cpp/src/models/gemma3-iswa.cpp +131 -0
- package/cpp/llama.cpp/src/models/gemma3n-iswa.cpp +377 -0
- package/cpp/llama.cpp/src/models/glm4-moe.cpp +153 -0
- package/cpp/llama.cpp/src/models/glm4.cpp +127 -0
- package/cpp/llama.cpp/src/models/gpt2.cpp +105 -0
- package/cpp/llama.cpp/src/models/gptneox.cpp +144 -0
- package/cpp/llama.cpp/src/models/granite-hybrid.cpp +196 -0
- package/cpp/llama.cpp/src/models/granite.cpp +211 -0
- package/cpp/llama.cpp/src/models/graph-context-mamba.cpp +283 -0
- package/cpp/llama.cpp/src/models/grok.cpp +159 -0
- package/cpp/llama.cpp/src/models/grovemoe.cpp +141 -0
- package/cpp/llama.cpp/src/models/hunyuan-dense.cpp +132 -0
- package/cpp/llama.cpp/src/models/hunyuan-moe.cpp +154 -0
- package/cpp/llama.cpp/src/models/internlm2.cpp +120 -0
- package/cpp/llama.cpp/src/models/jais.cpp +86 -0
- package/cpp/llama.cpp/src/models/jamba.cpp +106 -0
- package/cpp/llama.cpp/src/models/lfm2.cpp +173 -0
- package/cpp/llama.cpp/src/models/llada-moe.cpp +122 -0
- package/cpp/llama.cpp/src/models/llada.cpp +99 -0
- package/cpp/llama.cpp/src/models/llama-iswa.cpp +174 -0
- package/cpp/llama.cpp/src/models/llama.cpp +155 -0
- package/cpp/llama.cpp/src/models/mamba.cpp +55 -0
- package/cpp/llama.cpp/src/models/minicpm3.cpp +199 -0
- package/cpp/llama.cpp/src/models/minimax-m2.cpp +124 -0
- package/cpp/llama.cpp/src/models/models.h +485 -0
- package/cpp/llama.cpp/src/models/mpt.cpp +126 -0
- package/cpp/llama.cpp/src/models/nemotron-h.cpp +121 -0
- package/cpp/llama.cpp/src/models/nemotron.cpp +122 -0
- package/cpp/llama.cpp/src/models/neo-bert.cpp +104 -0
- package/cpp/llama.cpp/src/models/olmo.cpp +121 -0
- package/cpp/llama.cpp/src/models/olmo2.cpp +150 -0
- package/cpp/llama.cpp/src/models/olmoe.cpp +124 -0
- package/cpp/llama.cpp/src/models/openai-moe-iswa.cpp +124 -0
- package/cpp/llama.cpp/src/models/openelm.cpp +124 -0
- package/cpp/llama.cpp/src/models/orion.cpp +123 -0
- package/cpp/llama.cpp/src/models/pangu-embedded.cpp +121 -0
- package/cpp/llama.cpp/src/models/phi2.cpp +121 -0
- package/cpp/llama.cpp/src/models/phi3.cpp +152 -0
- package/cpp/llama.cpp/src/models/plamo.cpp +110 -0
- package/cpp/llama.cpp/src/models/plamo2.cpp +316 -0
- package/cpp/llama.cpp/src/models/plm.cpp +168 -0
- package/cpp/llama.cpp/src/models/qwen.cpp +108 -0
- package/cpp/llama.cpp/src/models/qwen2.cpp +117 -0
- package/cpp/llama.cpp/src/models/qwen2moe.cpp +151 -0
- package/cpp/llama.cpp/src/models/qwen2vl.cpp +117 -0
- package/cpp/llama.cpp/src/models/qwen3.cpp +117 -0
- package/cpp/llama.cpp/src/models/qwen3moe.cpp +124 -0
- package/cpp/llama.cpp/src/models/qwen3vl-moe.cpp +149 -0
- package/cpp/llama.cpp/src/models/qwen3vl.cpp +141 -0
- package/cpp/llama.cpp/src/models/refact.cpp +94 -0
- package/cpp/llama.cpp/src/models/rwkv6-base.cpp +162 -0
- package/cpp/llama.cpp/src/models/rwkv6.cpp +94 -0
- package/cpp/llama.cpp/src/models/rwkv6qwen2.cpp +86 -0
- package/cpp/llama.cpp/src/models/rwkv7-base.cpp +135 -0
- package/cpp/llama.cpp/src/models/rwkv7.cpp +90 -0
- package/cpp/llama.cpp/src/models/seed-oss.cpp +124 -0
- package/cpp/llama.cpp/src/models/smallthinker.cpp +120 -0
- package/cpp/llama.cpp/src/models/smollm3.cpp +128 -0
- package/cpp/llama.cpp/src/models/stablelm.cpp +146 -0
- package/cpp/llama.cpp/src/models/starcoder.cpp +100 -0
- package/cpp/llama.cpp/src/models/starcoder2.cpp +121 -0
- package/cpp/llama.cpp/src/models/t5-dec.cpp +166 -0
- package/cpp/llama.cpp/src/models/t5-enc.cpp +96 -0
- package/cpp/llama.cpp/src/models/wavtokenizer-dec.cpp +149 -0
- package/cpp/llama.cpp/src/models/xverse.cpp +108 -0
- package/cpp/llama.cpp/src/unicode.cpp +77 -0
- package/cpp/llama.cpp/src/unicode.h +43 -0
- package/cpp/llama.cpp/vendor/cpp-httplib/CMakeLists.txt +94 -0
- package/cpp/llama.cpp/vendor/cpp-httplib/httplib.cpp +9339 -0
- package/cpp/llama.cpp/vendor/cpp-httplib/httplib.h +433 -8222
- package/cpp/llama.cpp/vendor/cpp-httplib/patch-boringssl.cmake +6 -0
- package/cpp/llama.cpp/vendor/miniaudio/miniaudio.h +4179 -1900
- package/cpp/llama.cpp/vendor/minja/chat-template.hpp +9 -2
- package/cpp/llama.cpp/vendor/minja/minja.hpp +101 -22
- package/cpp/rn-completion.cpp +3 -27
- package/ios/include/chat.h +16 -3
- package/ios/include/common/minja/chat-template.hpp +9 -2
- package/ios/include/common/minja/minja.hpp +101 -22
- package/ios/include/common.h +57 -19
- package/ios/include/json-schema-to-grammar.h +2 -0
- package/ios/include/llama.h +44 -21
- package/ios/include/log.h +12 -4
- package/ios/include/sampling.h +3 -1
- package/ios/libs/llama.xcframework/Info.plist +20 -20
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6399 -5557
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +19 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-metal.h +1 -6
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +199 -6
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +44 -21
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6362 -5520
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4813 -4241
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +19 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +1 -6
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +199 -6
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +44 -21
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +10 -4
- package/cpp/llama.cpp/ggml/src/ggml-cann/Doxyfile +0 -2579
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +0 -371
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh +0 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +0 -379
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh +0 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +0 -495
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +0 -486
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +0 -6886
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +0 -154
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
- package/cpp/llama.cpp/ggml/src/ggml-zdnn/ggml-zdnn-impl.h +0 -97
- package/cpp/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
- package/cpp/llama.cpp/models/templates/ByteDance-Seed-OSS.jinja +0 -171
- package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja +0 -202
- package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja +0 -156
- package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +0 -124
- package/cpp/llama.cpp/models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja +0 -152
- package/cpp/llama.cpp/models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja +0 -152
- package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +0 -62
- package/cpp/llama.cpp/models/templates/Qwen-Qwen2.5-7B-Instruct.jinja +0 -54
- package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +0 -85
- package/cpp/llama.cpp/models/templates/README.md +0 -25
- package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja +0 -1
- package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja +0 -1
- package/cpp/llama.cpp/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja +0 -57
- package/cpp/llama.cpp/models/templates/google-gemma-2-2b-it.jinja +0 -4
- package/cpp/llama.cpp/models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja +0 -59
- package/cpp/llama.cpp/models/templates/llama-cpp-deepseek-r1.jinja +0 -76
- package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja +0 -34
- package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.1.jinja +0 -58
- package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.2.jinja +0 -287
- package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja +0 -109
- package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja +0 -93
- package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja +0 -109
- package/cpp/llama.cpp/models/templates/microsoft-Phi-3.5-mini-instruct.jinja +0 -8
- package/cpp/llama.cpp/models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja +0 -87
- package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja +0 -43
- package/cpp/llama.cpp/models/templates/openai-gpt-oss-120b.jinja +0 -331
- package/cpp/llama.cpp/models/templates/unsloth-mistral-Devstral-Small-2507.jinja +0 -105
- package/cpp/llama.cpp/prompts/LLM-questions.txt +0 -49
- package/cpp/llama.cpp/prompts/alpaca.txt +0 -1
- package/cpp/llama.cpp/prompts/assistant.txt +0 -31
- package/cpp/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
- package/cpp/llama.cpp/prompts/chat-with-bob.txt +0 -7
- package/cpp/llama.cpp/prompts/chat-with-qwen.txt +0 -1
- package/cpp/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
- package/cpp/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
- package/cpp/llama.cpp/prompts/chat.txt +0 -28
- package/cpp/llama.cpp/prompts/dan-modified.txt +0 -1
- package/cpp/llama.cpp/prompts/dan.txt +0 -1
- package/cpp/llama.cpp/prompts/mnemonics.txt +0 -93
- package/cpp/llama.cpp/prompts/parallel-questions.txt +0 -43
- package/cpp/llama.cpp/prompts/reason-act.txt +0 -18
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5524
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +0 -4247
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-alloc.h +0 -76
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +0 -354
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-blas.h +0 -25
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +0 -145
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-metal.h +0 -66
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +0 -256
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +0 -2492
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/gguf.h +0 -202
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +0 -1391
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Modules/module.modulemap +0 -17
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Resources/Info.plist +0 -32
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-alloc.h +0 -76
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +0 -354
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-blas.h +0 -25
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +0 -145
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-metal.h +0 -66
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +0 -256
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +0 -2492
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/gguf.h +0 -202
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +0 -1391
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Modules/module.modulemap +0 -17
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Resources/Info.plist +0 -32
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-alloc.h +0 -76
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +0 -354
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-blas.h +0 -25
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +0 -145
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-metal.h +0 -66
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +0 -256
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +0 -2492
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/gguf.h +0 -202
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +0 -1391
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Modules/module.modulemap +0 -17
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Resources/Info.plist +0 -32
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5561
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-alloc.h +0 -76
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +0 -354
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-blas.h +0 -25
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +0 -145
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-metal.h +0 -66
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +0 -256
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +0 -2492
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/gguf.h +0 -202
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +0 -1391
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Info.plist +0 -35
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Modules/module.modulemap +0 -17
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5524
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +0 -4246
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +0 -76
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +0 -354
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +0 -25
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +0 -145
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +0 -66
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +0 -256
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +0 -2492
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +0 -202
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -1391
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Info.plist +0 -35
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +0 -17
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5558
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-alloc.h +0 -76
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +0 -354
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-blas.h +0 -25
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +0 -145
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-metal.h +0 -66
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +0 -256
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +0 -2492
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/gguf.h +0 -202
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +0 -1391
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Info.plist +0 -32
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Modules/module.modulemap +0 -17
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5520
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +0 -4243
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +0 -76
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +0 -354
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +0 -25
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +0 -145
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +0 -66
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +0 -256
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +0 -2492
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +0 -202
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -1391
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Info.plist +0 -32
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +0 -17
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
|
@@ -23,31 +23,35 @@
|
|
|
23
23
|
#ifndef CANN_ACLNN_OPS
|
|
24
24
|
#define CANN_ACLNN_OPS
|
|
25
25
|
|
|
26
|
-
#include
|
|
27
|
-
#include
|
|
26
|
+
#include "acl_tensor.h"
|
|
27
|
+
#include "common.h"
|
|
28
|
+
|
|
28
29
|
#include <aclnnop/aclnn_abs.h>
|
|
29
|
-
#include <aclnnop/aclnn_neg.h>
|
|
30
|
-
#include <aclnnop/aclnn_exp.h>
|
|
31
30
|
#include <aclnnop/aclnn_arange.h>
|
|
32
31
|
#include <aclnnop/aclnn_argsort.h>
|
|
33
32
|
#include <aclnnop/aclnn_cat.h>
|
|
34
33
|
#include <aclnnop/aclnn_clamp.h>
|
|
34
|
+
#include <aclnnop/aclnn_cos.h>
|
|
35
|
+
#include <aclnnop/aclnn_exp.h>
|
|
35
36
|
#include <aclnnop/aclnn_gelu.h>
|
|
36
37
|
#include <aclnnop/aclnn_gelu_v2.h>
|
|
37
|
-
#include <aclnnop/aclnn_sigmoid.h>
|
|
38
38
|
#include <aclnnop/aclnn_hardsigmoid.h>
|
|
39
39
|
#include <aclnnop/aclnn_hardswish.h>
|
|
40
40
|
#include <aclnnop/aclnn_leaky_relu.h>
|
|
41
|
+
#include <aclnnop/aclnn_log.h>
|
|
42
|
+
#include <aclnnop/aclnn_logsoftmax.h>
|
|
43
|
+
#include <aclnnop/aclnn_neg.h>
|
|
44
|
+
#include <aclnnop/aclnn_norm.h>
|
|
41
45
|
#include <aclnnop/aclnn_relu.h>
|
|
46
|
+
#include <aclnnop/aclnn_sigmoid.h>
|
|
47
|
+
#include <aclnnop/aclnn_sign.h>
|
|
42
48
|
#include <aclnnop/aclnn_silu.h>
|
|
43
|
-
#include <aclnnop/aclnn_tanh.h>
|
|
44
|
-
#include <aclnnop/aclnn_sqrt.h>
|
|
45
49
|
#include <aclnnop/aclnn_sin.h>
|
|
46
|
-
#include <aclnnop/
|
|
47
|
-
#include <aclnnop/
|
|
48
|
-
|
|
49
|
-
#include
|
|
50
|
-
#include
|
|
50
|
+
#include <aclnnop/aclnn_sqrt.h>
|
|
51
|
+
#include <aclnnop/aclnn_tanh.h>
|
|
52
|
+
|
|
53
|
+
#include <functional>
|
|
54
|
+
#include <unordered_set>
|
|
51
55
|
|
|
52
56
|
/**
|
|
53
57
|
* @brief Repeats a ggml tensor along each dimension to match the dimensions
|
|
@@ -62,7 +66,7 @@
|
|
|
62
66
|
* @param dst The ggml tensor representing the destination, which op is
|
|
63
67
|
* GGML_OP_REPEAT and specifies the desired dimensions.
|
|
64
68
|
*/
|
|
65
|
-
void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
69
|
+
void ggml_cann_repeat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
66
70
|
|
|
67
71
|
/**
|
|
68
72
|
* @brief Applies the Leaky ReLU activation function to a tensor using the CANN
|
|
@@ -82,7 +86,7 @@ void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
82
86
|
* @param dst The destination tensor where the result of the Leaky ReLU
|
|
83
87
|
* activation is stored, which op is `GGML_OP_LEAKY_RELU`
|
|
84
88
|
*/
|
|
85
|
-
void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
89
|
+
void ggml_cann_leaky_relu(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
86
90
|
|
|
87
91
|
/**
|
|
88
92
|
* @brief Concatenates multiple tensors along a specified dimension using the
|
|
@@ -97,7 +101,7 @@ void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
97
101
|
* @attention tensorList length should be 2 and the dimension using for concat
|
|
98
102
|
* default to 1.
|
|
99
103
|
*/
|
|
100
|
-
void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
104
|
+
void ggml_cann_concat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
101
105
|
|
|
102
106
|
/**
|
|
103
107
|
* @brief Generates a sequence of evenly spaced values within a specified
|
|
@@ -113,7 +117,7 @@ void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
113
117
|
* `start`, 'stop' and 'step' are in dst->op_params and dst->op is
|
|
114
118
|
* `GGML_OP_ARANGE`.
|
|
115
119
|
*/
|
|
116
|
-
void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
120
|
+
void ggml_cann_arange(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
117
121
|
|
|
118
122
|
/**
|
|
119
123
|
* @brief Applies a clamp operation to the elements of a ggml tensor using the
|
|
@@ -131,7 +135,7 @@ void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
131
135
|
* @param dst The destination tensor where the clamped values will be stored.
|
|
132
136
|
* dst->op is `GGML_OP_CLAMP`, `min` and `max` value is in dst->params.
|
|
133
137
|
*/
|
|
134
|
-
void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
138
|
+
void ggml_cann_clamp(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
135
139
|
|
|
136
140
|
/**
|
|
137
141
|
* @brief Scales the elements of a ggml tensor by a constant factor using the
|
|
@@ -148,7 +152,7 @@ void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
148
152
|
* @param dst The destination tensor where the scaled values will be stored.
|
|
149
153
|
* dst->op is `GGML_OP_SCALE` and `scale` value is in dst->params.
|
|
150
154
|
*/
|
|
151
|
-
void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
155
|
+
void ggml_cann_scale(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
152
156
|
|
|
153
157
|
/**
|
|
154
158
|
* @brief Sorts the elements of a ggml tensor and returns the indices that
|
|
@@ -163,7 +167,7 @@ void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
163
167
|
* @param dst The destination tensor where the sorted indices will be stored.
|
|
164
168
|
* dst->op is `GGML_OP_ARGSORT`.
|
|
165
169
|
*/
|
|
166
|
-
void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
170
|
+
void ggml_cann_argsort(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
167
171
|
|
|
168
172
|
/**
|
|
169
173
|
* @brief Computes the Layer Normalization for a ggml tensor using the CANN
|
|
@@ -185,7 +189,67 @@ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
185
189
|
* @param dst The destination tensor where the normalized values will be stored.
|
|
186
190
|
* @attention `Var` defaults to dst->ne[0].
|
|
187
191
|
*/
|
|
188
|
-
void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
192
|
+
void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
193
|
+
|
|
194
|
+
/**
|
|
195
|
+
* @brief Computes the L2 Normalization for a ggml tensor using the CANN
|
|
196
|
+
* backend.
|
|
197
|
+
*
|
|
198
|
+
* @details This function applies the L2 Normalization operation on the
|
|
199
|
+
* input tensor `src` and stores the result in the destination tensor
|
|
200
|
+
* `dst`. L2 Normalization scales the input tensor such that the
|
|
201
|
+
* L2 norm along the specified dimension equals 1. This operation
|
|
202
|
+
* is commonly used in neural networks for feature normalization
|
|
203
|
+
* and vector scaling.
|
|
204
|
+
* The operation is defined as:
|
|
205
|
+
* \f[
|
|
206
|
+
* \text{out} = \frac{x}{\sqrt{\sum{x^2}}}
|
|
207
|
+
* \f]
|
|
208
|
+
* The normalization is performed along the last dimension by default.
|
|
209
|
+
*
|
|
210
|
+
* @param ctx The CANN context used for operations.
|
|
211
|
+
* @param dst The destination tensor where the normalized values will be stored.
|
|
212
|
+
* @attention The normalization is performed along the last dimension of the
|
|
213
|
+
* input tensor by default.
|
|
214
|
+
*/
|
|
215
|
+
void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
216
|
+
|
|
217
|
+
/**
|
|
218
|
+
* @brief Computes the Cross Entropy Loss for a ggml tensor using the CANN
|
|
219
|
+
* backend.
|
|
220
|
+
*
|
|
221
|
+
* @details This function computes the cross entropy loss between the predicted
|
|
222
|
+
* logits and target probability distributions. The operation follows
|
|
223
|
+
* the same computation pattern as the CPU implementation:
|
|
224
|
+
* 1. Applies log_softmax to the logits along the class dimension
|
|
225
|
+
* 2. Element-wise multiplication with target distributions
|
|
226
|
+
* 3. Summation along the class dimension to get per-sample losses
|
|
227
|
+
* 4. Global summation and scaling by -1/nr to get final loss
|
|
228
|
+
*
|
|
229
|
+
* The computation can be expressed as:
|
|
230
|
+
* \f[
|
|
231
|
+
* \text{loss} = -\frac{1}{N} \sum_{i=1}^{N} \sum_{j=1}^{C} y_{ij} \cdot \log(\text{softmax}(x_{ij}))
|
|
232
|
+
* \f]
|
|
233
|
+
* where \f$N\f$ is the total number of samples, \f$C\f$ is the number
|
|
234
|
+
* of classes, \f$x\f$ are the logits, and \f$y\f$ are the target
|
|
235
|
+
* probability distributions.
|
|
236
|
+
*
|
|
237
|
+
* @param ctx The CANN context used for operations.
|
|
238
|
+
* @param dst The destination tensor where the computed loss will be stored.
|
|
239
|
+
* This should be a scalar tensor containing the final loss value.
|
|
240
|
+
*
|
|
241
|
+
* @note This implementation computes cross entropy between probability
|
|
242
|
+
* distributions, not the typical classification cross entropy that
|
|
243
|
+
* expects class indices as targets. Both input tensors (src0 and src1)
|
|
244
|
+
* should have the same shape and represent probability distributions
|
|
245
|
+
* over the class dimension.
|
|
246
|
+
* @note The function expects two source tensors:
|
|
247
|
+
* - dst->src[0]: Logits tensor (before softmax)
|
|
248
|
+
* - dst->src[1]: Target probability distributions tensor
|
|
249
|
+
* @note The computation is performed using CANN backend operators including
|
|
250
|
+
* LogSoftmax, Mul, ReduceSum, and Muls for the final scaling.
|
|
251
|
+
*/
|
|
252
|
+
void ggml_cann_cross_entropy_loss(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
189
253
|
|
|
190
254
|
/**
|
|
191
255
|
* @brief Computes the Group Normalization for a ggml tensor using the CANN
|
|
@@ -209,7 +273,7 @@ void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
209
273
|
*
|
|
210
274
|
* @attention eps defaults to 1e-6f.
|
|
211
275
|
*/
|
|
212
|
-
void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
276
|
+
void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
213
277
|
|
|
214
278
|
/**
|
|
215
279
|
* @brief Computes the accumulation of tensors using the CANN backend.
|
|
@@ -228,7 +292,7 @@ void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
228
292
|
* @param dst The destination tensor where the accumulated values will be stored.
|
|
229
293
|
* `inplace` is in dst->params, and dst->op is `GGML_OP_ACC`.
|
|
230
294
|
*/
|
|
231
|
-
void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
295
|
+
void ggml_cann_acc(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
232
296
|
|
|
233
297
|
/**
|
|
234
298
|
* @brief Computes the sum of elements along the last dimension of a ggml tensor
|
|
@@ -244,7 +308,7 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
244
308
|
*
|
|
245
309
|
* @attention `reduce_dims` defaults to 3, which means the last dimension.
|
|
246
310
|
*/
|
|
247
|
-
void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
311
|
+
void ggml_cann_sum_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
248
312
|
|
|
249
313
|
/**
|
|
250
314
|
* @brief Computes the sum of elements in a ggml tensor.
|
|
@@ -258,7 +322,7 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
258
322
|
*
|
|
259
323
|
*/
|
|
260
324
|
|
|
261
|
-
void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
325
|
+
void ggml_cann_sum(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
262
326
|
|
|
263
327
|
/**
|
|
264
328
|
* @brief Upsamples a ggml tensor using nearest neighbor interpolation using
|
|
@@ -274,8 +338,7 @@ void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
274
338
|
* @param dst The destination tensor where the upsampled values will be stored.
|
|
275
339
|
* dst->op is `GGML_OP_UPSCALE`.
|
|
276
340
|
*/
|
|
277
|
-
void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
|
|
278
|
-
ggml_tensor* dst);
|
|
341
|
+
void ggml_cann_upsample_nearest2d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
279
342
|
|
|
280
343
|
/**
|
|
281
344
|
* @brief Pads a ggml tensor to match the dimensions of the destination tensor
|
|
@@ -290,7 +353,7 @@ void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
|
|
|
290
353
|
* @param dst The destination tensor, which specifies the target dimensions for
|
|
291
354
|
* padding. dst->op is `GGML_OP_PAD`.
|
|
292
355
|
*/
|
|
293
|
-
void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
356
|
+
void ggml_cann_pad(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
294
357
|
|
|
295
358
|
/**
|
|
296
359
|
* @brief Executes a 2D pooling operation on a ggml tensor using the CANN
|
|
@@ -307,7 +370,7 @@ void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
307
370
|
* @param dst The destination tensor on which the pooling operation is to be
|
|
308
371
|
* performed. dst->op is `GGML_OP_POOL_2D`.
|
|
309
372
|
*/
|
|
310
|
-
void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
373
|
+
void ggml_cann_pool2d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
311
374
|
|
|
312
375
|
/**
|
|
313
376
|
* @brief Duplicates a ggml tensor using the CANN backend.
|
|
@@ -326,7 +389,7 @@ void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
326
389
|
* different shape and dst is no-contiguous.
|
|
327
390
|
* @note: This func need to simplify.
|
|
328
391
|
*/
|
|
329
|
-
void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
392
|
+
void ggml_cann_dup(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
330
393
|
|
|
331
394
|
/**
|
|
332
395
|
* @brief Computes the Root Mean Square (RMS) normalization of a ggml tensor
|
|
@@ -348,7 +411,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
348
411
|
* @param dst The destination tensor where the normalized values will be stored.
|
|
349
412
|
* dst->op is `GGML_OP_RMS_NORM`.
|
|
350
413
|
*/
|
|
351
|
-
void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
414
|
+
void ggml_cann_rms_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
352
415
|
|
|
353
416
|
/**
|
|
354
417
|
* @brief Applies a diagonal mask to the tensor with a specified value.
|
|
@@ -363,7 +426,7 @@ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
363
426
|
* `GGML_OP_DIAG_MASK`
|
|
364
427
|
* @param value The value to use for masking.
|
|
365
428
|
*/
|
|
366
|
-
void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float value);
|
|
429
|
+
void ggml_cann_diag_mask(ggml_backend_cann_context & ctx, ggml_tensor * dst, float value);
|
|
367
430
|
|
|
368
431
|
/**
|
|
369
432
|
* @brief Performs an image-to-column transformation on the input tensor.
|
|
@@ -378,7 +441,7 @@ void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float
|
|
|
378
441
|
* @param dst The destination tensor that stores the result of the operation.
|
|
379
442
|
* dst->op is `GGML_OP_IM2COL`.
|
|
380
443
|
*/
|
|
381
|
-
void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
444
|
+
void ggml_cann_im2col(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
382
445
|
|
|
383
446
|
/**
|
|
384
447
|
* @brief Computes time step embeddings using sine and cosine functions.
|
|
@@ -392,10 +455,10 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
392
455
|
* @param dst The destination tensor where the result of the embedding operation
|
|
393
456
|
* will be stored. dst->op is `GGML_OP_TIMESTEP_EMBEDDING`.
|
|
394
457
|
*/
|
|
395
|
-
void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
458
|
+
void ggml_cann_timestep_embedding(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
396
459
|
|
|
397
460
|
// @see ggml_cann_dup.
|
|
398
|
-
void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
461
|
+
void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
399
462
|
|
|
400
463
|
/**
|
|
401
464
|
* @brief Computes the softmax activation with optional masking.
|
|
@@ -417,7 +480,7 @@ void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
417
480
|
* @param dst The destination tensor where the result will be stored. dst->op is
|
|
418
481
|
* `GGML_OP_SOFTMAX`.
|
|
419
482
|
*/
|
|
420
|
-
void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
483
|
+
void ggml_cann_softmax(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
421
484
|
|
|
422
485
|
/**
|
|
423
486
|
* @brief Extracts specific rows from a tensor based on indices.
|
|
@@ -429,7 +492,7 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
429
492
|
* @param ctx The backend CANN context for executing operations.
|
|
430
493
|
* @param dst The destination tensor where the extracted rows will be stored.
|
|
431
494
|
*/
|
|
432
|
-
void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
495
|
+
void ggml_cann_get_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
433
496
|
|
|
434
497
|
/**
|
|
435
498
|
* @brief Writes specific rows into a tensor at positions specified by indices.
|
|
@@ -441,7 +504,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
441
504
|
* @param ctx The backend CANN context for executing operations.
|
|
442
505
|
* @param dst The destination tensor where the specified rows will be updated.
|
|
443
506
|
*/
|
|
444
|
-
void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
507
|
+
void ggml_cann_set_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
445
508
|
|
|
446
509
|
/**
|
|
447
510
|
* @brief Executes matrix multiplication for the given tensor.
|
|
@@ -454,7 +517,7 @@ void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
454
517
|
* @param dst The destination tensor for storing the result of the matrix
|
|
455
518
|
* multiplication. dst->op is `GGML_OP_MUL_MAT`.
|
|
456
519
|
*/
|
|
457
|
-
void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
520
|
+
void ggml_cann_mul_mat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
458
521
|
|
|
459
522
|
/**
|
|
460
523
|
* @brief Applies Rotary Positional Embedding (RoPE) to the input tensor.
|
|
@@ -477,7 +540,7 @@ void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
477
540
|
* @note The function currently does not support cases where the freq_scale is
|
|
478
541
|
* not equal 1.
|
|
479
542
|
*/
|
|
480
|
-
void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
543
|
+
void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
481
544
|
|
|
482
545
|
/**
|
|
483
546
|
* @brief Computes the index of the maximum value along the specified dimension
|
|
@@ -492,7 +555,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
492
555
|
* @param dst The destination tensor where the indices of the maximum values will
|
|
493
556
|
* be stored. dst->op is `GGML_OP_ARGMAX`.
|
|
494
557
|
*/
|
|
495
|
-
void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
558
|
+
void ggml_cann_argmax(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
496
559
|
|
|
497
560
|
/**
|
|
498
561
|
* @brief Adds two tensors element-wise and stores the result in a destination
|
|
@@ -509,8 +572,10 @@ void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
509
572
|
* @param acl_src1 The second source tensor.
|
|
510
573
|
* @param acl_dst The destination tensor where the result will be stored.
|
|
511
574
|
*/
|
|
512
|
-
void aclnn_add(ggml_backend_cann_context& ctx,
|
|
513
|
-
|
|
575
|
+
void aclnn_add(ggml_backend_cann_context & ctx,
|
|
576
|
+
aclTensor * acl_src0,
|
|
577
|
+
aclTensor * acl_src1,
|
|
578
|
+
aclTensor * acl_dst = nullptr);
|
|
514
579
|
|
|
515
580
|
/**
|
|
516
581
|
* @brief Sub two tensors element-wise and stores the result in a destination
|
|
@@ -527,8 +592,10 @@ void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
|
|
|
527
592
|
* @param acl_src1 The second source tensor.
|
|
528
593
|
* @param acl_dst The destination tensor where the result will be stored.
|
|
529
594
|
*/
|
|
530
|
-
void aclnn_sub(ggml_backend_cann_context& ctx,
|
|
531
|
-
|
|
595
|
+
void aclnn_sub(ggml_backend_cann_context & ctx,
|
|
596
|
+
aclTensor * acl_src0,
|
|
597
|
+
aclTensor * acl_src1,
|
|
598
|
+
aclTensor * acl_dst = nullptr);
|
|
532
599
|
|
|
533
600
|
/**
|
|
534
601
|
* @brief Performs element-wise multiplication of two tensors and stores the
|
|
@@ -546,8 +613,10 @@ void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
|
|
|
546
613
|
* @param acl_other The second tensor for element-wise multiplication.
|
|
547
614
|
* @param acl_dst The destination tensor where the result will be stored.
|
|
548
615
|
*/
|
|
549
|
-
void aclnn_mul(ggml_backend_cann_context& ctx,
|
|
550
|
-
|
|
616
|
+
void aclnn_mul(ggml_backend_cann_context & ctx,
|
|
617
|
+
aclTensor * acl_src,
|
|
618
|
+
aclTensor * acl_other,
|
|
619
|
+
aclTensor * acl_dst = nullptr);
|
|
551
620
|
|
|
552
621
|
/**
|
|
553
622
|
* @brief Matrix division, optionally in-place.
|
|
@@ -567,8 +636,10 @@ void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
567
636
|
* @param inplace Flag indicating whether to perform the operation in-place on
|
|
568
637
|
* `acl_src`.
|
|
569
638
|
*/
|
|
570
|
-
void aclnn_div(ggml_backend_cann_context& ctx,
|
|
571
|
-
|
|
639
|
+
void aclnn_div(ggml_backend_cann_context & ctx,
|
|
640
|
+
aclTensor * acl_src,
|
|
641
|
+
aclTensor * acl_other,
|
|
642
|
+
aclTensor * acl_dst = nullptr);
|
|
572
643
|
|
|
573
644
|
/**
|
|
574
645
|
* @brief Applies element-wise cosine function to the elements of a tensor.
|
|
@@ -584,8 +655,7 @@ void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
584
655
|
* @param acl_dst The destination tensor where the cosine results will be
|
|
585
656
|
* stored.
|
|
586
657
|
*/
|
|
587
|
-
void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
588
|
-
aclTensor* acl_dst);
|
|
658
|
+
void aclnn_cos(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst);
|
|
589
659
|
|
|
590
660
|
/**
|
|
591
661
|
* @brief Applies element-wise sine function to the elements of a tensor.
|
|
@@ -602,8 +672,7 @@ void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
602
672
|
* @param acl_src The source tensor on which the sine function will be applied.
|
|
603
673
|
* @param acl_dst The destination tensor where the sine results will be stored.
|
|
604
674
|
*/
|
|
605
|
-
void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
606
|
-
aclTensor* acl_dst);
|
|
675
|
+
void aclnn_sin(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst);
|
|
607
676
|
|
|
608
677
|
/**
|
|
609
678
|
* @brief Prepares broadcast-compatible ACL tensors for two input tensors and one
|
|
@@ -621,8 +690,12 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
621
690
|
* @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
|
|
622
691
|
* @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
|
|
623
692
|
*/
|
|
624
|
-
void bcast_shape(ggml_tensor *
|
|
625
|
-
|
|
693
|
+
void bcast_shape(ggml_tensor * src0,
|
|
694
|
+
ggml_tensor * src1,
|
|
695
|
+
ggml_tensor * dst,
|
|
696
|
+
acl_tensor_ptr & acl_src0,
|
|
697
|
+
acl_tensor_ptr & acl_src1,
|
|
698
|
+
acl_tensor_ptr & acl_dst);
|
|
626
699
|
|
|
627
700
|
/**
|
|
628
701
|
* @brief Computes the 1D transposed convolution (deconvolution) of a ggml
|
|
@@ -637,7 +710,7 @@ void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
|
|
|
637
710
|
* @param dst The destination tensor where the transposed convolution result
|
|
638
711
|
* will be stored. dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
|
|
639
712
|
*/
|
|
640
|
-
void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
713
|
+
void ggml_cann_conv_transpose_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
641
714
|
|
|
642
715
|
/**
|
|
643
716
|
* @brief Applies the ELU (Exponential Linear Unit) activation to a ggml tensor
|
|
@@ -662,7 +735,7 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds
|
|
|
662
735
|
* @param dst The destination tensor where the ELU-activated result will be stored.
|
|
663
736
|
* dst->op is expected to be `GGML_OP_ELU`.
|
|
664
737
|
*/
|
|
665
|
-
void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
738
|
+
void ggml_cann_elu(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
666
739
|
|
|
667
740
|
/**
|
|
668
741
|
* @brief Computes the mean of a ggml tensor element-wise using the CANN backend.
|
|
@@ -677,7 +750,7 @@ void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
677
750
|
* @param dst The destination tensor where the mean result will be stored.
|
|
678
751
|
* dst->op is expected to be `GGML_OP_MEAN`.
|
|
679
752
|
*/
|
|
680
|
-
void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
753
|
+
void ggml_cann_mean(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
681
754
|
|
|
682
755
|
/**
|
|
683
756
|
* @brief Applies 1D reflect padding to a ggml tensor using the CANN backend.
|
|
@@ -692,7 +765,7 @@ void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
692
765
|
* @param dst The destination tensor where the padded result will be stored.
|
|
693
766
|
* dst->op is expected to be `GGML_OP_PAD_REFLECT_1D`.
|
|
694
767
|
*/
|
|
695
|
-
void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
768
|
+
void ggml_cann_pad_reflect_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
696
769
|
|
|
697
770
|
/**
|
|
698
771
|
* @brief Counts the number of equal elements in two ggml tensors using the CANN backend.
|
|
@@ -708,7 +781,7 @@ void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
708
781
|
* @param dst The destination tensor where the result will be stored.
|
|
709
782
|
* dst->op is expected to be `GGML_OP_COUNT_EQUAL`.
|
|
710
783
|
*/
|
|
711
|
-
void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
784
|
+
void ggml_cann_count_equal(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
712
785
|
|
|
713
786
|
/**
|
|
714
787
|
* @brief Applies the Step activation function to a ggml tensor using the CANN backend.
|
|
@@ -723,7 +796,7 @@ void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
723
796
|
* @param dst The destination tensor where the result will be stored.
|
|
724
797
|
* dst->op is expected to be `GGML_OP_STEP`.
|
|
725
798
|
*/
|
|
726
|
-
void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
799
|
+
void ggml_cann_step(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
727
800
|
|
|
728
801
|
/**
|
|
729
802
|
* @brief Performs the Flash Attention extended operator using the CANN backend.
|
|
@@ -738,59 +811,46 @@ void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
738
811
|
* @param dst The destination tensor where the result will be stored.
|
|
739
812
|
* dst->op is expected to be `GGML_OP_FLASH_ATTN_EXT`.
|
|
740
813
|
*/
|
|
741
|
-
void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
814
|
+
void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
742
815
|
|
|
743
816
|
/*
|
|
744
817
|
* @brief A generic wrapper for ACL resources with custom deleter support.
|
|
745
818
|
*/
|
|
746
|
-
using any_acl_resource = std::unique_ptr<void, std::function<void(void*)>>;
|
|
819
|
+
using any_acl_resource = std::unique_ptr<void, std::function<void(void *)>>;
|
|
747
820
|
|
|
748
821
|
/**
|
|
749
822
|
* @brief Trait structure used to define how to destroy a given ACL resource type.
|
|
750
823
|
*
|
|
751
824
|
* @tparam T ACL resource type.
|
|
752
825
|
*/
|
|
753
|
-
template<typename T>
|
|
754
|
-
struct acl_resource_traits;
|
|
826
|
+
template <typename T> struct acl_resource_traits;
|
|
755
827
|
|
|
756
828
|
/**
|
|
757
829
|
* @brief Specialization for aclTensor, defines how to destroy an aclTensor resource.
|
|
758
830
|
*/
|
|
759
|
-
template<>
|
|
760
|
-
|
|
761
|
-
static void destroy(void* p) {
|
|
762
|
-
ACL_CHECK(aclDestroyTensor(static_cast<aclTensor*>(p)));
|
|
763
|
-
}
|
|
831
|
+
template <> struct acl_resource_traits<aclTensor> {
|
|
832
|
+
static void destroy(void * p) { ACL_CHECK(aclDestroyTensor(static_cast<aclTensor *>(p))); }
|
|
764
833
|
};
|
|
765
834
|
|
|
766
835
|
/**
|
|
767
836
|
* @brief Specialization for aclIntArray, defines how to destroy an aclIntArray resource.
|
|
768
837
|
*/
|
|
769
|
-
template<>
|
|
770
|
-
|
|
771
|
-
static void destroy(void* p) {
|
|
772
|
-
ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray*>(p)));
|
|
773
|
-
}
|
|
838
|
+
template <> struct acl_resource_traits<aclIntArray> {
|
|
839
|
+
static void destroy(void * p) { ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray *>(p))); }
|
|
774
840
|
};
|
|
775
841
|
|
|
776
842
|
/**
|
|
777
843
|
* @brief Specialization for aclScalar, defines how to destroy an aclScalar resource.
|
|
778
844
|
*/
|
|
779
|
-
template<>
|
|
780
|
-
|
|
781
|
-
static void destroy(void* p) {
|
|
782
|
-
ACL_CHECK(aclDestroyScalar(static_cast<aclScalar*>(p)));
|
|
783
|
-
}
|
|
845
|
+
template <> struct acl_resource_traits<aclScalar> {
|
|
846
|
+
static void destroy(void * p) { ACL_CHECK(aclDestroyScalar(static_cast<aclScalar *>(p))); }
|
|
784
847
|
};
|
|
785
848
|
|
|
786
849
|
/**
|
|
787
850
|
* @brief Specialization for aclTensorList, defines how to destroy an aclTensorList resource.
|
|
788
851
|
*/
|
|
789
|
-
template<>
|
|
790
|
-
|
|
791
|
-
static void destroy(void* p) {
|
|
792
|
-
ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList*>(p)));
|
|
793
|
-
}
|
|
852
|
+
template <> struct acl_resource_traits<aclTensorList> {
|
|
853
|
+
static void destroy(void * p) { ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList *>(p))); }
|
|
794
854
|
};
|
|
795
855
|
|
|
796
856
|
/**
|
|
@@ -800,14 +860,8 @@ struct acl_resource_traits<aclTensorList> {
|
|
|
800
860
|
* @param ptr Raw pointer to ACL resource.
|
|
801
861
|
* @return any_acl_resource Smart pointer that handles destruction.
|
|
802
862
|
*/
|
|
803
|
-
template<typename T>
|
|
804
|
-
any_acl_resource
|
|
805
|
-
return any_acl_resource(
|
|
806
|
-
static_cast<void*>(ptr),
|
|
807
|
-
[](void* p) {
|
|
808
|
-
acl_resource_traits<T>::destroy(p);
|
|
809
|
-
}
|
|
810
|
-
);
|
|
863
|
+
template <typename T> any_acl_resource make_acl_resource(T * ptr) {
|
|
864
|
+
return any_acl_resource(static_cast<void *>(ptr), [](void * p) { acl_resource_traits<T>::destroy(p); });
|
|
811
865
|
}
|
|
812
866
|
|
|
813
867
|
/**
|
|
@@ -817,89 +871,10 @@ any_acl_resource make_acl_resource(T* ptr) {
|
|
|
817
871
|
* @param vec Target vector to hold ACL resources.
|
|
818
872
|
* @param args Raw pointers to ACL resources.
|
|
819
873
|
*/
|
|
820
|
-
template<typename... Args>
|
|
821
|
-
void register_acl_resources(std::vector<any_acl_resource>& vec, Args*... args) {
|
|
874
|
+
template <typename... Args> void register_acl_resources(std::vector<any_acl_resource> & vec, Args *... args) {
|
|
822
875
|
(vec.emplace_back(make_acl_resource(args)), ...);
|
|
823
876
|
}
|
|
824
877
|
|
|
825
|
-
/**
|
|
826
|
-
* @brief Task class that wraps the execution of an aclnn function call.
|
|
827
|
-
*/
|
|
828
|
-
class aclnn_task : public cann_task {
|
|
829
|
-
public:
|
|
830
|
-
aclnn_task(aclnn_func_t aclnn_func, void * workspace_addr,
|
|
831
|
-
uint64_t workspace_size, aclOpExecutor * executor,
|
|
832
|
-
aclrtStream stream) :
|
|
833
|
-
aclnn_func_(aclnn_func),
|
|
834
|
-
workspace_addr_(workspace_addr),
|
|
835
|
-
workspace_size_(workspace_size),
|
|
836
|
-
executor_(executor),
|
|
837
|
-
stream_(stream) {}
|
|
838
|
-
virtual void run_task() override {
|
|
839
|
-
ACL_CHECK(aclnn_func_(workspace_addr_, workspace_size_, executor_, stream_));
|
|
840
|
-
}
|
|
841
|
-
private:
|
|
842
|
-
aclnn_func_t aclnn_func_;
|
|
843
|
-
void * workspace_addr_;
|
|
844
|
-
uint64_t workspace_size_;
|
|
845
|
-
aclOpExecutor * executor_;
|
|
846
|
-
aclrtStream stream_;
|
|
847
|
-
};
|
|
848
|
-
|
|
849
|
-
/**
|
|
850
|
-
* @brief Task class that releases ACL resources after usage.
|
|
851
|
-
*/
|
|
852
|
-
class release_resource_task : public cann_task {
|
|
853
|
-
public:
|
|
854
|
-
release_resource_task(std::vector<any_acl_resource>&& resources){
|
|
855
|
-
resource_ = std::move(resources);
|
|
856
|
-
}
|
|
857
|
-
|
|
858
|
-
virtual void run_task() override {
|
|
859
|
-
resource_.clear();
|
|
860
|
-
}
|
|
861
|
-
private:
|
|
862
|
-
std::vector<any_acl_resource> resource_;
|
|
863
|
-
};
|
|
864
|
-
|
|
865
|
-
/**
|
|
866
|
-
* @brief Task class for performing asynchronous memory copy operations.
|
|
867
|
-
*/
|
|
868
|
-
class async_memcpy_task : public cann_task {
|
|
869
|
-
public:
|
|
870
|
-
async_memcpy_task(void* dst, const void* src, size_t size,
|
|
871
|
-
aclrtMemcpyKind kind, aclrtStream stream)
|
|
872
|
-
: dst_(dst), src_(src), size_(size), kind_(kind), stream_(stream) {}
|
|
873
|
-
|
|
874
|
-
virtual void run_task() override {
|
|
875
|
-
ACL_CHECK(aclrtMemcpyAsync(dst_, size_, src_, size_, kind_, stream_));
|
|
876
|
-
}
|
|
877
|
-
private:
|
|
878
|
-
void* dst_;
|
|
879
|
-
const void* src_;
|
|
880
|
-
size_t size_;
|
|
881
|
-
aclrtMemcpyKind kind_;
|
|
882
|
-
aclrtStream stream_;
|
|
883
|
-
};
|
|
884
|
-
|
|
885
|
-
/**
|
|
886
|
-
* @brief Task class for performing asynchronous memory set operations.
|
|
887
|
-
*/
|
|
888
|
-
class async_memset_task : public cann_task {
|
|
889
|
-
public:
|
|
890
|
-
async_memset_task(void* buffer, size_t size, int32_t value, aclrtStream stream)
|
|
891
|
-
: buffer_(buffer), size_(size), value_(value), stream_(stream) {}
|
|
892
|
-
|
|
893
|
-
virtual void run_task() override {
|
|
894
|
-
ACL_CHECK(aclrtMemsetAsync(buffer_, size_, value_, size_, stream_));
|
|
895
|
-
}
|
|
896
|
-
private:
|
|
897
|
-
void* buffer_;
|
|
898
|
-
size_t size_;
|
|
899
|
-
int32_t value_;
|
|
900
|
-
aclrtStream stream_;
|
|
901
|
-
};
|
|
902
|
-
|
|
903
878
|
/**
|
|
904
879
|
* @brief Launches an asynchronous task using the memory allocator.
|
|
905
880
|
*
|
|
@@ -918,92 +893,20 @@ class async_memset_task : public cann_task {
|
|
|
918
893
|
* same stream are executed in queue order.
|
|
919
894
|
*/
|
|
920
895
|
|
|
921
|
-
#define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...)
|
|
922
|
-
do {
|
|
923
|
-
uint64_t workspaceSize = 0;
|
|
924
|
-
aclOpExecutor * executor;
|
|
925
|
-
void * workspaceAddr = nullptr;
|
|
926
|
-
ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor))
|
|
927
|
-
/* workspace should alloced in main thread to keep malloc order when using vmm. */
|
|
928
|
-
if (workspaceSize > 0) {
|
|
929
|
-
ggml_cann_pool_alloc workspace_allocator(CTX.pool(), workspaceSize);
|
|
930
|
-
workspaceAddr = workspace_allocator.get();
|
|
931
|
-
}
|
|
932
|
-
|
|
933
|
-
auto task = \
|
|
934
|
-
std::make_unique<aclnn_task>(aclnn##OP_NAME, workspaceAddr, workspaceSize, \
|
|
935
|
-
executor, CTX.stream()); \
|
|
936
|
-
CTX.task_queue.submit_task(std::move(task)); \
|
|
937
|
-
} else { \
|
|
938
|
-
ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream()));\
|
|
939
|
-
} \
|
|
896
|
+
#define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...) \
|
|
897
|
+
do { \
|
|
898
|
+
uint64_t workspaceSize = 0; \
|
|
899
|
+
aclOpExecutor * executor; \
|
|
900
|
+
void * workspaceAddr = nullptr; \
|
|
901
|
+
ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
|
|
902
|
+
/* workspace should alloced in main thread to keep malloc order when using vmm. */ \
|
|
903
|
+
if (workspaceSize > 0) { \
|
|
904
|
+
ggml_cann_pool_alloc workspace_allocator(CTX.pool(), workspaceSize); \
|
|
905
|
+
workspaceAddr = workspace_allocator.get(); \
|
|
906
|
+
} \
|
|
907
|
+
ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream())); \
|
|
940
908
|
} while (0)
|
|
941
909
|
|
|
942
|
-
/**
|
|
943
|
-
* @brief Registers and releases multiple ACL resources, optionally deferring the release
|
|
944
|
-
* using a task.
|
|
945
|
-
*
|
|
946
|
-
* @tparam Args Types of the ACL resources.
|
|
947
|
-
* @param ctx Backend context which manages task submission and async mode.
|
|
948
|
-
* @param args Pointers to ACL resources to be released.
|
|
949
|
-
*/
|
|
950
|
-
template <typename... Args>
|
|
951
|
-
void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... args) {
|
|
952
|
-
std::vector<any_acl_resource> resources;
|
|
953
|
-
register_acl_resources(resources, std::forward<Args>(args)...);
|
|
954
|
-
if(ctx.async_mode) {
|
|
955
|
-
auto task = std::make_unique<release_resource_task>(std::move(resources));
|
|
956
|
-
ctx.task_queue.submit_task(std::move(task));
|
|
957
|
-
}
|
|
958
|
-
}
|
|
959
|
-
|
|
960
|
-
/**
|
|
961
|
-
* @brief Performs an asynchronous memory copy operation, optionally deferred via task submission.
|
|
962
|
-
*
|
|
963
|
-
* @param ctx Backend context containing stream and async configuration.
|
|
964
|
-
* @param dst Destination memory address.
|
|
965
|
-
* @param src Source memory address.
|
|
966
|
-
* @param len Size of memory to copy (in bytes).
|
|
967
|
-
* @param kind Type of memory copy (host-to-device, device-to-host, etc).
|
|
968
|
-
*/
|
|
969
|
-
inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx, void * dst,
|
|
970
|
-
const void * src, size_t len, aclrtMemcpyKind kind) {
|
|
971
|
-
if (ctx.async_mode) {
|
|
972
|
-
auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx.stream());
|
|
973
|
-
ctx.task_queue.submit_task(std::move(task));
|
|
974
|
-
} else {
|
|
975
|
-
ACL_CHECK(aclrtMemcpyAsync(dst, len, src, len, kind, ctx.stream()));
|
|
976
|
-
}
|
|
977
|
-
}
|
|
978
|
-
|
|
979
|
-
inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, void * dst,
|
|
980
|
-
const void * src, size_t len, aclrtMemcpyKind kind) {
|
|
981
|
-
if (ctx->async_mode) {
|
|
982
|
-
auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx->stream());
|
|
983
|
-
ctx->task_queue.submit_task(std::move(task));
|
|
984
|
-
} else {
|
|
985
|
-
ACL_CHECK(aclrtMemcpyAsync(dst, len, src, len, kind, ctx->stream()));
|
|
986
|
-
}
|
|
987
|
-
}
|
|
988
|
-
|
|
989
|
-
/**
|
|
990
|
-
* @brief Performs an asynchronous memory set operation, optionally deferred via task submission.
|
|
991
|
-
*
|
|
992
|
-
* @param ctx Backend context containing stream and async configuration.
|
|
993
|
-
* @param buffer Memory buffer to be set.
|
|
994
|
-
* @param size Size of the memory buffer (in bytes).
|
|
995
|
-
* @param value Value to set in the buffer.
|
|
996
|
-
*/
|
|
997
|
-
inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffer,
|
|
998
|
-
size_t size, int value) {
|
|
999
|
-
if (ctx.async_mode) {
|
|
1000
|
-
auto task = std::make_unique<async_memset_task>(buffer, size, value, ctx.stream());
|
|
1001
|
-
ctx.task_queue.submit_task(std::move(task));
|
|
1002
|
-
} else {
|
|
1003
|
-
ACL_CHECK(aclrtMemsetAsync(buffer, size, value, size, ctx.stream()));
|
|
1004
|
-
}
|
|
1005
|
-
}
|
|
1006
|
-
|
|
1007
910
|
/**
|
|
1008
911
|
* @brief Performs sparse expert-based matrix multiplication using the CANN backend.
|
|
1009
912
|
*
|
|
@@ -1029,7 +932,7 @@ inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffe
|
|
|
1029
932
|
* @param dst The destination tensor where the expert-weighted token outputs are stored.
|
|
1030
933
|
* Expected to be of shape [M, K, N, 1].
|
|
1031
934
|
*/
|
|
1032
|
-
void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
935
|
+
void ggml_cann_mul_mat_id(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
1033
936
|
|
|
1034
937
|
/**
|
|
1035
938
|
* @brief Check whether a tensor is a weight tensor for matrix multiplication.
|
|
@@ -1041,20 +944,14 @@ void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
1041
944
|
*
|
|
1042
945
|
* @param tensor Pointer to the target ggml_tensor object (const-qualified).
|
|
1043
946
|
*/
|
|
1044
|
-
static bool is_matmul_weight(const ggml_tensor* tensor) {
|
|
1045
|
-
std::string
|
|
1046
|
-
static const std::unordered_set<std::string> weight_suffixes{
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
"ffn_gate.weight",
|
|
1053
|
-
"ffn_up.weight",
|
|
1054
|
-
"ffn_down.weight"
|
|
1055
|
-
};
|
|
1056
|
-
|
|
1057
|
-
for (const auto& suffix : weight_suffixes) {
|
|
947
|
+
static bool is_matmul_weight(const ggml_tensor * tensor) {
|
|
948
|
+
std::string name = ggml_get_name(tensor);
|
|
949
|
+
static const std::unordered_set<std::string> weight_suffixes{ "output.weight", "attn_q.weight",
|
|
950
|
+
"attn_k.weight", "attn_v.weight",
|
|
951
|
+
"attn_output.weight", "ffn_gate.weight",
|
|
952
|
+
"ffn_up.weight", "ffn_down.weight" };
|
|
953
|
+
|
|
954
|
+
for (const auto & suffix : weight_suffixes) {
|
|
1058
955
|
if (name.find(suffix) != std::string::npos) {
|
|
1059
956
|
return true;
|
|
1060
957
|
}
|
|
@@ -1078,23 +975,17 @@ static bool is_matmul_weight(const ggml_tensor* tensor) {
|
|
|
1078
975
|
* @param ctx The CANN backend context used to manage execution and resources.
|
|
1079
976
|
* @param dst The destination tensor.
|
|
1080
977
|
*/
|
|
1081
|
-
template <auto binary_op>
|
|
1082
|
-
|
|
1083
|
-
ggml_tensor*
|
|
1084
|
-
ggml_tensor* src1 = dst->src[1];
|
|
978
|
+
template <auto binary_op> void ggml_cann_binary_op(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
|
|
979
|
+
ggml_tensor * src0 = dst->src[0];
|
|
980
|
+
ggml_tensor * src1 = dst->src[1];
|
|
1085
981
|
|
|
1086
|
-
|
|
1087
|
-
aclTensor* acl_src1;
|
|
1088
|
-
aclTensor* acl_dst;
|
|
982
|
+
acl_tensor_ptr acl_src0, acl_src1, acl_dst;
|
|
1089
983
|
|
|
1090
984
|
// Need bcast
|
|
1091
|
-
bcast_shape(src0, src1, dst,
|
|
1092
|
-
binary_op(ctx, acl_src0, acl_src1, acl_dst);
|
|
1093
|
-
|
|
1094
|
-
ggml_cann_release_resources(ctx, acl_src0, acl_src1, acl_dst);
|
|
985
|
+
bcast_shape(src0, src1, dst, acl_src0, acl_src1, acl_dst);
|
|
986
|
+
binary_op(ctx, acl_src0.get(), acl_src1.get(), acl_dst.get());
|
|
1095
987
|
}
|
|
1096
988
|
|
|
1097
|
-
|
|
1098
989
|
/**
|
|
1099
990
|
* @brief Applies a unary operation to an input tensor using the CANN backend.
|
|
1100
991
|
*
|
|
@@ -1102,20 +993,19 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
1102
993
|
* and stores the result in the destination tensor.
|
|
1103
994
|
*
|
|
1104
995
|
* @tparam unary_op A callable with the signature:
|
|
1105
|
-
* void(ggml_backend_cann_context&, aclTensor*, aclTensor*)
|
|
996
|
+
* void(ggml_backend_cann_context&, aclTensor *, aclTensor *)
|
|
1106
997
|
* where the first aclTensor is the source and the second is the destination.
|
|
1107
998
|
* @param ctx The CANN backend context for managing resources and execution.
|
|
1108
999
|
* @param dst The destination tensor. Its src[0] is treated as the input tensor.
|
|
1109
1000
|
*/
|
|
1110
|
-
template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
|
|
1111
|
-
|
|
1112
|
-
ggml_tensor* src = dst->src[0];
|
|
1001
|
+
template <void unary_op(ggml_backend_cann_context &, aclTensor *, aclTensor *)>
|
|
1002
|
+
void ggml_cann_op_unary(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
|
|
1003
|
+
ggml_tensor * src = dst->src[0];
|
|
1113
1004
|
|
|
1114
|
-
|
|
1115
|
-
|
|
1005
|
+
acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
|
|
1006
|
+
acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
|
|
1116
1007
|
|
|
1117
|
-
unary_op(ctx, acl_src, acl_dst);
|
|
1118
|
-
ggml_cann_release_resources(ctx, acl_src, acl_dst);
|
|
1008
|
+
unary_op(ctx, acl_src.get(), acl_dst.get());
|
|
1119
1009
|
}
|
|
1120
1010
|
|
|
1121
1011
|
/**
|
|
@@ -1138,9 +1028,9 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
|
|
|
1138
1028
|
*
|
|
1139
1029
|
* @see GGML_CANN_CALL_OP_UNARY
|
|
1140
1030
|
*/
|
|
1141
|
-
void ggml_cann_op_unary(
|
|
1142
|
-
|
|
1143
|
-
|
|
1031
|
+
void ggml_cann_op_unary(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op,
|
|
1032
|
+
ggml_backend_cann_context & ctx,
|
|
1033
|
+
ggml_tensor * dst);
|
|
1144
1034
|
|
|
1145
1035
|
/**
|
|
1146
1036
|
* @brief Applies a gated (GLU-style) unary operation using the CANN backend.
|
|
@@ -1172,9 +1062,9 @@ void ggml_cann_op_unary(
|
|
|
1172
1062
|
*
|
|
1173
1063
|
* @see GGML_CANN_CALL_OP_UNARY_GATED
|
|
1174
1064
|
*/
|
|
1175
|
-
void ggml_cann_op_unary_gated(
|
|
1176
|
-
|
|
1177
|
-
|
|
1065
|
+
void ggml_cann_op_unary_gated(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op,
|
|
1066
|
+
ggml_backend_cann_context & ctx,
|
|
1067
|
+
ggml_tensor * dst);
|
|
1178
1068
|
|
|
1179
1069
|
/**
|
|
1180
1070
|
* @brief Helper macro to call a unary ACL operator via ggml_cann_op_unary.
|
|
@@ -1197,16 +1087,13 @@ void ggml_cann_op_unary_gated(
|
|
|
1197
1087
|
* @see ggml_cann_op_unary
|
|
1198
1088
|
* @see GGML_CANN_CALL_ACLNN_OP
|
|
1199
1089
|
*/
|
|
1200
|
-
#define GGML_CANN_CALL_OP_UNARY(OP_NAME)
|
|
1201
|
-
do {
|
|
1202
|
-
auto lambda = [](ggml_backend_cann_context& ctx,
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
ggml_cann_op_unary(lambda, ctx, dst); \
|
|
1208
|
-
} \
|
|
1209
|
-
while (0)
|
|
1090
|
+
#define GGML_CANN_CALL_OP_UNARY(OP_NAME) \
|
|
1091
|
+
do { \
|
|
1092
|
+
auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \
|
|
1093
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
|
|
1094
|
+
}; \
|
|
1095
|
+
ggml_cann_op_unary(lambda, ctx, dst); \
|
|
1096
|
+
} while (0)
|
|
1210
1097
|
|
|
1211
1098
|
/**
|
|
1212
1099
|
* @brief Helper macro to call a gated unary ACL operator via ggml_cann_op_unary_gated.
|
|
@@ -1229,15 +1116,12 @@ void ggml_cann_op_unary_gated(
|
|
|
1229
1116
|
* @see ggml_cann_op_unary_gated
|
|
1230
1117
|
* @see GGML_CANN_CALL_ACLNN_OP
|
|
1231
1118
|
*/
|
|
1232
|
-
#define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME)
|
|
1233
|
-
do {
|
|
1234
|
-
auto lambda = [](ggml_backend_cann_context& ctx,
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
ggml_cann_op_unary_gated(lambda, ctx, dst); \
|
|
1240
|
-
} \
|
|
1241
|
-
while (0)
|
|
1119
|
+
#define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME) \
|
|
1120
|
+
do { \
|
|
1121
|
+
auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \
|
|
1122
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
|
|
1123
|
+
}; \
|
|
1124
|
+
ggml_cann_op_unary_gated(lambda, ctx, dst); \
|
|
1125
|
+
} while (0)
|
|
1242
1126
|
|
|
1243
1127
|
#endif // CANN_ACLNN_OPS
|