@novastera-oss/llamarn 0.2.9 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/build.gradle +2 -1
- package/android/proguard-rules.pro +12 -0
- package/android/src/main/cpp/include/llama.h +15 -47
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +0 -1
- package/cpp/llama.cpp/CMakePresets.json +11 -0
- package/cpp/llama.cpp/CODEOWNERS +1 -0
- package/cpp/llama.cpp/README.md +8 -8
- package/cpp/llama.cpp/build-xcframework.sh +1 -1
- package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
- package/cpp/llama.cpp/common/arg.cpp +62 -1
- package/cpp/llama.cpp/common/chat.cpp +37 -20
- package/cpp/llama.cpp/common/chat.h +2 -0
- package/cpp/llama.cpp/common/common.cpp +22 -6
- package/cpp/llama.cpp/common/common.h +22 -4
- package/cpp/llama.cpp/convert_hf_to_gguf.py +1250 -43
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +21 -13
- package/cpp/llama.cpp/ggml/CMakeLists.txt +13 -3
- package/cpp/llama.cpp/ggml/cmake/ggml-config.cmake.in +85 -47
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +173 -10
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-alloc.c +0 -15
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +7 -8
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +44 -38
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +126 -8
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +130 -22
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +138 -18
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +11 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +28 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +109 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +88 -10
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +343 -1094
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1206 -163
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +36 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +142 -9
- package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +31 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +86 -17
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh +225 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +41 -301
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +85 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +47 -60
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +29 -42
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +46 -59
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +36 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +38 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +23 -36
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +3 -13
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +255 -99
- package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +111 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +1152 -695
- package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +92 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cu +275 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +104 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +27 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +80 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +48 -12
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +572 -106
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +599 -105
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +18 -4
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +800 -42
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +4 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +693 -1034
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +14 -26
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +191 -55
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +8 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +15 -18
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +2 -6
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +991 -307
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +265 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +59 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +18 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +84 -9
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +907 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +35 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +56 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +386 -67
- package/cpp/llama.cpp/ggml/src/gguf.cpp +8 -1
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +307 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +8 -2
- package/cpp/llama.cpp/gguf-py/gguf/metadata.py +4 -0
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_dump.py +24 -1
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +122 -47
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +12 -3
- package/cpp/llama.cpp/include/llama.h +15 -47
- package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja +34 -0
- package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja +43 -0
- package/cpp/llama.cpp/requirements/requirements-all.txt +1 -0
- package/cpp/llama.cpp/requirements/requirements-server-bench.txt +5 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +316 -3
- package/cpp/llama.cpp/src/llama-arch.h +23 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +103 -71
- package/cpp/llama.cpp/src/llama-batch.h +31 -18
- package/cpp/llama.cpp/src/llama-chat.cpp +58 -1
- package/cpp/llama.cpp/src/llama-chat.h +3 -0
- package/cpp/llama.cpp/src/llama-context.cpp +180 -106
- package/cpp/llama.cpp/src/llama-context.h +26 -16
- package/cpp/llama.cpp/src/llama-cparams.h +3 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +310 -211
- package/cpp/llama.cpp/src/llama-graph.h +184 -122
- package/cpp/llama.cpp/src/llama-hparams.cpp +47 -1
- package/cpp/llama.cpp/src/llama-hparams.h +13 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +38 -22
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +7 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +849 -304
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +143 -47
- package/cpp/llama.cpp/src/llama-kv-cells.h +62 -10
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +10 -4
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +3 -1
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +36 -11
- package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
- package/cpp/llama.cpp/src/llama-memory.h +3 -0
- package/cpp/llama.cpp/src/llama-model.cpp +3545 -719
- package/cpp/llama.cpp/src/llama-model.h +21 -4
- package/cpp/llama.cpp/src/llama-quant.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +376 -10
- package/cpp/llama.cpp/src/llama-vocab.h +43 -0
- package/cpp/llama.cpp/src/unicode.cpp +207 -0
- package/cpp/llama.cpp/src/unicode.h +2 -0
- package/ios/include/chat.h +2 -0
- package/ios/include/common.h +22 -4
- package/ios/include/llama.h +15 -47
- package/ios/libs/llama.xcframework/Info.plist +13 -13
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4016 -3766
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5303 -4926
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5274 -4897
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4044 -3794
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +4 -4
- package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
- package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
|
@@ -22,21 +22,6 @@ static bool ggml_is_view(const struct ggml_tensor * t) {
|
|
|
22
22
|
return t->view_src != NULL;
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
-
static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
|
|
26
|
-
if (a->type != b->type) {
|
|
27
|
-
return false;
|
|
28
|
-
}
|
|
29
|
-
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
|
30
|
-
if (a->ne[i] != b->ne[i]) {
|
|
31
|
-
return false;
|
|
32
|
-
}
|
|
33
|
-
if (a->nb[i] != b->nb[i]) {
|
|
34
|
-
return false;
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
return true;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
25
|
// ops that return true for this function must not use restrict pointers for their backend implementations
|
|
41
26
|
static bool ggml_op_can_inplace(enum ggml_op op) {
|
|
42
27
|
switch (op) {
|
|
@@ -45,6 +45,10 @@
|
|
|
45
45
|
#include "ggml-vulkan.h"
|
|
46
46
|
#endif
|
|
47
47
|
|
|
48
|
+
#ifdef GGML_USE_WEBGPU
|
|
49
|
+
#include "ggml-webgpu.h"
|
|
50
|
+
#endif
|
|
51
|
+
|
|
48
52
|
#ifdef GGML_USE_OPENCL
|
|
49
53
|
#include "ggml-opencl.h"
|
|
50
54
|
#endif
|
|
@@ -61,10 +65,6 @@
|
|
|
61
65
|
#include "ggml-cann.h"
|
|
62
66
|
#endif
|
|
63
67
|
|
|
64
|
-
#ifdef GGML_USE_KOMPUTE
|
|
65
|
-
#include "ggml-kompute.h"
|
|
66
|
-
#endif
|
|
67
|
-
|
|
68
68
|
// disable C++17 deprecation warning for std::codecvt_utf8
|
|
69
69
|
#if defined(__clang__)
|
|
70
70
|
# pragma clang diagnostic push
|
|
@@ -177,6 +177,9 @@ struct ggml_backend_registry {
|
|
|
177
177
|
#ifdef GGML_USE_VULKAN
|
|
178
178
|
register_backend(ggml_backend_vk_reg());
|
|
179
179
|
#endif
|
|
180
|
+
#ifdef GGML_USE_WEBGPU
|
|
181
|
+
register_backend(ggml_backend_webgpu_reg());
|
|
182
|
+
#endif
|
|
180
183
|
#ifdef GGML_USE_OPENCL
|
|
181
184
|
register_backend(ggml_backend_opencl_reg());
|
|
182
185
|
#endif
|
|
@@ -189,9 +192,6 @@ struct ggml_backend_registry {
|
|
|
189
192
|
#ifdef GGML_USE_RPC
|
|
190
193
|
register_backend(ggml_backend_rpc_reg());
|
|
191
194
|
#endif
|
|
192
|
-
#ifdef GGML_USE_KOMPUTE
|
|
193
|
-
register_backend(ggml_backend_kompute_reg());
|
|
194
|
-
#endif
|
|
195
195
|
#ifdef GGML_USE_CPU
|
|
196
196
|
register_backend(ggml_backend_cpu_reg());
|
|
197
197
|
#endif
|
|
@@ -575,7 +575,6 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
|
|
|
575
575
|
ggml_backend_load_best("cann", silent, dir_path);
|
|
576
576
|
ggml_backend_load_best("cuda", silent, dir_path);
|
|
577
577
|
ggml_backend_load_best("hip", silent, dir_path);
|
|
578
|
-
ggml_backend_load_best("kompute", silent, dir_path);
|
|
579
578
|
ggml_backend_load_best("metal", silent, dir_path);
|
|
580
579
|
ggml_backend_load_best("rpc", silent, dir_path);
|
|
581
580
|
ggml_backend_load_best("sycl", silent, dir_path);
|
|
@@ -352,21 +352,6 @@ ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) {
|
|
|
352
352
|
|
|
353
353
|
// backend copy
|
|
354
354
|
|
|
355
|
-
static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
|
|
356
|
-
if (a->type != b->type) {
|
|
357
|
-
return false;
|
|
358
|
-
}
|
|
359
|
-
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
|
360
|
-
if (a->ne[i] != b->ne[i]) {
|
|
361
|
-
return false;
|
|
362
|
-
}
|
|
363
|
-
if (a->nb[i] != b->nb[i]) {
|
|
364
|
-
return false;
|
|
365
|
-
}
|
|
366
|
-
}
|
|
367
|
-
return true;
|
|
368
|
-
}
|
|
369
|
-
|
|
370
355
|
void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst) {
|
|
371
356
|
GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts");
|
|
372
357
|
|
|
@@ -662,6 +647,7 @@ struct ggml_backend_sched {
|
|
|
662
647
|
// pipeline parallelism support
|
|
663
648
|
int n_copies;
|
|
664
649
|
int cur_copy;
|
|
650
|
+
int next_copy;
|
|
665
651
|
ggml_backend_event_t events[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
|
|
666
652
|
struct ggml_tensor * graph_inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
|
|
667
653
|
int n_graph_inputs;
|
|
@@ -817,8 +803,9 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
|
|
|
817
803
|
}
|
|
818
804
|
if (sched->debug > 1) {
|
|
819
805
|
ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node);
|
|
820
|
-
GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, ggml_op_name(node->op), node->name,
|
|
821
|
-
fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node)
|
|
806
|
+
GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s] use=%d:", i, ggml_op_name(node->op), node->name,
|
|
807
|
+
fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node),
|
|
808
|
+
graph->use_counts[ggml_hash_find(&graph->visited_hash_set, node)]);
|
|
822
809
|
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
|
823
810
|
struct ggml_tensor * src = node->src[j];
|
|
824
811
|
if (src == NULL) {
|
|
@@ -1447,8 +1434,6 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
|
|
1447
1434
|
}
|
|
1448
1435
|
}
|
|
1449
1436
|
|
|
1450
|
-
sched->cur_copy = (sched->cur_copy + 1) % sched->n_copies;
|
|
1451
|
-
|
|
1452
1437
|
return GGML_STATUS_SUCCESS;
|
|
1453
1438
|
}
|
|
1454
1439
|
|
|
@@ -1549,10 +1534,10 @@ void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
|
|
|
1549
1534
|
bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
|
|
1550
1535
|
GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
|
|
1551
1536
|
|
|
1552
|
-
ggml_backend_sched_split_graph(sched, measure_graph);
|
|
1553
|
-
|
|
1554
1537
|
ggml_backend_sched_synchronize(sched);
|
|
1555
1538
|
|
|
1539
|
+
ggml_backend_sched_split_graph(sched, measure_graph);
|
|
1540
|
+
|
|
1556
1541
|
if (!ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
|
|
1557
1542
|
return false;
|
|
1558
1543
|
}
|
|
@@ -1564,6 +1549,10 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph *
|
|
|
1564
1549
|
|
|
1565
1550
|
bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
|
1566
1551
|
GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs);
|
|
1552
|
+
GGML_ASSERT(!sched->is_alloc);
|
|
1553
|
+
|
|
1554
|
+
sched->cur_copy = sched->next_copy;
|
|
1555
|
+
sched->next_copy = (sched->next_copy + 1) % sched->n_copies;
|
|
1567
1556
|
|
|
1568
1557
|
ggml_backend_sched_split_graph(sched, graph);
|
|
1569
1558
|
|
|
@@ -1604,7 +1593,7 @@ void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
|
|
|
1604
1593
|
// if the graph is not already allocated, always use copy 0 after a synchronization
|
|
1605
1594
|
// this ensures that during generation the same copy is used every time,
|
|
1606
1595
|
// which avoids changes in the graph that could cause CUDA or other graphs to be disabled
|
|
1607
|
-
sched->
|
|
1596
|
+
sched->next_copy = 0;
|
|
1608
1597
|
}
|
|
1609
1598
|
}
|
|
1610
1599
|
|
|
@@ -1826,7 +1815,7 @@ void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy) {
|
|
|
1826
1815
|
ggml_free(copy.ctx_unallocated);
|
|
1827
1816
|
}
|
|
1828
1817
|
|
|
1829
|
-
bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data) {
|
|
1818
|
+
bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node) {
|
|
1830
1819
|
struct ggml_backend_graph_copy copy = ggml_backend_graph_copy(backend2, graph);
|
|
1831
1820
|
if (copy.buffer == NULL) {
|
|
1832
1821
|
return false;
|
|
@@ -1837,28 +1826,45 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
|
|
|
1837
1826
|
|
|
1838
1827
|
assert(g1->n_nodes == g2->n_nodes);
|
|
1839
1828
|
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1829
|
+
if (test_node != nullptr) {
|
|
1830
|
+
// Compute the whole graph and only test the output for a specific tensor
|
|
1831
|
+
ggml_backend_graph_compute(backend1, g1);
|
|
1832
|
+
ggml_backend_graph_compute(backend2, g2);
|
|
1843
1833
|
|
|
1844
|
-
|
|
1834
|
+
int test_node_idx = -1;
|
|
1835
|
+
for (int i = 0; i < g1->n_nodes; i++) {
|
|
1836
|
+
struct ggml_tensor * t1 = g1->nodes[i];
|
|
1837
|
+
if (t1 == test_node) {
|
|
1838
|
+
test_node_idx = i;
|
|
1839
|
+
break;
|
|
1840
|
+
}
|
|
1841
|
+
}
|
|
1842
|
+
GGML_ASSERT(test_node_idx != -1);
|
|
1845
1843
|
|
|
1846
|
-
|
|
1847
|
-
|
|
1844
|
+
callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data);
|
|
1845
|
+
} else {
|
|
1846
|
+
for (int i = 0; i < g1->n_nodes; i++) {
|
|
1847
|
+
struct ggml_tensor * t1 = g1->nodes[i];
|
|
1848
|
+
struct ggml_tensor * t2 = g2->nodes[i];
|
|
1848
1849
|
|
|
1849
|
-
|
|
1850
|
-
ggml_backend_graph_compute(backend2, &g2v);
|
|
1850
|
+
assert(t1->op == t2->op && ggml_are_same_layout(t1, t2));
|
|
1851
1851
|
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
}
|
|
1852
|
+
struct ggml_cgraph g1v = ggml_graph_view(g1, i, i + 1);
|
|
1853
|
+
struct ggml_cgraph g2v = ggml_graph_view(g2, i, i + 1);
|
|
1855
1854
|
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1855
|
+
ggml_backend_graph_compute(backend1, &g1v);
|
|
1856
|
+
ggml_backend_graph_compute(backend2, &g2v);
|
|
1857
|
+
|
|
1858
|
+
if (ggml_is_view_op(t1->op)) {
|
|
1859
|
+
continue;
|
|
1860
|
+
}
|
|
1861
|
+
|
|
1862
|
+
// compare results, calculate rms etc
|
|
1863
|
+
if (!callback(i, t1, t2, user_data)) {
|
|
1864
|
+
break;
|
|
1865
|
+
}
|
|
1859
1866
|
}
|
|
1860
1867
|
}
|
|
1861
|
-
|
|
1862
1868
|
ggml_backend_graph_copy_free(copy);
|
|
1863
1869
|
|
|
1864
1870
|
return true;
|
|
@@ -77,6 +77,8 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
|
|
|
77
77
|
for (int i = 0; i < final_dims; i++) {
|
|
78
78
|
acl_storage_len += (acl_ne[i] - 1) * acl_stride[i];
|
|
79
79
|
}
|
|
80
|
+
size_t elem_offset = offset / ggml_element_size(tensor);
|
|
81
|
+
acl_storage_len += elem_offset;
|
|
80
82
|
|
|
81
83
|
// Reverse ne and stride.
|
|
82
84
|
std::reverse(acl_ne, acl_ne + final_dims);
|
|
@@ -84,7 +86,7 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
|
|
|
84
86
|
|
|
85
87
|
aclTensor* acl_tensor = aclCreateTensor(
|
|
86
88
|
acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride,
|
|
87
|
-
|
|
89
|
+
elem_offset, format, &acl_storage_len, 1,
|
|
88
90
|
tensor->data);
|
|
89
91
|
|
|
90
92
|
return acl_tensor;
|
|
@@ -65,8 +65,9 @@
|
|
|
65
65
|
#include <aclnnop/aclnn_eq_tensor.h>
|
|
66
66
|
#include <aclnnop/aclnn_gt_scalar.h>
|
|
67
67
|
#include <aclnnop/aclnn_pow.h>
|
|
68
|
-
#include <aclnnop/
|
|
68
|
+
#include <aclnnop/aclnn_grouped_matmul_v3.h>
|
|
69
69
|
#include <aclnnop/aclnn_fused_infer_attention_score_v2.h>
|
|
70
|
+
#include <aclnnop/aclnn_zero.h>
|
|
70
71
|
#include <float.h>
|
|
71
72
|
|
|
72
73
|
#include <cmath>
|
|
@@ -98,7 +99,7 @@ void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst, aclT
|
|
|
98
99
|
}
|
|
99
100
|
}
|
|
100
101
|
|
|
101
|
-
void
|
|
102
|
+
void ggml_cann_op_unary(
|
|
102
103
|
std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
|
|
103
104
|
ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
104
105
|
ggml_tensor* src = dst->src[0];
|
|
@@ -110,6 +111,42 @@ void ggml_cann_unary_op(
|
|
|
110
111
|
ggml_cann_release_resources(ctx, acl_src, acl_dst);
|
|
111
112
|
}
|
|
112
113
|
|
|
114
|
+
void ggml_cann_op_unary_gated(
|
|
115
|
+
std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
|
|
116
|
+
ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
117
|
+
ggml_tensor* src0 = dst->src[0];
|
|
118
|
+
ggml_tensor* src1 = dst->src[1];
|
|
119
|
+
|
|
120
|
+
GGML_ASSERT(ggml_is_contiguous_1(src0));
|
|
121
|
+
GGML_ASSERT(ggml_is_contiguous_1(dst));
|
|
122
|
+
const int32_t swapped = ggml_get_op_params_i32(dst, 1);
|
|
123
|
+
|
|
124
|
+
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
|
125
|
+
aclTensor *acl_src0 = nullptr, *acl_src1 = nullptr;
|
|
126
|
+
if(src1) {
|
|
127
|
+
GGML_ASSERT(ggml_is_contiguous_1(src1));
|
|
128
|
+
GGML_ASSERT(src0->type == src1->type);
|
|
129
|
+
|
|
130
|
+
acl_src0 = ggml_cann_create_tensor(src0);
|
|
131
|
+
acl_src1 = ggml_cann_create_tensor(src1);
|
|
132
|
+
} else {
|
|
133
|
+
int64_t ne[] = {src0->ne[0] / 2, src0->ne[1], src0->ne[2], src0->ne[3]};
|
|
134
|
+
size_t nb[] = {src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3]};
|
|
135
|
+
acl_src0 = ggml_cann_create_tensor(src0, ne, nb, GGML_MAX_DIMS, ACL_FORMAT_ND, 0);
|
|
136
|
+
acl_src1 = ggml_cann_create_tensor(src0, ne, nb, GGML_MAX_DIMS, ACL_FORMAT_ND, ne[0] * ggml_element_size(src0));
|
|
137
|
+
if (swapped) {
|
|
138
|
+
std::swap(acl_src0, acl_src1);
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
unary_op(ctx, acl_src0, acl_dst);
|
|
143
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceMul, acl_dst, acl_src1);
|
|
144
|
+
|
|
145
|
+
ggml_cann_release_resources(ctx, acl_src0, acl_dst);
|
|
146
|
+
if(src1)
|
|
147
|
+
ggml_cann_release_resources(ctx, acl_src1);
|
|
148
|
+
}
|
|
149
|
+
|
|
113
150
|
/**
|
|
114
151
|
* @brief Repeats elements of a tensor along each dimension according to the
|
|
115
152
|
* specified repeat array.
|
|
@@ -804,10 +841,11 @@ static aclTensor* aclnn_zero(ggml_backend_cann_context& ctx, void* buffer,
|
|
|
804
841
|
nb[i] = nb[i - 1] * ne[i - 1];
|
|
805
842
|
}
|
|
806
843
|
|
|
807
|
-
ggml_cann_async_memset(ctx, buffer, n_bytes, 0);
|
|
808
844
|
aclTensor* zero =
|
|
809
845
|
ggml_cann_create_tensor(buffer, type, type_size, ne, nb, dims);
|
|
846
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceZero, zero);
|
|
810
847
|
return zero;
|
|
848
|
+
GGML_UNUSED(n_bytes);
|
|
811
849
|
}
|
|
812
850
|
|
|
813
851
|
/**
|
|
@@ -1783,8 +1821,27 @@ static void ggml_cann_mat_mul_fp(ggml_backend_cann_context& ctx,
|
|
|
1783
1821
|
size_t transpose_nb[] = {bcast_weight_nb[1], bcast_weight_nb[0],
|
|
1784
1822
|
bcast_weight_nb[2], bcast_weight_nb[3],
|
|
1785
1823
|
bcast_weight_nb[4], bcast_weight_nb[5]};
|
|
1786
|
-
aclTensor* acl_weight_tensor
|
|
1787
|
-
|
|
1824
|
+
aclTensor* acl_weight_tensor;
|
|
1825
|
+
|
|
1826
|
+
bool weightToNZ = false;
|
|
1827
|
+
#ifdef ASCEND_310P
|
|
1828
|
+
weightToNZ = (getenv("GGML_CANN_WEIGHT_NZ") != nullptr);
|
|
1829
|
+
#endif
|
|
1830
|
+
if (weightToNZ && is_matmul_weight(weight)) {
|
|
1831
|
+
int64_t acl_stride[2] = {1, transpose_ne[1]};
|
|
1832
|
+
|
|
1833
|
+
// Reverse ne.
|
|
1834
|
+
std::reverse(transpose_ne, transpose_ne + n_dims);
|
|
1835
|
+
|
|
1836
|
+
std::vector<int64_t> storageDims = {transpose_ne[0], transpose_ne[1]};
|
|
1837
|
+
|
|
1838
|
+
acl_weight_tensor = aclCreateTensor(
|
|
1839
|
+
transpose_ne, n_dims, ggml_cann_type_mapping(weight->type), acl_stride,
|
|
1840
|
+
0, ACL_FORMAT_FRACTAL_NZ, storageDims.data(), 2, weight->data);
|
|
1841
|
+
} else {
|
|
1842
|
+
acl_weight_tensor =
|
|
1843
|
+
ggml_cann_create_tensor(weight, transpose_ne, transpose_nb, n_dims, ACL_FORMAT_ND);
|
|
1844
|
+
}
|
|
1788
1845
|
aclTensor* acl_dst =
|
|
1789
1846
|
ggml_cann_create_tensor(dst, bcast_dst_ne, bcast_dst_nb, n_dims);
|
|
1790
1847
|
|
|
@@ -2654,6 +2711,67 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor*
|
|
|
2654
2711
|
memcpy(ori_src0_nb, cast_nb, sizeof(ori_src0_nb));
|
|
2655
2712
|
}
|
|
2656
2713
|
|
|
2714
|
+
#ifdef ASCEND_310P
|
|
2715
|
+
ggml_tensor src0_row = *src0;
|
|
2716
|
+
ggml_tensor src1_row = *src1;
|
|
2717
|
+
ggml_tensor dst_row = *dst;
|
|
2718
|
+
|
|
2719
|
+
if (src0->type == GGML_TYPE_F16) {
|
|
2720
|
+
src0_row.type = GGML_TYPE_F32;
|
|
2721
|
+
}
|
|
2722
|
+
|
|
2723
|
+
// src0_row [D, M, 1, 1] weight without permute
|
|
2724
|
+
src0_row.ne[2] = 1;
|
|
2725
|
+
src0_row.ne[3] = 1;
|
|
2726
|
+
src0_row.nb[0] = ori_src0_nb[0];
|
|
2727
|
+
src0_row.nb[1] = ori_src0_nb[1];
|
|
2728
|
+
src0_row.nb[2] = ori_src0_nb[1];
|
|
2729
|
+
src0_row.nb[3] = ori_src0_nb[1];
|
|
2730
|
+
|
|
2731
|
+
// src1_row [D, 1, 1, 1] -> input
|
|
2732
|
+
src1_row.ne[1] = 1;
|
|
2733
|
+
src1_row.ne[2] = 1;
|
|
2734
|
+
src1_row.ne[3] = 1;
|
|
2735
|
+
src1_row.nb[2] = nb11;
|
|
2736
|
+
src1_row.nb[3] = nb11;
|
|
2737
|
+
|
|
2738
|
+
// dst_row [M, 1, 1, 1] -> out
|
|
2739
|
+
dst_row.ne[1] = 1;
|
|
2740
|
+
dst_row.ne[2] = 1;
|
|
2741
|
+
dst_row.ne[3] = 1;
|
|
2742
|
+
dst_row.nb[2] = nb1;
|
|
2743
|
+
dst_row.nb[3] = nb1;
|
|
2744
|
+
|
|
2745
|
+
//create weight for one row
|
|
2746
|
+
for (int64_t iid1 = 0; iid1 < ids->ne[1]; iid1++) {
|
|
2747
|
+
for (int64_t id = 0; id < n_ids; id++) {
|
|
2748
|
+
// expert index
|
|
2749
|
+
int32_t i02 = *(int32_t *) (ids_host.data() + iid1*ids->nb[1] + id*ids->nb[0]);
|
|
2750
|
+
GGML_ASSERT(i02 >= 0 && i02 < n_as);
|
|
2751
|
+
|
|
2752
|
+
// If B = 1 (broadcast), always use 0; otherwise, use id.
|
|
2753
|
+
int64_t i11 = (ne11 == 1 ? 0 : id);
|
|
2754
|
+
int64_t i12 = iid1;
|
|
2755
|
+
|
|
2756
|
+
int64_t i1 = id;
|
|
2757
|
+
int64_t i2 = i12;
|
|
2758
|
+
|
|
2759
|
+
void* src0_tmp_ptr = src0_original + i02*ori_src0_nb[2];
|
|
2760
|
+
void* src1_tmp_ptr = src1_original + i11*nb11 + i12*nb12;
|
|
2761
|
+
void* dst_tmp_ptr = dst_original + i1*nb1 + i2*nb2;
|
|
2762
|
+
|
|
2763
|
+
src0_row.data = src0_tmp_ptr;
|
|
2764
|
+
src1_row.data = src1_tmp_ptr;
|
|
2765
|
+
dst_row.data = dst_tmp_ptr;
|
|
2766
|
+
dst_row.src[0] = &src0_row;
|
|
2767
|
+
dst_row.src[1] = &src1_row;
|
|
2768
|
+
|
|
2769
|
+
ggml_cann_mul_mat(ctx, &dst_row);
|
|
2770
|
+
}
|
|
2771
|
+
}
|
|
2772
|
+
return;
|
|
2773
|
+
#endif
|
|
2774
|
+
|
|
2657
2775
|
std::vector<aclTensor*> src0_tensor_vec;
|
|
2658
2776
|
std::vector<aclTensor*> src1_tensor_vec;
|
|
2659
2777
|
std::vector<aclTensor*> dst_tensor_vec;
|
|
@@ -2701,9 +2819,9 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor*
|
|
|
2701
2819
|
}
|
|
2702
2820
|
|
|
2703
2821
|
size_t GROUP_SIZE = 128;
|
|
2704
|
-
//
|
|
2822
|
+
// GroupedMatmulV3 required tensor_list.size < 128
|
|
2705
2823
|
for (size_t i = 0; i < src0_tensor_vec.size(); i += GROUP_SIZE) {
|
|
2706
|
-
// split and call
|
|
2824
|
+
// split and call GroupedMatmulV3
|
|
2707
2825
|
size_t end = std::min(i + GROUP_SIZE, src0_tensor_vec.size());
|
|
2708
2826
|
std::vector<aclTensor*> src0_tensor_vec_split(src0_tensor_vec.begin() + i, src0_tensor_vec.begin() + end);
|
|
2709
2827
|
std::vector<aclTensor*> src1_tensor_vec_split(src1_tensor_vec.begin() + i, src1_tensor_vec.begin() + end);
|
|
@@ -2713,7 +2831,7 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor*
|
|
|
2713
2831
|
aclTensorList* src1_tensor_list = aclCreateTensorList(src1_tensor_vec_split.data(), src1_tensor_vec_split.size());
|
|
2714
2832
|
aclTensorList* dst_tensor_list = aclCreateTensorList(dst_tensor_vec_split.data(), dst_tensor_vec_split.size());
|
|
2715
2833
|
|
|
2716
|
-
GGML_CANN_CALL_ACLNN_OP(ctx,
|
|
2834
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, GroupedMatmulV3, src1_tensor_list, src0_tensor_list,
|
|
2717
2835
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0, -1, dst_tensor_list);
|
|
2718
2836
|
|
|
2719
2837
|
ggml_cann_release_resources(ctx, src0_tensor_list, src1_tensor_list, dst_tensor_list);
|
|
@@ -23,6 +23,7 @@
|
|
|
23
23
|
#ifndef CANN_ACLNN_OPS
|
|
24
24
|
#define CANN_ACLNN_OPS
|
|
25
25
|
|
|
26
|
+
#include <unordered_set>
|
|
26
27
|
#include <functional>
|
|
27
28
|
#include <aclnnop/aclnn_abs.h>
|
|
28
29
|
#include <aclnnop/aclnn_neg.h>
|
|
@@ -1020,6 +1021,37 @@ inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffe
|
|
|
1020
1021
|
*/
|
|
1021
1022
|
void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
1022
1023
|
|
|
1024
|
+
/**
|
|
1025
|
+
* @brief Check whether a tensor is a weight tensor for matrix multiplication.
|
|
1026
|
+
*
|
|
1027
|
+
* @details Checks whether the given tensor serves as weight parameters in matrix multiplication operations,
|
|
1028
|
+
* typically within neural network layers. The function maintains a static set of canonical weight
|
|
1029
|
+
* naming suffixes from Transformer-based architectures. Uses substring matching to identify weight
|
|
1030
|
+
* tensors even with hierarchical naming patterns.
|
|
1031
|
+
*
|
|
1032
|
+
* @param tensor Pointer to the target ggml_tensor object (const-qualified).
|
|
1033
|
+
*/
|
|
1034
|
+
static bool is_matmul_weight(const ggml_tensor* tensor) {
|
|
1035
|
+
std::string name = ggml_get_name(tensor);
|
|
1036
|
+
static const std::unordered_set<std::string> weight_suffixes{
|
|
1037
|
+
"output.weight",
|
|
1038
|
+
"attn_q.weight",
|
|
1039
|
+
"attn_k.weight",
|
|
1040
|
+
"attn_v.weight",
|
|
1041
|
+
"attn_output.weight",
|
|
1042
|
+
"ffn_gate.weight",
|
|
1043
|
+
"ffn_up.weight",
|
|
1044
|
+
"ffn_down.weight"
|
|
1045
|
+
};
|
|
1046
|
+
|
|
1047
|
+
for (const auto& suffix : weight_suffixes) {
|
|
1048
|
+
if (name.find(suffix) != std::string::npos) {
|
|
1049
|
+
return true;
|
|
1050
|
+
}
|
|
1051
|
+
}
|
|
1052
|
+
return false;
|
|
1053
|
+
}
|
|
1054
|
+
|
|
1023
1055
|
/**
|
|
1024
1056
|
* @brief Applies an element-wise operation to two input tensors using the CANN
|
|
1025
1057
|
* backend.
|
|
@@ -1066,7 +1098,7 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
1066
1098
|
* @param dst The destination tensor. Its src[0] is treated as the input tensor.
|
|
1067
1099
|
*/
|
|
1068
1100
|
template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
|
|
1069
|
-
void
|
|
1101
|
+
void ggml_cann_op_unary(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
1070
1102
|
ggml_tensor* src = dst->src[0];
|
|
1071
1103
|
|
|
1072
1104
|
aclTensor* acl_src = ggml_cann_create_tensor(src);
|
|
@@ -1077,49 +1109,125 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
|
|
|
1077
1109
|
}
|
|
1078
1110
|
|
|
1079
1111
|
/**
|
|
1080
|
-
* @brief
|
|
1112
|
+
* @brief Applies a unary operation to a ggml tensor using the CANN backend.
|
|
1081
1113
|
*
|
|
1082
|
-
* @details This function
|
|
1083
|
-
* a user-provided lambda or callable
|
|
1084
|
-
* context and two ACL tensors
|
|
1085
|
-
* creates ACL representations of the ggml tensors and invokes the unary operation.
|
|
1086
|
-
* The result is stored in the destination tensor `dst`. This utility abstracts the
|
|
1087
|
-
* common boilerplate of tensor conversion and cleanup when implementing unary ops.
|
|
1114
|
+
* @details This function applies a unary operation to the input tensor using
|
|
1115
|
+
* a user-provided lambda or callable `unary_op`. The lambda receives the
|
|
1116
|
+
* CANN backend context and two ACL tensors: the source and the destination.
|
|
1088
1117
|
*
|
|
1089
|
-
*
|
|
1090
|
-
*
|
|
1091
|
-
*
|
|
1092
|
-
*
|
|
1118
|
+
* Internally, this function handles the conversion from GGML tensors to ACL tensors,
|
|
1119
|
+
* calls the provided unary op, and manages resource cleanup. The input is assumed
|
|
1120
|
+
* to be `dst->src[0]`, and the result is written to `dst`.
|
|
1121
|
+
*
|
|
1122
|
+
* This utility simplifies writing unary op wrappers by abstracting tensor preparation.
|
|
1123
|
+
*
|
|
1124
|
+
* @param unary_op A callable that performs the unary operation using CANN ACL APIs.
|
|
1125
|
+
* @param ctx The CANN context for operation execution.
|
|
1126
|
+
* @param dst The destination ggml_tensor where the result will be stored.
|
|
1127
|
+
* The input tensor is assumed to be `dst->src[0]`.
|
|
1128
|
+
*
|
|
1129
|
+
* @see GGML_CANN_CALL_OP_UNARY
|
|
1130
|
+
*/
|
|
1131
|
+
void ggml_cann_op_unary(
|
|
1132
|
+
std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
|
|
1133
|
+
ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
1134
|
+
|
|
1135
|
+
/**
|
|
1136
|
+
* @brief Applies a gated (GLU-style) unary operation using the CANN backend.
|
|
1137
|
+
*
|
|
1138
|
+
* @details This function performs a gated activation such as GEGLU or ReGLU.
|
|
1139
|
+
* It supports two input modes:
|
|
1140
|
+
*
|
|
1141
|
+
* 1. **Dual input mode**: `dst->src[0]` and `dst->src[1]` are both valid tensors.
|
|
1142
|
+
* These are used directly as the value and gate tensors.
|
|
1143
|
+
*
|
|
1144
|
+
* 2. **Packed input mode**: Only `dst->src[0]` is valid, and it is assumed to
|
|
1145
|
+
* contain a concatenation of value and gate along the first dimension. This tensor
|
|
1146
|
+
* will be split into two equal halves to form the value and gate inputs.
|
|
1147
|
+
*
|
|
1148
|
+
* The function applies a user-provided unary operation (e.g., GELU) to the value tensor,
|
|
1149
|
+
* then multiplies the result in-place with the gate tensor:
|
|
1150
|
+
*
|
|
1151
|
+
* @code
|
|
1152
|
+
* dst = unary_op(value) * gate;
|
|
1153
|
+
* @endcode
|
|
1154
|
+
*
|
|
1155
|
+
* The `swapped` parameter (from `dst->op_params[1]`) allows flipping the
|
|
1156
|
+
* order of value/gate in the packed input case.
|
|
1157
|
+
*
|
|
1158
|
+
* @param unary_op A callable that performs the unary operation using CANN ACL APIs.
|
|
1159
|
+
* It receives (ctx, acl_value_tensor, acl_output_tensor).
|
|
1160
|
+
* @param ctx The CANN context used for execution.
|
|
1161
|
+
* @param dst The destination ggml_tensor. Source tensors are in `dst->src[0]` and optionally `src[1]`.
|
|
1162
|
+
*
|
|
1163
|
+
* @see GGML_CANN_CALL_OP_UNARY_GATED
|
|
1093
1164
|
*/
|
|
1094
|
-
void
|
|
1165
|
+
void ggml_cann_op_unary_gated(
|
|
1095
1166
|
std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
|
|
1096
1167
|
ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
1097
1168
|
|
|
1098
1169
|
/**
|
|
1099
|
-
* @brief Helper macro to
|
|
1170
|
+
* @brief Helper macro to call a unary ACL operator via ggml_cann_op_unary.
|
|
1100
1171
|
*
|
|
1101
|
-
* This macro
|
|
1102
|
-
* and passes it to the
|
|
1103
|
-
*
|
|
1172
|
+
* This macro wraps the specified ACLNN unary operator name into a lambda expression,
|
|
1173
|
+
* and passes it to `ggml_cann_op_unary`, which handles the common logic for executing
|
|
1174
|
+
* unary ops in the CANN backend.
|
|
1104
1175
|
*
|
|
1105
|
-
* Internally,
|
|
1176
|
+
* Internally, this macro expands to a lambda like:
|
|
1106
1177
|
* @code
|
|
1107
|
-
*
|
|
1178
|
+
* [](ggml_backend_cann_context& ctx, aclTensor* acl_src, aclTensor* acl_dst) {
|
|
1179
|
+
* GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst);
|
|
1180
|
+
* };
|
|
1108
1181
|
* @endcode
|
|
1109
1182
|
*
|
|
1183
|
+
* This lambda is then passed to `ggml_cann_op_unary`, which applies the operation.
|
|
1184
|
+
*
|
|
1110
1185
|
* @param OP_NAME The name of the ACL unary operator to invoke via GGML_CANN_CALL_ACLNN_OP.
|
|
1111
1186
|
*
|
|
1112
|
-
* @see
|
|
1187
|
+
* @see ggml_cann_op_unary
|
|
1113
1188
|
* @see GGML_CANN_CALL_ACLNN_OP
|
|
1114
1189
|
*/
|
|
1115
|
-
#define
|
|
1190
|
+
#define GGML_CANN_CALL_OP_UNARY(OP_NAME)                                  \
    do {                                                                  \
        auto aclnn_unary = [](ggml_backend_cann_context& ctx,             \
                              aclTensor* acl_in, aclTensor* acl_out) {    \
            GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_in, acl_out);       \
        };                                                                \
        ggml_cann_op_unary(aclnn_unary, ctx, dst);                        \
    } while (0)
|
|
1200
|
+
|
|
1201
|
+
/**
 * @brief Convenience macro that runs a gated unary ACLNN operator.
 *
 * The macro builds a lambda that forwards its arguments to
 * GGML_CANN_CALL_ACLNN_OP with the given operator name, then hands that
 * lambda to `ggml_cann_op_unary_gated` together with the `ctx` and `dst`
 * names visible at the expansion site.
 *
 * The generated lambda is equivalent to:
 * @code
 * [](ggml_backend_cann_context& ctx, aclTensor* acl_src, aclTensor* acl_dst) {
 *     GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst);
 * };
 * @endcode
 *
 * @param OP_NAME Name of the ACL unary operator passed to GGML_CANN_CALL_ACLNN_OP.
 *
 * @see ggml_cann_op_unary_gated
 * @see GGML_CANN_CALL_ACLNN_OP
 */
#define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME)                            \
    do {                                                                  \
        auto aclnn_unary = [](ggml_backend_cann_context& ctx,             \
                              aclTensor* acl_in, aclTensor* acl_out) {    \
            GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_in, acl_out);       \
        };                                                                \
        ggml_cann_op_unary_gated(aclnn_unary, ctx, dst);                  \
    } while (0)
|
|
1232
|
+
|
|
1125
1233
|
#endif // CANN_ACLNN_OPS
|