whispercpp 1.3.4 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.md +158 -44
- data/ext/extconf.rb +3 -2
- data/ext/ruby_whisper.c +34 -6
- data/ext/ruby_whisper.h +67 -0
- data/ext/ruby_whisper_context.c +236 -144
- data/ext/ruby_whisper_context_params.c +163 -0
- data/ext/ruby_whisper_model.c +12 -13
- data/ext/ruby_whisper_params.c +47 -24
- data/ext/ruby_whisper_segment.c +84 -20
- data/ext/ruby_whisper_token.c +371 -0
- data/ext/ruby_whisper_transcribe.cpp +5 -2
- data/ext/ruby_whisper_vad_context.c +122 -0
- data/ext/ruby_whisper_vad_context_detect.cpp +51 -0
- data/ext/ruby_whisper_vad_params.c +0 -1
- data/ext/ruby_whisper_vad_segment.c +138 -0
- data/ext/ruby_whisper_vad_segments.c +105 -0
- data/ext/sources/CMakeLists.txt +4 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/arm64-apple-clang.cmake +16 -0
- data/ext/sources/cmake/arm64-windows-llvm.cmake +16 -0
- data/ext/sources/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- data/ext/sources/cmake/whisper-config.cmake.in +5 -40
- data/ext/sources/cmake/x64-windows-llvm.cmake +5 -0
- data/ext/sources/examples/addon.node/vad-example.js +2 -2
- data/ext/sources/examples/bench/bench.cpp +23 -18
- data/ext/sources/examples/cli/cli.cpp +129 -112
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/lsp/CMakeLists.txt +2 -1
- data/ext/sources/examples/miniaudio.h +4507 -2131
- data/ext/sources/examples/quantize/CMakeLists.txt +2 -1
- data/ext/sources/examples/server/server.cpp +28 -15
- data/ext/sources/examples/talk-llama/CMakeLists.txt +8 -3
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +5 -2
- data/ext/sources/examples/talk-llama/llama-adapter.h +7 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +2378 -1988
- data/ext/sources/examples/talk-llama/llama-arch.h +109 -2
- data/ext/sources/examples/talk-llama/llama-batch.cpp +78 -34
- data/ext/sources/examples/talk-llama/llama-batch.h +17 -4
- data/ext/sources/examples/talk-llama/llama-chat.cpp +100 -4
- data/ext/sources/examples/talk-llama/llama-chat.h +5 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +1088 -403
- data/ext/sources/examples/talk-llama/llama-context.h +70 -23
- data/ext/sources/examples/talk-llama/llama-cparams.h +6 -0
- data/ext/sources/examples/talk-llama/llama-ext.h +12 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +295 -60
- data/ext/sources/examples/talk-llama/llama-grammar.h +22 -1
- data/ext/sources/examples/talk-llama/llama-graph.cpp +925 -155
- data/ext/sources/examples/talk-llama/llama-graph.h +234 -23
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +79 -38
- data/ext/sources/examples/talk-llama/llama-hparams.h +118 -18
- data/ext/sources/examples/talk-llama/llama-impl.cpp +11 -7
- data/ext/sources/examples/talk-llama/llama-impl.h +14 -2
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +8 -4
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +405 -140
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +24 -10
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +44 -2
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +275 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +140 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +12 -10
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +42 -31
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +2 -2
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +197 -45
- data/ext/sources/examples/talk-llama/llama-mmap.h +8 -3
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +606 -116
- data/ext/sources/examples/talk-llama/llama-model-loader.h +41 -5
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +61 -44
- data/ext/sources/examples/talk-llama/llama-model-saver.h +5 -2
- data/ext/sources/examples/talk-llama/llama-model.cpp +2756 -13643
- data/ext/sources/examples/talk-llama/llama-model.h +112 -18
- data/ext/sources/examples/talk-llama/llama-quant.cpp +582 -365
- data/ext/sources/examples/talk-llama/{llama-sampling.cpp → llama-sampler.cpp} +1409 -199
- data/ext/sources/examples/talk-llama/llama-sampler.h +42 -0
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +248 -82
- data/ext/sources/examples/talk-llama/llama-vocab.h +50 -40
- data/ext/sources/examples/talk-llama/llama.cpp +802 -21
- data/ext/sources/examples/talk-llama/llama.h +210 -39
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +190 -0
- data/ext/sources/examples/talk-llama/models/apertus.cpp +125 -0
- data/ext/sources/examples/talk-llama/models/arcee.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/arctic.cpp +137 -0
- data/ext/sources/examples/talk-llama/models/arwkv7.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +143 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +133 -0
- data/ext/sources/examples/talk-llama/models/bert.cpp +184 -0
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +145 -0
- data/ext/sources/examples/talk-llama/models/bloom.cpp +101 -0
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +111 -0
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +102 -0
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/command-r.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/deci.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +142 -0
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +262 -0
- data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +445 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/dream.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +148 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/eurobert.cpp +97 -0
- data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +145 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +114 -0
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +111 -0
- data/ext/sources/examples/talk-llama/models/falcon.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/gemma.cpp +112 -0
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +155 -0
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +384 -0
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +170 -0
- data/ext/sources/examples/talk-llama/models/glm4.cpp +157 -0
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +195 -0
- data/ext/sources/examples/talk-llama/models/granite.cpp +210 -0
- data/ext/sources/examples/talk-llama/models/grok.cpp +159 -0
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +139 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +153 -0
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/jais.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/jais2.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +106 -0
- data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +196 -0
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/llada.cpp +99 -0
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/llama.cpp +175 -0
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/mamba-base.cpp +289 -0
- data/ext/sources/examples/talk-llama/models/mamba.cpp +54 -0
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +129 -0
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +200 -0
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/models.h +704 -0
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +109 -0
- data/ext/sources/examples/talk-llama/models/mpt.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +162 -0
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +104 -0
- data/ext/sources/examples/talk-llama/models/olmo.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +127 -0
- data/ext/sources/examples/talk-llama/models/openelm.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/orion.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/paddleocr.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi3.cpp +152 -0
- data/ext/sources/examples/talk-llama/models/plamo.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +320 -0
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/plm.cpp +169 -0
- data/ext/sources/examples/talk-llama/models/qwen.cpp +108 -0
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +151 -0
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/qwen35.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +422 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +131 -0
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +525 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +140 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/refact.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +164 -0
- data/ext/sources/examples/talk-llama/models/rwkv6.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +137 -0
- data/ext/sources/examples/talk-llama/models/rwkv7.cpp +90 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +146 -0
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +100 -0
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +165 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +166 -0
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +96 -0
- data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/xverse.cpp +108 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +121 -79
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +1 -1
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +1 -1
- data/ext/sources/ggml/CMakeLists.txt +90 -56
- data/ext/sources/ggml/include/ggml-alloc.h +9 -0
- data/ext/sources/ggml/include/ggml-backend.h +5 -2
- data/ext/sources/ggml/include/ggml-cann.h +1 -1
- data/ext/sources/ggml/include/ggml-cpu.h +6 -0
- data/ext/sources/ggml/include/ggml-hexagon.h +19 -0
- data/ext/sources/ggml/include/ggml-openvino.h +37 -0
- data/ext/sources/ggml/include/ggml-opt.h +1 -1
- data/ext/sources/ggml/include/ggml-rpc.h +14 -12
- data/ext/sources/ggml/include/ggml-virtgpu.h +14 -0
- data/ext/sources/ggml/include/ggml-zendnn.h +22 -0
- data/ext/sources/ggml/include/ggml.h +246 -21
- data/ext/sources/ggml/src/CMakeLists.txt +85 -11
- data/ext/sources/ggml/src/ggml-alloc.c +128 -50
- data/ext/sources/ggml/src/ggml-backend-dl.cpp +48 -0
- data/ext/sources/ggml/src/ggml-backend-dl.h +45 -0
- data/ext/sources/ggml/src/ggml-backend-impl.h +1 -4
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +54 -88
- data/ext/sources/ggml/src/ggml-backend.cpp +76 -23
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +18 -4
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +11 -11
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +58 -46
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +139 -48
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +2427 -1785
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +238 -362
- data/ext/sources/ggml/src/ggml-cann/common.h +285 -211
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +663 -831
- data/ext/sources/ggml/src/ggml-common.h +11 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +170 -95
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +42 -18
- data/ext/sources/ggml/src/ggml-cpu/amx/common.h +34 -10
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +85 -85
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +513 -27
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +4192 -992
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1761 -49
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1391 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +8 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +9 -9
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +124 -24
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +157 -28
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +2 -6
- data/ext/sources/ggml/src/ggml-cpu/common.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +8 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +251 -80
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +19 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +587 -119
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +33 -44
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1093 -194
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1284 -203
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +1519 -527
- data/ext/sources/ggml/src/ggml-cpu/ops.h +6 -4
- data/ext/sources/ggml/src/ggml-cpu/quants.c +40 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +3632 -781
- data/ext/sources/ggml/src/ggml-cpu/repack.h +129 -4
- data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +152 -46
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +3 -2
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +152 -1
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +7 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +140 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.h +261 -146
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +72 -1
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +2 -2
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +132 -6
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +16 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +33 -31
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +474 -85
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +41 -27
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +342 -246
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +1 -5
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cu +307 -0
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cu +77 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +98 -74
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +973 -665
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +35 -741
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +1255 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +33 -40
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +40 -18
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +206 -45
- data/ext/sources/ggml/src/ggml-cuda/fill.cu +37 -0
- data/ext/sources/ggml/src/ggml-cuda/fill.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +263 -0
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1688 -302
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +12 -10
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +908 -48
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +88 -20
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +502 -90
- data/ext/sources/ggml/src/ggml-cuda/mmid.cu +164 -0
- data/ext/sources/ggml/src/ggml-cuda/mmid.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +69 -176
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +532 -193
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +460 -104
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +5 -2
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +360 -122
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +2 -1
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +18 -76
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +73 -39
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +152 -1
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +2 -16
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +364 -149
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +101 -47
- data/ext/sources/ggml/src/ggml-cuda/set.cu +39 -0
- data/ext/sources/ggml/src/ggml-cuda/set.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +163 -41
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +68 -50
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +49 -84
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +22 -4
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +95 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +275 -119
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +20 -7
- data/ext/sources/ggml/src/ggml-cuda/tri.cu +136 -0
- data/ext/sources/ggml/src/ggml-cuda/tri.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +160 -11
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +38 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +163 -7
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +31 -17
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +22 -1
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +6 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3325 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +46 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +813 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +891 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +252 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +713 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +112 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +155 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +63 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +240 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +251 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +26 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +1199 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2670 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +497 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +168 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +419 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +339 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +382 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- data/ext/sources/ggml/src/ggml-hexagon/libdl.h +79 -0
- data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +153 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +14 -13
- data/ext/sources/ggml/src/ggml-impl.h +129 -6
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -10
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +15 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +8 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +173 -34
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +912 -344
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +124 -59
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +588 -144
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +396 -23
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +1724 -421
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +16 -3
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +333 -114
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +3050 -1539
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +3 -1
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +30 -1
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +4279 -497
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +41 -99
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +45 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +267 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +4 -3
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
- data/ext/sources/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mv_q6_k.cl → mul_mv_q6_k_f32.cl} +4 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +31 -32
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +14 -4
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +35 -16
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +94 -48
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- data/ext/sources/ggml/src/ggml-openvino/.clang-format +154 -0
- data/ext/sources/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +975 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +373 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +1110 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +884 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.h +153 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +39 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +293 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +226 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +85 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.cpp +823 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.h +123 -0
- data/ext/sources/ggml/src/ggml-quants.c +96 -5
- data/ext/sources/ggml/src/ggml-quants.h +3 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +438 -156
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +59 -87
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +81 -0
- data/ext/sources/ggml/src/ggml-sycl/add-id.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +21 -29
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +0 -6
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +427 -20
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +55 -44
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +103 -1
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +22 -1
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +79 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +0 -3
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +18 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +867 -50
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +401 -358
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +12 -2
- data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +1179 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +55 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +1338 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +667 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +225 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.hpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +309 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +645 -155
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +221 -66
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/outprod.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/pad.cpp +97 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.hpp +24 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.cpp +122 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +457 -281
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/set.cpp +73 -0
- data/ext/sources/ggml/src/ggml-sycl/set.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +327 -162
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +4 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +71 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +1 -1
- data/ext/sources/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- data/ext/sources/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- data/ext/sources/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +39 -19
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +5994 -3055
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +18 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +47 -49
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +9 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +386 -160
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +82 -20
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +400 -174
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +123 -37
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +10 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +128 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +17 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +2 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +19 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +13 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mat_vec_base.comp → mul_mat_vec_base.glsl} +77 -29
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +71 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +41 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +44 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +39 -36
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +88 -105
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +41 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mm_funcs.comp → mul_mm_funcs.glsl} +69 -59
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +74 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +92 -230
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +21 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +10 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +49 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +207 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +8 -49
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +8 -32
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +8 -32
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +8 -38
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +50 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +2 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +345 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +90 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +384 -180
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +28 -2
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1374 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +2544 -726
- data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +141 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +65 -72
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +73 -15
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{get_rows.tmpl.wgsl → get_rows.wgsl} +53 -259
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat.tmpl.wgsl → mul_mat.wgsl} +72 -261
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +766 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +147 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +196 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +480 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +83 -17
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +40 -12
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +6 -1
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +91 -0
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
- data/ext/sources/ggml/src/ggml.c +590 -64
- data/ext/sources/ggml/src/gguf.cpp +229 -44
- data/ext/sources/include/whisper.h +1 -0
- data/ext/sources/src/CMakeLists.txt +3 -1
- data/ext/sources/src/whisper.cpp +106 -62
- data/ext/sources/tests/CMakeLists.txt +2 -2
- data/ext/sources/tests/test-vad-full.cpp +4 -2
- data/ext/sources/tests/test-vad.cpp +1 -1
- data/extsources.rb +1 -0
- data/lib/whisper/model/uri.rb +17 -18
- data/sig/whisper.rbs +162 -4
- data/test/test_context_params.rb +82 -0
- data/test/test_params.rb +16 -8
- data/test/test_segment.rb +0 -1
- data/test/test_token.rb +81 -0
- data/test/test_vad.rb +1 -1
- data/test/test_vad_context.rb +100 -0
- data/test/test_vad_segment.rb +19 -0
- data/test/test_vad_segments.rb +16 -0
- data/test/test_whisper.rb +27 -0
- data/whispercpp.gemspec +1 -1
- metadata +502 -37
- data/ext/sources/build-xcframework.sh +0 -571
- data/ext/sources/examples/talk-llama/llama-sampling.h +0 -32
- data/ext/sources/ggml/cmake/BuildTypes.cmake +0 -54
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +0 -2579
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +0 -45
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +0 -48
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Copyright (c) 2023-
|
|
2
|
+
* Copyright (c) 2023-2026 The ggml authors
|
|
3
3
|
*
|
|
4
4
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
5
5
|
* of this software and associated documentation files (the "Software"), to
|
|
@@ -23,31 +23,36 @@
|
|
|
23
23
|
#ifndef CANN_ACLNN_OPS
|
|
24
24
|
#define CANN_ACLNN_OPS
|
|
25
25
|
|
|
26
|
-
#include
|
|
27
|
-
#include
|
|
26
|
+
#include "acl_tensor.h"
|
|
27
|
+
#include "common.h"
|
|
28
|
+
|
|
28
29
|
#include <aclnnop/aclnn_abs.h>
|
|
29
|
-
#include <aclnnop/aclnn_neg.h>
|
|
30
|
-
#include <aclnnop/aclnn_exp.h>
|
|
31
30
|
#include <aclnnop/aclnn_arange.h>
|
|
32
31
|
#include <aclnnop/aclnn_argsort.h>
|
|
33
32
|
#include <aclnnop/aclnn_cat.h>
|
|
34
33
|
#include <aclnnop/aclnn_clamp.h>
|
|
34
|
+
#include <aclnnop/aclnn_cos.h>
|
|
35
|
+
#include <aclnnop/aclnn_exp.h>
|
|
35
36
|
#include <aclnnop/aclnn_gelu.h>
|
|
36
37
|
#include <aclnnop/aclnn_gelu_v2.h>
|
|
37
|
-
#include <aclnnop/aclnn_sigmoid.h>
|
|
38
38
|
#include <aclnnop/aclnn_hardsigmoid.h>
|
|
39
39
|
#include <aclnnop/aclnn_hardswish.h>
|
|
40
40
|
#include <aclnnop/aclnn_leaky_relu.h>
|
|
41
|
+
#include <aclnnop/aclnn_log.h>
|
|
42
|
+
#include <aclnnop/aclnn_logsoftmax.h>
|
|
43
|
+
#include <aclnnop/aclnn_neg.h>
|
|
44
|
+
#include <aclnnop/aclnn_norm.h>
|
|
41
45
|
#include <aclnnop/aclnn_relu.h>
|
|
46
|
+
#include <aclnnop/aclnn_sigmoid.h>
|
|
47
|
+
#include <aclnnop/aclnn_sign.h>
|
|
42
48
|
#include <aclnnop/aclnn_silu.h>
|
|
43
|
-
#include <aclnnop/aclnn_tanh.h>
|
|
44
|
-
#include <aclnnop/aclnn_sqrt.h>
|
|
45
49
|
#include <aclnnop/aclnn_sin.h>
|
|
46
|
-
#include <aclnnop/
|
|
47
|
-
#include <aclnnop/
|
|
48
|
-
#include <aclnnop/
|
|
49
|
-
|
|
50
|
-
#include
|
|
50
|
+
#include <aclnnop/aclnn_slice.h>
|
|
51
|
+
#include <aclnnop/aclnn_sqrt.h>
|
|
52
|
+
#include <aclnnop/aclnn_tanh.h>
|
|
53
|
+
|
|
54
|
+
#include <functional>
|
|
55
|
+
#include <unordered_set>
|
|
51
56
|
|
|
52
57
|
/**
|
|
53
58
|
* @brief Repeats a ggml tensor along each dimension to match the dimensions
|
|
@@ -62,7 +67,7 @@
|
|
|
62
67
|
* @param dst The ggml tensor representing the destination, which op is
|
|
63
68
|
* GGML_OP_REPEAT and specifies the desired dimensions.
|
|
64
69
|
*/
|
|
65
|
-
void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
70
|
+
void ggml_cann_repeat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
66
71
|
|
|
67
72
|
/**
|
|
68
73
|
* @brief Applies the Leaky ReLU activation function to a tensor using the CANN
|
|
@@ -82,7 +87,7 @@ void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
82
87
|
* @param dst The destination tensor where the result of the Leaky ReLU
|
|
83
88
|
* activation is stored, which op is `GGML_OP_LEAKY_RELU`
|
|
84
89
|
*/
|
|
85
|
-
void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
90
|
+
void ggml_cann_leaky_relu(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
86
91
|
|
|
87
92
|
/**
|
|
88
93
|
* @brief Concatenates multiple tensors along a specified dimension using the
|
|
@@ -97,7 +102,7 @@ void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
97
102
|
* @attention tensorList length should be 2 and the dimension using for concat
|
|
98
103
|
* default to 1.
|
|
99
104
|
*/
|
|
100
|
-
void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
105
|
+
void ggml_cann_concat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
101
106
|
|
|
102
107
|
/**
|
|
103
108
|
* @brief Generates a sequence of evenly spaced values within a specified
|
|
@@ -113,7 +118,7 @@ void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
113
118
|
* `start`, 'stop' and 'step' are in dst->op_params and dst->op is
|
|
114
119
|
* `GGML_OP_ARANGE`.
|
|
115
120
|
*/
|
|
116
|
-
void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
121
|
+
void ggml_cann_arange(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
117
122
|
|
|
118
123
|
/**
|
|
119
124
|
* @brief Applies a clamp operation to the elements of a ggml tensor using the
|
|
@@ -131,7 +136,7 @@ void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
131
136
|
* @param dst The destination tensor where the clamped values will be stored.
|
|
132
137
|
* dst->op is `GGML_OP_CLAMP`, `min` and `max` value is in dst->params.
|
|
133
138
|
*/
|
|
134
|
-
void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
139
|
+
void ggml_cann_clamp(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
135
140
|
|
|
136
141
|
/**
|
|
137
142
|
* @brief Scales the elements of a ggml tensor by a constant factor using the
|
|
@@ -148,7 +153,7 @@ void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
148
153
|
* @param dst The destination tensor where the scaled values will be stored.
|
|
149
154
|
* dst->op is `GGML_OP_SCALE` and `scale` value is in dst->params.
|
|
150
155
|
*/
|
|
151
|
-
void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
156
|
+
void ggml_cann_scale(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
152
157
|
|
|
153
158
|
/**
|
|
154
159
|
* @brief Sorts the elements of a ggml tensor and returns the indices that
|
|
@@ -163,7 +168,7 @@ void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
163
168
|
* @param dst The destination tensor where the sorted indices will be stored.
|
|
164
169
|
* dst->op is `GGML_OP_ARGSORT`.
|
|
165
170
|
*/
|
|
166
|
-
void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
171
|
+
void ggml_cann_argsort(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
167
172
|
|
|
168
173
|
/**
|
|
169
174
|
* @brief Computes the Layer Normalization for a ggml tensor using the CANN
|
|
@@ -185,7 +190,67 @@ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
185
190
|
* @param dst The destination tensor where the normalized values will be stored.
|
|
186
191
|
* @attention `Var` defaults to dst->ne[0].
|
|
187
192
|
*/
|
|
188
|
-
void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
193
|
+
void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
194
|
+
|
|
195
|
+
/**
|
|
196
|
+
* @brief Computes the L2 Normalization for a ggml tensor using the CANN
|
|
197
|
+
* backend.
|
|
198
|
+
*
|
|
199
|
+
* @details This function applies the L2 Normalization operation on the
|
|
200
|
+
* input tensor `src` and stores the result in the destination tensor
|
|
201
|
+
* `dst`. L2 Normalization scales the input tensor such that the
|
|
202
|
+
* L2 norm along the specified dimension equals 1. This operation
|
|
203
|
+
* is commonly used in neural networks for feature normalization
|
|
204
|
+
* and vector scaling.
|
|
205
|
+
* The operation is defined as:
|
|
206
|
+
* \f[
|
|
207
|
+
* \text{out} = \frac{x}{\sqrt{\sum{x^2}}}
|
|
208
|
+
* \f]
|
|
209
|
+
* The normalization is performed along the last dimension by default.
|
|
210
|
+
*
|
|
211
|
+
* @param ctx The CANN context used for operations.
|
|
212
|
+
* @param dst The destination tensor where the normalized values will be stored.
|
|
213
|
+
* @attention The normalization is performed along the last dimension of the
|
|
214
|
+
* input tensor by default.
|
|
215
|
+
*/
|
|
216
|
+
void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
217
|
+
|
|
218
|
+
/**
|
|
219
|
+
* @brief Computes the Cross Entropy Loss for a ggml tensor using the CANN
|
|
220
|
+
* backend.
|
|
221
|
+
*
|
|
222
|
+
* @details This function computes the cross entropy loss between the predicted
|
|
223
|
+
* logits and target probability distributions. The operation follows
|
|
224
|
+
* the same computation pattern as the CPU implementation:
|
|
225
|
+
* 1. Applies log_softmax to the logits along the class dimension
|
|
226
|
+
* 2. Element-wise multiplication with target distributions
|
|
227
|
+
* 3. Summation along the class dimension to get per-sample losses
|
|
228
|
+
* 4. Global summation and scaling by -1/nr to get final loss
|
|
229
|
+
*
|
|
230
|
+
* The computation can be expressed as:
|
|
231
|
+
* \f[
|
|
232
|
+
* \text{loss} = -\frac{1}{N} \sum_{i=1}^{N} \sum_{j=1}^{C} y_{ij} \cdot \log(\text{softmax}(x_{ij}))
|
|
233
|
+
* \f]
|
|
234
|
+
* where \f$N\f$ is the total number of samples, \f$C\f$ is the number
|
|
235
|
+
* of classes, \f$x\f$ are the logits, and \f$y\f$ are the target
|
|
236
|
+
* probability distributions.
|
|
237
|
+
*
|
|
238
|
+
* @param ctx The CANN context used for operations.
|
|
239
|
+
* @param dst The destination tensor where the computed loss will be stored.
|
|
240
|
+
* This should be a scalar tensor containing the final loss value.
|
|
241
|
+
*
|
|
242
|
+
* @note This implementation computes cross entropy between probability
|
|
243
|
+
* distributions, not the typical classification cross entropy that
|
|
244
|
+
* expects class indices as targets. Both input tensors (src0 and src1)
|
|
245
|
+
* should have the same shape and represent probability distributions
|
|
246
|
+
* over the class dimension.
|
|
247
|
+
* @note The function expects two source tensors:
|
|
248
|
+
* - dst->src[0]: Logits tensor (before softmax)
|
|
249
|
+
* - dst->src[1]: Target probability distributions tensor
|
|
250
|
+
* @note The computation is performed using CANN backend operators including
|
|
251
|
+
* LogSoftmax, Mul, ReduceSum, and Muls for the final scaling.
|
|
252
|
+
*/
|
|
253
|
+
void ggml_cann_cross_entropy_loss(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
189
254
|
|
|
190
255
|
/**
|
|
191
256
|
* @brief Computes the Group Normalization for a ggml tensor using the CANN
|
|
@@ -209,7 +274,7 @@ void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
209
274
|
*
|
|
210
275
|
* @attention eps defaults to 1e-6f.
|
|
211
276
|
*/
|
|
212
|
-
void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
277
|
+
void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
213
278
|
|
|
214
279
|
/**
|
|
215
280
|
* @brief Computes the accumulation of tensors using the CANN backend.
|
|
@@ -228,7 +293,7 @@ void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
228
293
|
* @param dst The destination tensor where the accumulated values will be stored.
|
|
229
294
|
* `inplace` is in dst->params, and dst->op is `GGML_OP_ACC`.
|
|
230
295
|
*/
|
|
231
|
-
void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
296
|
+
void ggml_cann_acc(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
232
297
|
|
|
233
298
|
/**
|
|
234
299
|
* @brief Computes the sum of elements along the last dimension of a ggml tensor
|
|
@@ -244,7 +309,7 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
244
309
|
*
|
|
245
310
|
* @attention `reduce_dims` defaults to 3, which means the last dimension.
|
|
246
311
|
*/
|
|
247
|
-
void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
312
|
+
void ggml_cann_sum_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
248
313
|
|
|
249
314
|
/**
|
|
250
315
|
* @brief Computes the sum of elements in a ggml tensor.
|
|
@@ -258,7 +323,7 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
258
323
|
*
|
|
259
324
|
*/
|
|
260
325
|
|
|
261
|
-
void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
326
|
+
void ggml_cann_sum(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
262
327
|
|
|
263
328
|
/**
|
|
264
329
|
* @brief Upsamples a ggml tensor using nearest neighbor interpolation using
|
|
@@ -274,8 +339,7 @@ void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
274
339
|
* @param dst The destination tensor where the upsampled values will be stored.
|
|
275
340
|
* dst->op is `GGML_OP_UPSCALE`.
|
|
276
341
|
*/
|
|
277
|
-
void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
|
|
278
|
-
ggml_tensor* dst);
|
|
342
|
+
void ggml_cann_upsample_nearest2d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
279
343
|
|
|
280
344
|
/**
|
|
281
345
|
* @brief Pads a ggml tensor to match the dimensions of the destination tensor
|
|
@@ -290,7 +354,7 @@ void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
|
|
|
290
354
|
* @param dst The destination tensor, which specifies the target dimensions for
|
|
291
355
|
* padding. dst->op is `GGML_OP_PAD`.
|
|
292
356
|
*/
|
|
293
|
-
void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
357
|
+
void ggml_cann_pad(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
294
358
|
|
|
295
359
|
/**
|
|
296
360
|
* @brief Executes a 2D pooling operation on a ggml tensor using the CANN
|
|
@@ -307,7 +371,7 @@ void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
307
371
|
* @param dst The destination tensor on which the pooling operation is to be
|
|
308
372
|
* performed. dst->op is `GGML_OP_POOL_2D`.
|
|
309
373
|
*/
|
|
310
|
-
void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
374
|
+
void ggml_cann_pool2d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
311
375
|
|
|
312
376
|
/**
|
|
313
377
|
* @brief Duplicates a ggml tensor using the CANN backend.
|
|
@@ -326,7 +390,7 @@ void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
326
390
|
* different shape and dst is no-contiguous.
|
|
327
391
|
* @note: This func need to simplify.
|
|
328
392
|
*/
|
|
329
|
-
void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
393
|
+
void ggml_cann_dup(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
330
394
|
|
|
331
395
|
/**
|
|
332
396
|
* @brief Computes the Root Mean Square (RMS) normalization of a ggml tensor
|
|
@@ -348,7 +412,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
348
412
|
* @param dst The destination tensor where the normalized values will be stored.
|
|
349
413
|
* dst->op is `GGML_OP_RMS_NORM`.
|
|
350
414
|
*/
|
|
351
|
-
void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
415
|
+
void ggml_cann_rms_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
352
416
|
|
|
353
417
|
/**
|
|
354
418
|
* @brief Applies a diagonal mask to the tensor with a specified value.
|
|
@@ -363,7 +427,7 @@ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
363
427
|
* `GGML_OP_DIAG_MASK`
|
|
364
428
|
* @param value The value to use for masking.
|
|
365
429
|
*/
|
|
366
|
-
void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float value);
|
|
430
|
+
void ggml_cann_diag_mask(ggml_backend_cann_context & ctx, ggml_tensor * dst, float value);
|
|
367
431
|
|
|
368
432
|
/**
|
|
369
433
|
* @brief Performs an image-to-column transformation on the input tensor.
|
|
@@ -378,7 +442,7 @@ void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float
|
|
|
378
442
|
* @param dst The destination tensor that stores the result of the operation.
|
|
379
443
|
* dst->op is `GGML_OP_IM2COL`.
|
|
380
444
|
*/
|
|
381
|
-
void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
445
|
+
void ggml_cann_im2col(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
382
446
|
|
|
383
447
|
/**
|
|
384
448
|
* @brief Computes time step embeddings using sine and cosine functions.
|
|
@@ -392,10 +456,10 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
392
456
|
* @param dst The destination tensor where the result of the embedding operation
|
|
393
457
|
* will be stored. dst->op is `GGML_OP_TIMESTEP_EMBEDDING`.
|
|
394
458
|
*/
|
|
395
|
-
void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
459
|
+
void ggml_cann_timestep_embedding(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
396
460
|
|
|
397
461
|
// @see ggml_cann_dup.
|
|
398
|
-
void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
462
|
+
void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
399
463
|
|
|
400
464
|
/**
|
|
401
465
|
* @brief Computes the softmax activation with optional masking.
|
|
@@ -417,7 +481,7 @@ void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
417
481
|
* @param dst The destination tensor where the result will be stored. dst->op is
|
|
418
482
|
* `GGML_OP_SOFTMAX`.
|
|
419
483
|
*/
|
|
420
|
-
void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
484
|
+
void ggml_cann_softmax(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
421
485
|
|
|
422
486
|
/**
|
|
423
487
|
* @brief Extracts specific rows from a tensor based on indices.
|
|
@@ -429,7 +493,7 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
429
493
|
* @param ctx The backend CANN context for executing operations.
|
|
430
494
|
* @param dst The destination tensor where the extracted rows will be stored.
|
|
431
495
|
*/
|
|
432
|
-
void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
496
|
+
void ggml_cann_get_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
433
497
|
|
|
434
498
|
/**
|
|
435
499
|
* @brief Writes specific rows into a tensor at positions specified by indices.
|
|
@@ -441,7 +505,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
441
505
|
* @param ctx The backend CANN context for executing operations.
|
|
442
506
|
* @param dst The destination tensor where the specified rows will be updated.
|
|
443
507
|
*/
|
|
444
|
-
void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
508
|
+
void ggml_cann_set_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
445
509
|
|
|
446
510
|
/**
|
|
447
511
|
* @brief Executes matrix multiplication for the given tensor.
|
|
@@ -454,7 +518,7 @@ void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
454
518
|
* @param dst The destination tensor for storing the result of the matrix
|
|
455
519
|
* multiplication. dst->op is `GGML_OP_MUL_MAT`.
|
|
456
520
|
*/
|
|
457
|
-
void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
521
|
+
void ggml_cann_mul_mat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
458
522
|
|
|
459
523
|
/**
|
|
460
524
|
* @brief Applies Rotary Positional Embedding (RoPE) to the input tensor.
|
|
@@ -477,7 +541,7 @@ void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
477
541
|
* @note The function currently does not support cases where the freq_scale is
|
|
478
542
|
* not equal 1.
|
|
479
543
|
*/
|
|
480
|
-
void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
544
|
+
void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
481
545
|
|
|
482
546
|
/**
|
|
483
547
|
* @brief Computes the index of the maximum value along the specified dimension
|
|
@@ -492,7 +556,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
492
556
|
* @param dst The destination tensor where the indices of the maximum values will
|
|
493
557
|
* be stored. dst->op is `GGML_OP_ARGMAX`.
|
|
494
558
|
*/
|
|
495
|
-
void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
559
|
+
void ggml_cann_argmax(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
496
560
|
|
|
497
561
|
/**
|
|
498
562
|
* @brief Adds two tensors element-wise and stores the result in a destination
|
|
@@ -509,8 +573,10 @@ void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
509
573
|
* @param acl_src1 The second source tensor.
|
|
510
574
|
* @param acl_dst The destination tensor where the result will be stored.
|
|
511
575
|
*/
|
|
512
|
-
void aclnn_add(ggml_backend_cann_context& ctx,
|
|
513
|
-
|
|
576
|
+
void aclnn_add(ggml_backend_cann_context & ctx,
|
|
577
|
+
aclTensor * acl_src0,
|
|
578
|
+
aclTensor * acl_src1,
|
|
579
|
+
aclTensor * acl_dst = nullptr);
|
|
514
580
|
|
|
515
581
|
/**
|
|
516
582
|
* @brief Sub two tensors element-wise and stores the result in a destination
|
|
@@ -527,8 +593,10 @@ void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
|
|
|
527
593
|
* @param acl_src1 The second source tensor.
|
|
528
594
|
* @param acl_dst The destination tensor where the result will be stored.
|
|
529
595
|
*/
|
|
530
|
-
void aclnn_sub(ggml_backend_cann_context& ctx,
|
|
531
|
-
|
|
596
|
+
void aclnn_sub(ggml_backend_cann_context & ctx,
|
|
597
|
+
aclTensor * acl_src0,
|
|
598
|
+
aclTensor * acl_src1,
|
|
599
|
+
aclTensor * acl_dst = nullptr);
|
|
532
600
|
|
|
533
601
|
/**
|
|
534
602
|
* @brief Performs element-wise multiplication of two tensors and stores the
|
|
@@ -546,8 +614,10 @@ void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
|
|
|
546
614
|
* @param acl_other The second tensor for element-wise multiplication.
|
|
547
615
|
* @param acl_dst The destination tensor where the result will be stored.
|
|
548
616
|
*/
|
|
549
|
-
void aclnn_mul(ggml_backend_cann_context& ctx,
|
|
550
|
-
|
|
617
|
+
void aclnn_mul(ggml_backend_cann_context & ctx,
|
|
618
|
+
aclTensor * acl_src,
|
|
619
|
+
aclTensor * acl_other,
|
|
620
|
+
aclTensor * acl_dst = nullptr);
|
|
551
621
|
|
|
552
622
|
/**
|
|
553
623
|
* @brief Matrix division, optionally in-place.
|
|
@@ -567,8 +637,10 @@ void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
567
637
|
* @param inplace Flag indicating whether to perform the operation in-place on
|
|
568
638
|
* `acl_src`.
|
|
569
639
|
*/
|
|
570
|
-
void aclnn_div(ggml_backend_cann_context& ctx,
|
|
571
|
-
|
|
640
|
+
void aclnn_div(ggml_backend_cann_context & ctx,
|
|
641
|
+
aclTensor * acl_src,
|
|
642
|
+
aclTensor * acl_other,
|
|
643
|
+
aclTensor * acl_dst = nullptr);
|
|
572
644
|
|
|
573
645
|
/**
|
|
574
646
|
* @brief Applies element-wise cosine function to the elements of a tensor.
|
|
@@ -584,8 +656,7 @@ void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
584
656
|
* @param acl_dst The destination tensor where the cosine results will be
|
|
585
657
|
* stored.
|
|
586
658
|
*/
|
|
587
|
-
void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
588
|
-
aclTensor* acl_dst);
|
|
659
|
+
void aclnn_cos(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst);
|
|
589
660
|
|
|
590
661
|
/**
|
|
591
662
|
* @brief Applies element-wise sine function to the elements of a tensor.
|
|
@@ -602,8 +673,7 @@ void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
602
673
|
* @param acl_src The source tensor on which the sine function will be applied.
|
|
603
674
|
* @param acl_dst The destination tensor where the sine results will be stored.
|
|
604
675
|
*/
|
|
605
|
-
void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
606
|
-
aclTensor* acl_dst);
|
|
676
|
+
void aclnn_sin(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst);
|
|
607
677
|
|
|
608
678
|
/**
|
|
609
679
|
* @brief Prepares broadcast-compatible ACL tensors for two input tensors and one
|
|
@@ -621,8 +691,12 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
621
691
|
* @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
|
|
622
692
|
* @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
|
|
623
693
|
*/
|
|
624
|
-
void bcast_shape(ggml_tensor *
|
|
625
|
-
|
|
694
|
+
void bcast_shape(ggml_tensor * src0,
|
|
695
|
+
ggml_tensor * src1,
|
|
696
|
+
ggml_tensor * dst,
|
|
697
|
+
acl_tensor_ptr & acl_src0,
|
|
698
|
+
acl_tensor_ptr & acl_src1,
|
|
699
|
+
acl_tensor_ptr & acl_dst);
|
|
626
700
|
|
|
627
701
|
/**
|
|
628
702
|
* @brief Computes the 1D transposed convolution (deconvolution) of a ggml
|
|
@@ -637,7 +711,7 @@ void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
|
|
|
637
711
|
* @param dst The destination tensor where the transposed convolution result
|
|
638
712
|
* will be stored. dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
|
|
639
713
|
*/
|
|
640
|
-
void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
714
|
+
void ggml_cann_conv_transpose_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
641
715
|
|
|
642
716
|
/**
|
|
643
717
|
* @brief Applies the ELU (Exponential Linear Unit) activation to a ggml tensor
|
|
@@ -662,7 +736,7 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds
|
|
|
662
736
|
* @param dst The destination tensor where the ELU-activated result will be stored.
|
|
663
737
|
* dst->op is expected to be `GGML_OP_ELU`.
|
|
664
738
|
*/
|
|
665
|
-
void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
739
|
+
void ggml_cann_elu(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
666
740
|
|
|
667
741
|
/**
|
|
668
742
|
* @brief Computes the mean of a ggml tensor element-wise using the CANN backend.
|
|
@@ -677,7 +751,7 @@ void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
677
751
|
* @param dst The destination tensor where the mean result will be stored.
|
|
678
752
|
* dst->op is expected to be `GGML_OP_MEAN`.
|
|
679
753
|
*/
|
|
680
|
-
void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
754
|
+
void ggml_cann_mean(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
681
755
|
|
|
682
756
|
/**
|
|
683
757
|
* @brief Applies 1D reflect padding to a ggml tensor using the CANN backend.
|
|
@@ -692,7 +766,7 @@ void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
692
766
|
* @param dst The destination tensor where the padded result will be stored.
|
|
693
767
|
* dst->op is expected to be `GGML_OP_PAD_REFLECT_1D`.
|
|
694
768
|
*/
|
|
695
|
-
void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
769
|
+
void ggml_cann_pad_reflect_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
696
770
|
|
|
697
771
|
/**
|
|
698
772
|
* @brief Counts the number of equal elements in two ggml tensors using the CANN backend.
|
|
@@ -708,7 +782,7 @@ void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
708
782
|
* @param dst The destination tensor where the result will be stored.
|
|
709
783
|
* dst->op is expected to be `GGML_OP_COUNT_EQUAL`.
|
|
710
784
|
*/
|
|
711
|
-
void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
785
|
+
void ggml_cann_count_equal(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
712
786
|
|
|
713
787
|
/**
|
|
714
788
|
* @brief Applies the Step activation function to a ggml tensor using the CANN backend.
|
|
@@ -723,7 +797,7 @@ void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
723
797
|
* @param dst The destination tensor where the result will be stored.
|
|
724
798
|
* dst->op is expected to be `GGML_OP_STEP`.
|
|
725
799
|
*/
|
|
726
|
-
void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
800
|
+
void ggml_cann_step(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
727
801
|
|
|
728
802
|
/**
|
|
729
803
|
* @brief Performs the Flash Attention extended operator using the CANN backend.
|
|
@@ -738,167 +812,22 @@ void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
738
812
|
* @param dst The destination tensor where the result will be stored.
|
|
739
813
|
* dst->op is expected to be `GGML_OP_FLASH_ATTN_EXT`.
|
|
740
814
|
*/
|
|
741
|
-
void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
742
|
-
|
|
743
|
-
/*
|
|
744
|
-
* @brief A generic wrapper for ACL resources with custom deleter support.
|
|
745
|
-
*/
|
|
746
|
-
using any_acl_resource = std::unique_ptr<void, std::function<void(void*)>>;
|
|
815
|
+
void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
747
816
|
|
|
748
817
|
/**
|
|
749
|
-
* @brief
|
|
818
|
+
* @brief Forward Gated Linear Attention on the CANN backend.
|
|
750
819
|
*
|
|
751
|
-
*
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
/**
|
|
757
|
-
* @brief Specialization for aclTensor, defines how to destroy an aclTensor resource.
|
|
758
|
-
*/
|
|
759
|
-
template<>
|
|
760
|
-
struct acl_resource_traits<aclTensor> {
|
|
761
|
-
static void destroy(void* p) {
|
|
762
|
-
ACL_CHECK(aclDestroyTensor(static_cast<aclTensor*>(p)));
|
|
763
|
-
}
|
|
764
|
-
};
|
|
765
|
-
|
|
766
|
-
/**
|
|
767
|
-
* @brief Specialization for aclIntArray, defines how to destroy an aclIntArray resource.
|
|
768
|
-
*/
|
|
769
|
-
template<>
|
|
770
|
-
struct acl_resource_traits<aclIntArray> {
|
|
771
|
-
static void destroy(void* p) {
|
|
772
|
-
ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray*>(p)));
|
|
773
|
-
}
|
|
774
|
-
};
|
|
775
|
-
|
|
776
|
-
/**
|
|
777
|
-
* @brief Specialization for aclScalar, defines how to destroy an aclScalar resource.
|
|
778
|
-
*/
|
|
779
|
-
template<>
|
|
780
|
-
struct acl_resource_traits<aclScalar> {
|
|
781
|
-
static void destroy(void* p) {
|
|
782
|
-
ACL_CHECK(aclDestroyScalar(static_cast<aclScalar*>(p)));
|
|
783
|
-
}
|
|
784
|
-
};
|
|
785
|
-
|
|
786
|
-
/**
|
|
787
|
-
* @brief Specialization for aclTensorList, defines how to destroy an aclTensorList resource.
|
|
788
|
-
*/
|
|
789
|
-
template<>
|
|
790
|
-
struct acl_resource_traits<aclTensorList> {
|
|
791
|
-
static void destroy(void* p) {
|
|
792
|
-
ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList*>(p)));
|
|
793
|
-
}
|
|
794
|
-
};
|
|
795
|
-
|
|
796
|
-
/**
|
|
797
|
-
* @brief Creates a generic ACL resource wrapper with proper destruction logic.
|
|
820
|
+
* Expects dst->src[0..4] = {k, v, q, g, s} with shape conventions:
|
|
821
|
+
* k, v, q, g: [D] with outer dims T x H batched as ne[2]=T, ne[1]=H
|
|
822
|
+
* s: initial state [B, H, D, D], where B is batch and D=C/H
|
|
823
|
+
* dst holds both outputs (o) and updated state; a scale factor is read from op params.
|
|
798
824
|
*
|
|
799
|
-
*
|
|
800
|
-
* @param ptr Raw pointer to ACL resource.
|
|
801
|
-
* @return any_acl_resource Smart pointer that handles destruction.
|
|
802
|
-
*/
|
|
803
|
-
template<typename T>
|
|
804
|
-
any_acl_resource make_acl_resource(T* ptr) {
|
|
805
|
-
return any_acl_resource(
|
|
806
|
-
static_cast<void*>(ptr),
|
|
807
|
-
[](void* p) {
|
|
808
|
-
acl_resource_traits<T>::destroy(p);
|
|
809
|
-
}
|
|
810
|
-
);
|
|
811
|
-
}
|
|
812
|
-
|
|
813
|
-
/**
|
|
814
|
-
* @brief Registers multiple ACL resources into a vector for lifetime management.
|
|
825
|
+
* The kernel updates per time step l: S_new = g ⊗ S_old + k ⊗ v, then computes o = (S_new^T q) * scale.
|
|
815
826
|
*
|
|
816
|
-
* @
|
|
817
|
-
* @param
|
|
818
|
-
* @param args Raw pointers to ACL resources.
|
|
819
|
-
*/
|
|
820
|
-
template<typename... Args>
|
|
821
|
-
void register_acl_resources(std::vector<any_acl_resource>& vec, Args*... args) {
|
|
822
|
-
(vec.emplace_back(make_acl_resource(args)), ...);
|
|
823
|
-
}
|
|
824
|
-
|
|
825
|
-
/**
|
|
826
|
-
* @brief Task class that wraps the execution of an aclnn function call.
|
|
827
|
-
*/
|
|
828
|
-
class aclnn_task : public cann_task {
|
|
829
|
-
public:
|
|
830
|
-
aclnn_task(aclnn_func_t aclnn_func, void * workspace_addr,
|
|
831
|
-
uint64_t workspace_size, aclOpExecutor * executor,
|
|
832
|
-
aclrtStream stream) :
|
|
833
|
-
aclnn_func_(aclnn_func),
|
|
834
|
-
workspace_addr_(workspace_addr),
|
|
835
|
-
workspace_size_(workspace_size),
|
|
836
|
-
executor_(executor),
|
|
837
|
-
stream_(stream) {}
|
|
838
|
-
virtual void run_task() override {
|
|
839
|
-
ACL_CHECK(aclnn_func_(workspace_addr_, workspace_size_, executor_, stream_));
|
|
840
|
-
}
|
|
841
|
-
private:
|
|
842
|
-
aclnn_func_t aclnn_func_;
|
|
843
|
-
void * workspace_addr_;
|
|
844
|
-
uint64_t workspace_size_;
|
|
845
|
-
aclOpExecutor * executor_;
|
|
846
|
-
aclrtStream stream_;
|
|
847
|
-
};
|
|
848
|
-
|
|
849
|
-
/**
|
|
850
|
-
* @brief Task class that releases ACL resources after usage.
|
|
851
|
-
*/
|
|
852
|
-
class release_resource_task : public cann_task {
|
|
853
|
-
public:
|
|
854
|
-
release_resource_task(std::vector<any_acl_resource>&& resources){
|
|
855
|
-
resource_ = std::move(resources);
|
|
856
|
-
}
|
|
857
|
-
|
|
858
|
-
virtual void run_task() override {
|
|
859
|
-
resource_.clear();
|
|
860
|
-
}
|
|
861
|
-
private:
|
|
862
|
-
std::vector<any_acl_resource> resource_;
|
|
863
|
-
};
|
|
864
|
-
|
|
865
|
-
/**
|
|
866
|
-
* @brief Task class for performing asynchronous memory copy operations.
|
|
827
|
+
* @param ctx Backend context providing stream/allocator utilities.
|
|
828
|
+
* @param dst Output tensor; src deps are k, v, q, g, s as above.
|
|
867
829
|
*/
|
|
868
|
-
|
|
869
|
-
public:
|
|
870
|
-
async_memcpy_task(void* dst, const void* src, size_t size,
|
|
871
|
-
aclrtMemcpyKind kind, aclrtStream stream)
|
|
872
|
-
: dst_(dst), src_(src), size_(size), kind_(kind), stream_(stream) {}
|
|
873
|
-
|
|
874
|
-
virtual void run_task() override {
|
|
875
|
-
ACL_CHECK(aclrtMemcpyAsync(dst_, size_, src_, size_, kind_, stream_));
|
|
876
|
-
}
|
|
877
|
-
private:
|
|
878
|
-
void* dst_;
|
|
879
|
-
const void* src_;
|
|
880
|
-
size_t size_;
|
|
881
|
-
aclrtMemcpyKind kind_;
|
|
882
|
-
aclrtStream stream_;
|
|
883
|
-
};
|
|
884
|
-
|
|
885
|
-
/**
|
|
886
|
-
* @brief Task class for performing asynchronous memory set operations.
|
|
887
|
-
*/
|
|
888
|
-
class async_memset_task : public cann_task {
|
|
889
|
-
public:
|
|
890
|
-
async_memset_task(void* buffer, size_t size, int32_t value, aclrtStream stream)
|
|
891
|
-
: buffer_(buffer), size_(size), value_(value), stream_(stream) {}
|
|
892
|
-
|
|
893
|
-
virtual void run_task() override {
|
|
894
|
-
ACL_CHECK(aclrtMemsetAsync(buffer_, size_, value_, size_, stream_));
|
|
895
|
-
}
|
|
896
|
-
private:
|
|
897
|
-
void* buffer_;
|
|
898
|
-
size_t size_;
|
|
899
|
-
int32_t value_;
|
|
900
|
-
aclrtStream stream_;
|
|
901
|
-
};
|
|
830
|
+
void ggml_cann_gated_linear_attn(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
902
831
|
|
|
903
832
|
/**
|
|
904
833
|
* @brief Launches an asynchronous task using the memory allocator.
|
|
@@ -918,91 +847,19 @@ class async_memset_task : public cann_task {
|
|
|
918
847
|
* same stream are executed in queue order.
|
|
919
848
|
*/
|
|
920
849
|
|
|
921
|
-
#define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...)
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
std::make_unique<aclnn_task>(aclnn##OP_NAME, workspaceAddr, workspaceSize, \
|
|
935
|
-
executor, CTX.stream()); \
|
|
936
|
-
CTX.task_queue.submit_task(std::move(task)); \
|
|
937
|
-
} else { \
|
|
938
|
-
ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream()));\
|
|
939
|
-
} \
|
|
940
|
-
} while (0)
|
|
941
|
-
|
|
942
|
-
/**
|
|
943
|
-
* @brief Registers and releases multiple ACL resources, optionally deferring the release
|
|
944
|
-
* using a task.
|
|
945
|
-
*
|
|
946
|
-
* @tparam Args Types of the ACL resources.
|
|
947
|
-
* @param ctx Backend context which manages task submission and async mode.
|
|
948
|
-
* @param args Pointers to ACL resources to be released.
|
|
949
|
-
*/
|
|
950
|
-
template <typename... Args>
|
|
951
|
-
void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... args) {
|
|
952
|
-
std::vector<any_acl_resource> resources;
|
|
953
|
-
register_acl_resources(resources, std::forward<Args>(args)...);
|
|
954
|
-
if(ctx.async_mode) {
|
|
955
|
-
auto task = std::make_unique<release_resource_task>(std::move(resources));
|
|
956
|
-
ctx.task_queue.submit_task(std::move(task));
|
|
957
|
-
}
|
|
958
|
-
}
|
|
959
|
-
|
|
960
|
-
/**
|
|
961
|
-
* @brief Performs an asynchronous memory copy operation, optionally deferred via task submission.
|
|
962
|
-
*
|
|
963
|
-
* @param ctx Backend context containing stream and async configuration.
|
|
964
|
-
* @param dst Destination memory address.
|
|
965
|
-
* @param src Source memory address.
|
|
966
|
-
* @param len Size of memory to copy (in bytes).
|
|
967
|
-
* @param kind Type of memory copy (host-to-device, device-to-host, etc).
|
|
968
|
-
*/
|
|
969
|
-
inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx, void * dst,
|
|
970
|
-
const void * src, size_t len, aclrtMemcpyKind kind) {
|
|
971
|
-
if (ctx.async_mode) {
|
|
972
|
-
auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx.stream());
|
|
973
|
-
ctx.task_queue.submit_task(std::move(task));
|
|
974
|
-
} else {
|
|
975
|
-
ACL_CHECK(aclrtMemcpyAsync(dst, len, src, len, kind, ctx.stream()));
|
|
976
|
-
}
|
|
977
|
-
}
|
|
978
|
-
|
|
979
|
-
inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, void * dst,
|
|
980
|
-
const void * src, size_t len, aclrtMemcpyKind kind) {
|
|
981
|
-
if (ctx->async_mode) {
|
|
982
|
-
auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx->stream());
|
|
983
|
-
ctx->task_queue.submit_task(std::move(task));
|
|
984
|
-
} else {
|
|
985
|
-
ACL_CHECK(aclrtMemcpyAsync(dst, len, src, len, kind, ctx->stream()));
|
|
986
|
-
}
|
|
987
|
-
}
|
|
988
|
-
|
|
989
|
-
/**
|
|
990
|
-
* @brief Performs an asynchronous memory set operation, optionally deferred via task submission.
|
|
991
|
-
*
|
|
992
|
-
* @param ctx Backend context containing stream and async configuration.
|
|
993
|
-
* @param buffer Memory buffer to be set.
|
|
994
|
-
* @param size Size of the memory buffer (in bytes).
|
|
995
|
-
* @param value Value to set in the buffer.
|
|
996
|
-
*/
|
|
997
|
-
inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffer,
|
|
998
|
-
size_t size, int value) {
|
|
999
|
-
if (ctx.async_mode) {
|
|
1000
|
-
auto task = std::make_unique<async_memset_task>(buffer, size, value, ctx.stream());
|
|
1001
|
-
ctx.task_queue.submit_task(std::move(task));
|
|
1002
|
-
} else {
|
|
1003
|
-
ACL_CHECK(aclrtMemsetAsync(buffer, size, value, size, ctx.stream()));
|
|
1004
|
-
}
|
|
1005
|
-
}
|
|
850
|
+
# define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...) \
|
|
851
|
+
do { \
|
|
852
|
+
uint64_t workspaceSize = 0; \
|
|
853
|
+
aclOpExecutor * executor; \
|
|
854
|
+
void * workspaceAddr = nullptr; \
|
|
855
|
+
ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
|
|
856
|
+
/* the workspace must be allocated in the main thread to keep malloc order when using vmm. */ \
|
|
857
|
+
if (workspaceSize > 0) { \
|
|
858
|
+
ggml_cann_pool_alloc workspace_allocator(CTX.pool(), workspaceSize); \
|
|
859
|
+
workspaceAddr = workspace_allocator.get(); \
|
|
860
|
+
} \
|
|
861
|
+
ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream())); \
|
|
862
|
+
} while (0)
|
|
1006
863
|
|
|
1007
864
|
/**
|
|
1008
865
|
* @brief Performs sparse expert-based matrix multiplication using the CANN backend.
|
|
@@ -1029,7 +886,23 @@ inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffe
|
|
|
1029
886
|
* @param dst The destination tensor where the expert-weighted token outputs are stored.
|
|
1030
887
|
* Expected to be of shape [M, K, N, 1].
|
|
1031
888
|
*/
|
|
1032
|
-
void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
889
|
+
void ggml_cann_mul_mat_id(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
890
|
+
|
|
891
|
+
/**
|
|
892
|
+
* @brief Performs fused ADD + RMS_NORM operation using the CANN backend.
|
|
893
|
+
*
|
|
894
|
+
* This function fuses the ADD and RMS_NORM operations into a single kernel call
|
|
895
|
+
* for better performance. It first adds two input tensors (x1 + x2), then applies
|
|
896
|
+
* RMS normalization to the result.
|
|
897
|
+
*
|
|
898
|
+
* @param ctx The context for the CANN backend operations.
|
|
899
|
+
* @param add_node The ADD operation node, contains the two input tensors to be added.
|
|
900
|
+
* @param rms_norm_node The RMS_NORM operation node, contains the gamma weights
|
|
901
|
+
* and epsilon parameter.
|
|
902
|
+
*/
|
|
903
|
+
void ggml_cann_op_add_rms_norm_fused(ggml_backend_cann_context & ctx,
|
|
904
|
+
ggml_tensor * add_node,
|
|
905
|
+
ggml_tensor * rms_norm_node);
|
|
1033
906
|
|
|
1034
907
|
/**
|
|
1035
908
|
* @brief Check whether a tensor is a weight tensor for matrix multiplication.
|
|
@@ -1041,20 +914,14 @@ void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
1041
914
|
*
|
|
1042
915
|
* @param tensor Pointer to the target ggml_tensor object (const-qualified).
|
|
1043
916
|
*/
|
|
1044
|
-
static bool is_matmul_weight(const ggml_tensor* tensor) {
|
|
1045
|
-
std::string
|
|
1046
|
-
static const std::unordered_set<std::string> weight_suffixes{
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
"ffn_gate.weight",
|
|
1053
|
-
"ffn_up.weight",
|
|
1054
|
-
"ffn_down.weight"
|
|
1055
|
-
};
|
|
1056
|
-
|
|
1057
|
-
for (const auto& suffix : weight_suffixes) {
|
|
917
|
+
static bool is_matmul_weight(const ggml_tensor * tensor) {
|
|
918
|
+
std::string name = ggml_get_name(tensor);
|
|
919
|
+
static const std::unordered_set<std::string> weight_suffixes{ "output.weight", "attn_q.weight",
|
|
920
|
+
"attn_k.weight", "attn_v.weight",
|
|
921
|
+
"attn_output.weight", "ffn_gate.weight",
|
|
922
|
+
"ffn_up.weight", "ffn_down.weight" };
|
|
923
|
+
|
|
924
|
+
for (const auto & suffix : weight_suffixes) {
|
|
1058
925
|
if (name.find(suffix) != std::string::npos) {
|
|
1059
926
|
return true;
|
|
1060
927
|
}
|
|
@@ -1078,23 +945,17 @@ static bool is_matmul_weight(const ggml_tensor* tensor) {
|
|
|
1078
945
|
* @param ctx The CANN backend context used to manage execution and resources.
|
|
1079
946
|
* @param dst The destination tensor.
|
|
1080
947
|
*/
|
|
1081
|
-
template <auto binary_op>
|
|
1082
|
-
|
|
1083
|
-
ggml_tensor*
|
|
1084
|
-
ggml_tensor* src1 = dst->src[1];
|
|
948
|
+
template <auto binary_op> void ggml_cann_binary_op(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
|
|
949
|
+
ggml_tensor * src0 = dst->src[0];
|
|
950
|
+
ggml_tensor * src1 = dst->src[1];
|
|
1085
951
|
|
|
1086
|
-
|
|
1087
|
-
aclTensor* acl_src1;
|
|
1088
|
-
aclTensor* acl_dst;
|
|
952
|
+
acl_tensor_ptr acl_src0, acl_src1, acl_dst;
|
|
1089
953
|
|
|
1090
954
|
// Need bcast
|
|
1091
|
-
bcast_shape(src0, src1, dst,
|
|
1092
|
-
binary_op(ctx, acl_src0, acl_src1, acl_dst);
|
|
1093
|
-
|
|
1094
|
-
ggml_cann_release_resources(ctx, acl_src0, acl_src1, acl_dst);
|
|
955
|
+
bcast_shape(src0, src1, dst, acl_src0, acl_src1, acl_dst);
|
|
956
|
+
binary_op(ctx, acl_src0.get(), acl_src1.get(), acl_dst.get());
|
|
1095
957
|
}
|
|
1096
958
|
|
|
1097
|
-
|
|
1098
959
|
/**
|
|
1099
960
|
* @brief Applies a unary operation to an input tensor using the CANN backend.
|
|
1100
961
|
*
|
|
@@ -1102,20 +963,19 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
1102
963
|
* and stores the result in the destination tensor.
|
|
1103
964
|
*
|
|
1104
965
|
* @tparam unary_op A callable with the signature:
|
|
1105
|
-
* void(ggml_backend_cann_context&, aclTensor*, aclTensor*)
|
|
966
|
+
* void(ggml_backend_cann_context&, aclTensor *, aclTensor *)
|
|
1106
967
|
* where the first aclTensor is the source and the second is the destination.
|
|
1107
968
|
* @param ctx The CANN backend context for managing resources and execution.
|
|
1108
969
|
* @param dst The destination tensor. Its src[0] is treated as the input tensor.
|
|
1109
970
|
*/
|
|
1110
|
-
template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
|
|
1111
|
-
|
|
1112
|
-
ggml_tensor* src = dst->src[0];
|
|
971
|
+
template <void unary_op(ggml_backend_cann_context &, aclTensor *, aclTensor *)>
|
|
972
|
+
void ggml_cann_op_unary(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
|
|
973
|
+
ggml_tensor * src = dst->src[0];
|
|
1113
974
|
|
|
1114
|
-
|
|
1115
|
-
|
|
975
|
+
acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
|
|
976
|
+
acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
|
|
1116
977
|
|
|
1117
|
-
unary_op(ctx, acl_src, acl_dst);
|
|
1118
|
-
ggml_cann_release_resources(ctx, acl_src, acl_dst);
|
|
978
|
+
unary_op(ctx, acl_src.get(), acl_dst.get());
|
|
1119
979
|
}
|
|
1120
980
|
|
|
1121
981
|
/**
|
|
@@ -1138,9 +998,11 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
|
|
|
1138
998
|
*
|
|
1139
999
|
* @see GGML_CANN_CALL_OP_UNARY
|
|
1140
1000
|
*/
|
|
1141
|
-
void ggml_cann_op_unary(
|
|
1142
|
-
|
|
1143
|
-
|
|
1001
|
+
void ggml_cann_op_unary(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op,
|
|
1002
|
+
ggml_backend_cann_context & ctx,
|
|
1003
|
+
ggml_tensor * dst);
|
|
1004
|
+
|
|
1005
|
+
void ggml_cann_ssm_conv(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
1144
1006
|
|
|
1145
1007
|
/**
|
|
1146
1008
|
* @brief Applies a gated (GLU-style) unary operation using the CANN backend.
|
|
@@ -1172,9 +1034,9 @@ void ggml_cann_op_unary(
|
|
|
1172
1034
|
*
|
|
1173
1035
|
* @see GGML_CANN_CALL_OP_UNARY_GATED
|
|
1174
1036
|
*/
|
|
1175
|
-
void ggml_cann_op_unary_gated(
|
|
1176
|
-
|
|
1177
|
-
|
|
1037
|
+
void ggml_cann_op_unary_gated(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op,
|
|
1038
|
+
ggml_backend_cann_context & ctx,
|
|
1039
|
+
ggml_tensor * dst);
|
|
1178
1040
|
|
|
1179
1041
|
/**
|
|
1180
1042
|
* @brief Helper macro to call a unary ACL operator via ggml_cann_op_unary.
|
|
@@ -1197,16 +1059,13 @@ void ggml_cann_op_unary_gated(
|
|
|
1197
1059
|
* @see ggml_cann_op_unary
|
|
1198
1060
|
* @see GGML_CANN_CALL_ACLNN_OP
|
|
1199
1061
|
*/
|
|
1200
|
-
#define GGML_CANN_CALL_OP_UNARY(OP_NAME)
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
}
|
|
1207
|
-
ggml_cann_op_unary(lambda, ctx, dst); \
|
|
1208
|
-
} \
|
|
1209
|
-
while (0)
|
|
1062
|
+
# define GGML_CANN_CALL_OP_UNARY(OP_NAME) \
|
|
1063
|
+
do { \
|
|
1064
|
+
auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \
|
|
1065
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
|
|
1066
|
+
}; \
|
|
1067
|
+
ggml_cann_op_unary(lambda, ctx, dst); \
|
|
1068
|
+
} while (0)
|
|
1210
1069
|
|
|
1211
1070
|
/**
|
|
1212
1071
|
* @brief Helper macro to call a gated unary ACL operator via ggml_cann_op_unary_gated.
|
|
@@ -1229,15 +1088,32 @@ void ggml_cann_op_unary_gated(
|
|
|
1229
1088
|
* @see ggml_cann_op_unary_gated
|
|
1230
1089
|
* @see GGML_CANN_CALL_ACLNN_OP
|
|
1231
1090
|
*/
|
|
1232
|
-
#define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME)
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
}
|
|
1239
|
-
ggml_cann_op_unary_gated(lambda, ctx, dst); \
|
|
1240
|
-
} \
|
|
1241
|
-
while (0)
|
|
1091
|
+
# define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME) \
|
|
1092
|
+
do { \
|
|
1093
|
+
auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \
|
|
1094
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
|
|
1095
|
+
}; \
|
|
1096
|
+
ggml_cann_op_unary_gated(lambda, ctx, dst); \
|
|
1097
|
+
} while (0)
|
|
1242
1098
|
|
|
1243
1099
|
#endif // CANN_ACLNN_OPS
|
|
1100
|
+
|
|
1101
|
+
/**
|
|
1102
|
+
* @brief Performs outer product operation on two ggml tensors using the CANN backend.
|
|
1103
|
+
*
|
|
1104
|
+
* @details This function computes the outer product of two input tensors (src0 and src1)
|
|
1105
|
+
* and stores the result in the destination tensor. The outer product operation is defined as:
|
|
1106
|
+
* dst[i,j,k,l] = sum_m (src0[i,m,k,l] * src1[j,m,k,l])
|
|
1107
|
+
*
|
|
1108
|
+
* The function supports multiple data types including F32, F16. For floating-point
|
|
1109
|
+
* types, it uses batch matrix multiplication for efficient computation.
|
|
1110
|
+
*
|
|
1111
|
+
* The implementation handles 4D tensor broadcasting and batch processing automatically.
|
|
1112
|
+
*
|
|
1113
|
+
* @param ctx The CANN backend context for operation execution and memory management.
|
|
1114
|
+
* @param dst The destination ggml_tensor where the outer product result will be stored.
|
|
1115
|
+
* The input tensors are assumed to be `dst->src[0]` and `dst->src[1]`.
|
|
1116
|
+
*
|
|
1117
|
+
* @see GGML_CANN_CALL_ACLNN_OP for CANN operator invocation
|
|
1118
|
+
*/
|
|
1119
|
+
void ggml_cann_out_prod(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|