whispercpp 1.3.4 → 1.3.5
This diff shows the changes between publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +60 -43
- data/ext/extconf.rb +2 -2
- data/ext/ruby_whisper.c +14 -2
- data/ext/ruby_whisper.h +39 -0
- data/ext/ruby_whisper_context.c +22 -22
- data/ext/ruby_whisper_model.c +12 -12
- data/ext/ruby_whisper_params.c +47 -23
- data/ext/ruby_whisper_segment.c +84 -19
- data/ext/ruby_whisper_token.c +351 -0
- data/ext/ruby_whisper_transcribe.cpp +1 -1
- data/ext/ruby_whisper_vad_context.c +75 -0
- data/ext/ruby_whisper_vad_context_detect.cpp +50 -0
- data/ext/ruby_whisper_vad_segment.c +139 -0
- data/ext/ruby_whisper_vad_segments.c +106 -0
- data/ext/sources/CMakeLists.txt +4 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/arm64-apple-clang.cmake +16 -0
- data/ext/sources/cmake/arm64-windows-llvm.cmake +16 -0
- data/ext/sources/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- data/ext/sources/cmake/x64-windows-llvm.cmake +5 -0
- data/ext/sources/examples/addon.node/vad-example.js +2 -2
- data/ext/sources/examples/cli/cli.cpp +121 -112
- data/ext/sources/examples/lsp/CMakeLists.txt +2 -1
- data/ext/sources/examples/quantize/CMakeLists.txt +2 -1
- data/ext/sources/examples/server/server.cpp +10 -11
- data/ext/sources/examples/talk-llama/CMakeLists.txt +5 -1
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +12 -3
- data/ext/sources/examples/talk-llama/llama-adapter.h +7 -1
- data/ext/sources/examples/talk-llama/llama-arch.cpp +2046 -1974
- data/ext/sources/examples/talk-llama/llama-arch.h +67 -2
- data/ext/sources/examples/talk-llama/llama-batch.cpp +75 -33
- data/ext/sources/examples/talk-llama/llama-batch.h +17 -4
- data/ext/sources/examples/talk-llama/llama-chat.cpp +79 -3
- data/ext/sources/examples/talk-llama/llama-chat.h +4 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +775 -78
- data/ext/sources/examples/talk-llama/llama-context.h +57 -9
- data/ext/sources/examples/talk-llama/llama-cparams.h +1 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +288 -53
- data/ext/sources/examples/talk-llama/llama-grammar.h +22 -1
- data/ext/sources/examples/talk-llama/llama-graph.cpp +381 -64
- data/ext/sources/examples/talk-llama/llama-graph.h +103 -13
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +26 -2
- data/ext/sources/examples/talk-llama/llama-hparams.h +41 -10
- data/ext/sources/examples/talk-llama/llama-impl.cpp +7 -3
- data/ext/sources/examples/talk-llama/llama-impl.h +1 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +5 -3
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +145 -65
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +22 -7
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +44 -2
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +12 -10
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +32 -19
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +2 -2
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +172 -37
- data/ext/sources/examples/talk-llama/llama-mmap.h +8 -3
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +91 -9
- data/ext/sources/examples/talk-llama/llama-model-loader.h +6 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +3 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +1529 -13134
- data/ext/sources/examples/talk-llama/llama-model.h +44 -3
- data/ext/sources/examples/talk-llama/llama-quant.cpp +8 -23
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +1294 -198
- data/ext/sources/examples/talk-llama/llama-sampling.h +19 -7
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +133 -37
- data/ext/sources/examples/talk-llama/llama-vocab.h +45 -40
- data/ext/sources/examples/talk-llama/llama.cpp +729 -2
- data/ext/sources/examples/talk-llama/llama.h +152 -14
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +191 -0
- data/ext/sources/examples/talk-llama/models/apertus.cpp +125 -0
- data/ext/sources/examples/talk-llama/models/arcee.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/arctic.cpp +138 -0
- data/ext/sources/examples/talk-llama/models/arwkv7.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/bert.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/bloom.cpp +101 -0
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +111 -0
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +102 -0
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/command-r.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/deci.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +259 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/dream.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +114 -0
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +113 -0
- data/ext/sources/examples/talk-llama/models/falcon.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/gemma.cpp +112 -0
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +155 -0
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +384 -0
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +170 -0
- data/ext/sources/examples/talk-llama/models/glm4.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +196 -0
- data/ext/sources/examples/talk-llama/models/granite.cpp +211 -0
- data/ext/sources/examples/talk-llama/models/graph-context-mamba.cpp +283 -0
- data/ext/sources/examples/talk-llama/models/grok.cpp +159 -0
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +154 -0
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/jais.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +106 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +175 -0
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/llada.cpp +99 -0
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/llama.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/mamba.cpp +55 -0
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +199 -0
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/models.h +569 -0
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/mpt.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +104 -0
- data/ext/sources/examples/talk-llama/models/olmo.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +127 -0
- data/ext/sources/examples/talk-llama/models/openelm.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/orion.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi3.cpp +152 -0
- data/ext/sources/examples/talk-llama/models/plamo.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +316 -0
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/plm.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/qwen.cpp +108 -0
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +151 -0
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +873 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/refact.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +162 -0
- data/ext/sources/examples/talk-llama/models/rwkv6.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/rwkv7.cpp +90 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +146 -0
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +100 -0
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +166 -0
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +96 -0
- data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/xverse.cpp +108 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +102 -16
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +1 -1
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +1 -1
- data/ext/sources/ggml/CMakeLists.txt +82 -54
- data/ext/sources/ggml/include/ggml-alloc.h +9 -0
- data/ext/sources/ggml/include/ggml-backend.h +4 -1
- data/ext/sources/ggml/include/ggml-cpu.h +1 -0
- data/ext/sources/ggml/include/ggml-hexagon.h +19 -0
- data/ext/sources/ggml/include/ggml-rpc.h +8 -11
- data/ext/sources/ggml/include/ggml-zendnn.h +22 -0
- data/ext/sources/ggml/include/ggml.h +190 -12
- data/ext/sources/ggml/src/CMakeLists.txt +82 -11
- data/ext/sources/ggml/src/ggml-alloc.c +124 -41
- data/ext/sources/ggml/src/ggml-backend-impl.h +1 -4
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +27 -3
- data/ext/sources/ggml/src/ggml-backend.cpp +71 -21
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +17 -3
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -9
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +57 -45
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +138 -47
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +2179 -1696
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +238 -317
- data/ext/sources/ggml/src/ggml-cann/common.h +283 -208
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +626 -776
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +156 -86
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +1 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1004 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +50 -2
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +195 -71
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +573 -106
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +33 -44
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +298 -112
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +819 -125
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +708 -431
- data/ext/sources/ggml/src/ggml-cpu/ops.h +5 -4
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +671 -31
- data/ext/sources/ggml/src/ggml-cpu/repack.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +41 -43
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +3 -2
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +7 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +124 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.h +261 -146
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +72 -1
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +2 -2
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +123 -6
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +16 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +353 -80
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +339 -246
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +1 -5
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cu +307 -0
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cu +77 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +31 -21
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +663 -596
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +35 -741
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +1241 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +30 -37
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +14 -13
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +83 -37
- data/ext/sources/ggml/src/ggml-cuda/fill.cu +37 -0
- data/ext/sources/ggml/src/ggml-cuda/fill.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1155 -164
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +5 -4
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +741 -48
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +60 -12
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +381 -42
- data/ext/sources/ggml/src/ggml-cuda/mmid.cu +164 -0
- data/ext/sources/ggml/src/ggml-cuda/mmid.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +69 -176
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +498 -171
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +375 -79
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +3 -2
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +241 -95
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +64 -33
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +151 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +192 -77
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +101 -47
- data/ext/sources/ggml/src/ggml-cuda/set.cu +39 -0
- data/ext/sources/ggml/src/ggml-cuda/set.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +203 -6
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +14 -20
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +49 -84
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +19 -1
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +96 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +168 -76
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +11 -4
- data/ext/sources/ggml/src/ggml-cuda/tri.cu +136 -0
- data/ext/sources/ggml/src/ggml-cuda/tri.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +105 -11
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +36 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +163 -7
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +12 -1
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +6 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +80 -0
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3151 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +44 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +682 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +566 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +112 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.c +63 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.h +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +165 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +92 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +94 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +72 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +1020 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +1353 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +1001 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2503 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +487 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +168 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +287 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +454 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +221 -0
- data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +153 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +8 -13
- data/ext/sources/ggml/src/ggml-impl.h +67 -6
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +2 -2
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +29 -20
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +652 -285
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +103 -56
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +496 -118
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +231 -9
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +1227 -224
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +12 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +14 -8
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1972 -704
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +3 -1
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +11 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +1430 -120
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +82 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +4 -3
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +35 -16
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +13 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +438 -156
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +48 -3
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +77 -0
- data/ext/sources/ggml/src/ggml-sycl/add-id.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +0 -9
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +0 -6
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +117 -15
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +55 -44
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +34 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +79 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +0 -3
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +18 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +76 -3
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +333 -300
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +10 -2
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +335 -110
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +156 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.cpp +97 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.hpp +24 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.cpp +122 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +30 -17
- data/ext/sources/ggml/src/ggml-sycl/set.cpp +73 -0
- data/ext/sources/ggml/src/ggml-sycl/set.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +327 -162
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +4 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +58 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +5013 -2859
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +47 -49
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +9 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +39 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +19 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +45 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +50 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +17 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +2 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +19 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mat_vec_base.comp → mul_mat_vec_base.glsl} +70 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +71 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +41 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +44 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +39 -36
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +78 -103
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +34 -23
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mm_funcs.comp → mul_mm_funcs.glsl} +69 -59
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +72 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +88 -228
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +21 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +10 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +50 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +234 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +6 -50
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +6 -33
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +6 -33
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +6 -39
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +2 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +345 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +90 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +335 -151
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +28 -2
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +169 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1964 -435
- data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +33 -10
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +591 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +1 -1
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +6 -6
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +83 -17
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +112 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +483 -0
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +466 -0
- data/ext/sources/ggml/src/ggml.c +425 -33
- data/ext/sources/include/whisper.h +1 -0
- data/ext/sources/src/CMakeLists.txt +3 -1
- data/ext/sources/src/whisper.cpp +101 -35
- data/ext/sources/tests/CMakeLists.txt +2 -2
- data/ext/sources/tests/test-vad-full.cpp +4 -2
- data/ext/sources/tests/test-vad.cpp +1 -1
- data/extsources.rb +1 -0
- data/lib/whisper/model/uri.rb +17 -18
- data/sig/whisper.rbs +119 -2
- data/test/test_params.rb +16 -8
- data/test/test_segment.rb +0 -1
- data/test/test_token.rb +70 -0
- data/test/test_vad.rb +1 -1
- data/test/test_vad_context.rb +50 -0
- data/test/test_vad_segment.rb +19 -0
- data/test/test_vad_segments.rb +16 -0
- data/test/test_whisper.rb +7 -0
- data/whispercpp.gemspec +1 -1
- metadata +287 -34
- data/ext/sources/build-xcframework.sh +0 -571
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +0 -2579
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +0 -48
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
|
@@ -23,31 +23,36 @@
|
|
|
23
23
|
#ifndef CANN_ACLNN_OPS
|
|
24
24
|
#define CANN_ACLNN_OPS
|
|
25
25
|
|
|
26
|
-
#include
|
|
27
|
-
#include
|
|
26
|
+
#include "acl_tensor.h"
|
|
27
|
+
#include "common.h"
|
|
28
|
+
|
|
28
29
|
#include <aclnnop/aclnn_abs.h>
|
|
29
|
-
#include <aclnnop/aclnn_neg.h>
|
|
30
|
-
#include <aclnnop/aclnn_exp.h>
|
|
31
30
|
#include <aclnnop/aclnn_arange.h>
|
|
32
31
|
#include <aclnnop/aclnn_argsort.h>
|
|
33
32
|
#include <aclnnop/aclnn_cat.h>
|
|
34
33
|
#include <aclnnop/aclnn_clamp.h>
|
|
34
|
+
#include <aclnnop/aclnn_cos.h>
|
|
35
|
+
#include <aclnnop/aclnn_exp.h>
|
|
35
36
|
#include <aclnnop/aclnn_gelu.h>
|
|
36
37
|
#include <aclnnop/aclnn_gelu_v2.h>
|
|
37
|
-
#include <aclnnop/aclnn_sigmoid.h>
|
|
38
38
|
#include <aclnnop/aclnn_hardsigmoid.h>
|
|
39
39
|
#include <aclnnop/aclnn_hardswish.h>
|
|
40
40
|
#include <aclnnop/aclnn_leaky_relu.h>
|
|
41
|
+
#include <aclnnop/aclnn_log.h>
|
|
42
|
+
#include <aclnnop/aclnn_logsoftmax.h>
|
|
43
|
+
#include <aclnnop/aclnn_neg.h>
|
|
44
|
+
#include <aclnnop/aclnn_norm.h>
|
|
41
45
|
#include <aclnnop/aclnn_relu.h>
|
|
46
|
+
#include <aclnnop/aclnn_sigmoid.h>
|
|
47
|
+
#include <aclnnop/aclnn_sign.h>
|
|
42
48
|
#include <aclnnop/aclnn_silu.h>
|
|
43
|
-
#include <aclnnop/aclnn_tanh.h>
|
|
44
|
-
#include <aclnnop/aclnn_sqrt.h>
|
|
45
49
|
#include <aclnnop/aclnn_sin.h>
|
|
46
|
-
#include <aclnnop/
|
|
47
|
-
#include <aclnnop/
|
|
48
|
-
#include <aclnnop/
|
|
49
|
-
|
|
50
|
-
#include
|
|
50
|
+
#include <aclnnop/aclnn_slice.h>
|
|
51
|
+
#include <aclnnop/aclnn_sqrt.h>
|
|
52
|
+
#include <aclnnop/aclnn_tanh.h>
|
|
53
|
+
|
|
54
|
+
#include <functional>
|
|
55
|
+
#include <unordered_set>
|
|
51
56
|
|
|
52
57
|
/**
|
|
53
58
|
* @brief Repeats a ggml tensor along each dimension to match the dimensions
|
|
@@ -62,7 +67,7 @@
|
|
|
62
67
|
* @param dst The ggml tensor representing the destination, which op is
|
|
63
68
|
* GGML_OP_REPEAT and specifies the desired dimensions.
|
|
64
69
|
*/
|
|
65
|
-
void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
70
|
+
void ggml_cann_repeat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
66
71
|
|
|
67
72
|
/**
|
|
68
73
|
* @brief Applies the Leaky ReLU activation function to a tensor using the CANN
|
|
@@ -82,7 +87,7 @@ void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
82
87
|
* @param dst The destination tensor where the result of the Leaky ReLU
|
|
83
88
|
* activation is stored, which op is `GGML_OP_LEAKY_RELU`
|
|
84
89
|
*/
|
|
85
|
-
void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
90
|
+
void ggml_cann_leaky_relu(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
86
91
|
|
|
87
92
|
/**
|
|
88
93
|
* @brief Concatenates multiple tensors along a specified dimension using the
|
|
@@ -97,7 +102,7 @@ void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
97
102
|
* @attention tensorList length should be 2 and the dimension using for concat
|
|
98
103
|
* default to 1.
|
|
99
104
|
*/
|
|
100
|
-
void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
105
|
+
void ggml_cann_concat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
101
106
|
|
|
102
107
|
/**
|
|
103
108
|
* @brief Generates a sequence of evenly spaced values within a specified
|
|
@@ -113,7 +118,7 @@ void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
113
118
|
* `start`, 'stop' and 'step' are in dst->op_params and dst->op is
|
|
114
119
|
* `GGML_OP_ARANGE`.
|
|
115
120
|
*/
|
|
116
|
-
void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
121
|
+
void ggml_cann_arange(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
117
122
|
|
|
118
123
|
/**
|
|
119
124
|
* @brief Applies a clamp operation to the elements of a ggml tensor using the
|
|
@@ -131,7 +136,7 @@ void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
131
136
|
* @param dst The destination tensor where the clamped values will be stored.
|
|
132
137
|
* dst->op is `GGML_OP_CLAMP`, `min` and `max` value is in dst->params.
|
|
133
138
|
*/
|
|
134
|
-
void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
139
|
+
void ggml_cann_clamp(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
135
140
|
|
|
136
141
|
/**
|
|
137
142
|
* @brief Scales the elements of a ggml tensor by a constant factor using the
|
|
@@ -148,7 +153,7 @@ void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
148
153
|
* @param dst The destination tensor where the scaled values will be stored.
|
|
149
154
|
* dst->op is `GGML_OP_SCALE` and `scale` value is in dst->params.
|
|
150
155
|
*/
|
|
151
|
-
void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
156
|
+
void ggml_cann_scale(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
152
157
|
|
|
153
158
|
/**
|
|
154
159
|
* @brief Sorts the elements of a ggml tensor and returns the indices that
|
|
@@ -163,7 +168,7 @@ void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
163
168
|
* @param dst The destination tensor where the sorted indices will be stored.
|
|
164
169
|
* dst->op is `GGML_OP_ARGSORT`.
|
|
165
170
|
*/
|
|
166
|
-
void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
171
|
+
void ggml_cann_argsort(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
167
172
|
|
|
168
173
|
/**
|
|
169
174
|
* @brief Computes the Layer Normalization for a ggml tensor using the CANN
|
|
@@ -185,7 +190,67 @@ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
185
190
|
* @param dst The destination tensor where the normalized values will be stored.
|
|
186
191
|
* @attention `Var` defaults to dst->ne[0].
|
|
187
192
|
*/
|
|
188
|
-
void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
193
|
+
void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
194
|
+
|
|
195
|
+
/**
|
|
196
|
+
* @brief Computes the L2 Normalization for a ggml tensor using the CANN
|
|
197
|
+
* backend.
|
|
198
|
+
*
|
|
199
|
+
* @details This function applies the L2 Normalization operation on the
|
|
200
|
+
* input tensor `src` and stores the result in the destination tensor
|
|
201
|
+
* `dst`. L2 Normalization scales the input tensor such that the
|
|
202
|
+
* L2 norm along the specified dimension equals 1. This operation
|
|
203
|
+
* is commonly used in neural networks for feature normalization
|
|
204
|
+
* and vector scaling.
|
|
205
|
+
* The operation is defined as:
|
|
206
|
+
* \f[
|
|
207
|
+
* \text{out} = \frac{x}{\sqrt{\sum{x^2}}}
|
|
208
|
+
* \f]
|
|
209
|
+
* The normalization is performed along the last dimension by default.
|
|
210
|
+
*
|
|
211
|
+
* @param ctx The CANN context used for operations.
|
|
212
|
+
* @param dst The destination tensor where the normalized values will be stored.
|
|
213
|
+
* @attention The normalization is performed along the last dimension of the
|
|
214
|
+
* input tensor by default.
|
|
215
|
+
*/
|
|
216
|
+
void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
217
|
+
|
|
218
|
+
/**
|
|
219
|
+
* @brief Computes the Cross Entropy Loss for a ggml tensor using the CANN
|
|
220
|
+
* backend.
|
|
221
|
+
*
|
|
222
|
+
* @details This function computes the cross entropy loss between the predicted
|
|
223
|
+
* logits and target probability distributions. The operation follows
|
|
224
|
+
* the same computation pattern as the CPU implementation:
|
|
225
|
+
* 1. Applies log_softmax to the logits along the class dimension
|
|
226
|
+
* 2. Element-wise multiplication with target distributions
|
|
227
|
+
* 3. Summation along the class dimension to get per-sample losses
|
|
228
|
+
* 4. Global summation and scaling by -1/nr to get final loss
|
|
229
|
+
*
|
|
230
|
+
* The computation can be expressed as:
|
|
231
|
+
* \f[
|
|
232
|
+
* \text{loss} = -\frac{1}{N} \sum_{i=1}^{N} \sum_{j=1}^{C} y_{ij} \cdot \log(\text{softmax}(x_{ij}))
|
|
233
|
+
* \f]
|
|
234
|
+
* where \f$N\f$ is the total number of samples, \f$C\f$ is the number
|
|
235
|
+
* of classes, \f$x\f$ are the logits, and \f$y\f$ are the target
|
|
236
|
+
* probability distributions.
|
|
237
|
+
*
|
|
238
|
+
* @param ctx The CANN context used for operations.
|
|
239
|
+
* @param dst The destination tensor where the computed loss will be stored.
|
|
240
|
+
* This should be a scalar tensor containing the final loss value.
|
|
241
|
+
*
|
|
242
|
+
* @note This implementation computes cross entropy between probability
|
|
243
|
+
* distributions, not the typical classification cross entropy that
|
|
244
|
+
* expects class indices as targets. Both input tensors (src0 and src1)
|
|
245
|
+
* should have the same shape and represent probability distributions
|
|
246
|
+
* over the class dimension.
|
|
247
|
+
* @note The function expects two source tensors:
|
|
248
|
+
* - dst->src[0]: Logits tensor (before softmax)
|
|
249
|
+
* - dst->src[1]: Target probability distributions tensor
|
|
250
|
+
* @note The computation is performed using CANN backend operators including
|
|
251
|
+
* LogSoftmax, Mul, ReduceSum, and Muls for the final scaling.
|
|
252
|
+
*/
|
|
253
|
+
void ggml_cann_cross_entropy_loss(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
189
254
|
|
|
190
255
|
/**
|
|
191
256
|
* @brief Computes the Group Normalization for a ggml tensor using the CANN
|
|
@@ -209,7 +274,7 @@ void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
209
274
|
*
|
|
210
275
|
* @attention eps defaults to 1e-6f.
|
|
211
276
|
*/
|
|
212
|
-
void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
277
|
+
void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
213
278
|
|
|
214
279
|
/**
|
|
215
280
|
* @brief Computes the accumulation of tensors using the CANN backend.
|
|
@@ -228,7 +293,7 @@ void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
228
293
|
* @param dst The destination tensor where the accumulated values will be stored.
|
|
229
294
|
* `inplace` is in dst->params, and dst->op is `GGML_OP_ACC`.
|
|
230
295
|
*/
|
|
231
|
-
void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
296
|
+
void ggml_cann_acc(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
232
297
|
|
|
233
298
|
/**
|
|
234
299
|
* @brief Computes the sum of elements along the last dimension of a ggml tensor
|
|
@@ -244,7 +309,7 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
244
309
|
*
|
|
245
310
|
* @attention `reduce_dims` defaults to 3, which means the last dimension.
|
|
246
311
|
*/
|
|
247
|
-
void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
312
|
+
void ggml_cann_sum_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
248
313
|
|
|
249
314
|
/**
|
|
250
315
|
* @brief Computes the sum of elements in a ggml tensor.
|
|
@@ -258,7 +323,7 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
258
323
|
*
|
|
259
324
|
*/
|
|
260
325
|
|
|
261
|
-
void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
326
|
+
void ggml_cann_sum(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
262
327
|
|
|
263
328
|
/**
|
|
264
329
|
* @brief Upsamples a ggml tensor using nearest neighbor interpolation using
|
|
@@ -274,8 +339,7 @@ void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
274
339
|
* @param dst The destination tensor where the upsampled values will be stored.
|
|
275
340
|
* dst->op is `GGML_OP_UPSCALE`.
|
|
276
341
|
*/
|
|
277
|
-
void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
|
|
278
|
-
ggml_tensor* dst);
|
|
342
|
+
void ggml_cann_upsample_nearest2d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
279
343
|
|
|
280
344
|
/**
|
|
281
345
|
* @brief Pads a ggml tensor to match the dimensions of the destination tensor
|
|
@@ -290,7 +354,7 @@ void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
|
|
|
290
354
|
* @param dst The destination tensor, which specifies the target dimensions for
|
|
291
355
|
* padding. dst->op is `GGML_OP_PAD`.
|
|
292
356
|
*/
|
|
293
|
-
void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
357
|
+
void ggml_cann_pad(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
294
358
|
|
|
295
359
|
/**
|
|
296
360
|
* @brief Executes a 2D pooling operation on a ggml tensor using the CANN
|
|
@@ -307,7 +371,7 @@ void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
307
371
|
* @param dst The destination tensor on which the pooling operation is to be
|
|
308
372
|
* performed. dst->op is `GGML_OP_POOL_2D`.
|
|
309
373
|
*/
|
|
310
|
-
void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
374
|
+
void ggml_cann_pool2d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
311
375
|
|
|
312
376
|
/**
|
|
313
377
|
* @brief Duplicates a ggml tensor using the CANN backend.
|
|
@@ -326,7 +390,7 @@ void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
326
390
|
* different shape and dst is no-contiguous.
|
|
327
391
|
* @note: This func need to simplify.
|
|
328
392
|
*/
|
|
329
|
-
void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
393
|
+
void ggml_cann_dup(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
330
394
|
|
|
331
395
|
/**
|
|
332
396
|
* @brief Computes the Root Mean Square (RMS) normalization of a ggml tensor
|
|
@@ -348,7 +412,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
348
412
|
* @param dst The destination tensor where the normalized values will be stored.
|
|
349
413
|
* dst->op is `GGML_OP_RMS_NORM`.
|
|
350
414
|
*/
|
|
351
|
-
void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
415
|
+
void ggml_cann_rms_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
352
416
|
|
|
353
417
|
/**
|
|
354
418
|
* @brief Applies a diagonal mask to the tensor with a specified value.
|
|
@@ -363,7 +427,7 @@ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
363
427
|
* `GGML_OP_DIAG_MASK`
|
|
364
428
|
* @param value The value to use for masking.
|
|
365
429
|
*/
|
|
366
|
-
void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float value);
|
|
430
|
+
void ggml_cann_diag_mask(ggml_backend_cann_context & ctx, ggml_tensor * dst, float value);
|
|
367
431
|
|
|
368
432
|
/**
|
|
369
433
|
* @brief Performs an image-to-column transformation on the input tensor.
|
|
@@ -378,7 +442,7 @@ void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float
|
|
|
378
442
|
* @param dst The destination tensor that stores the result of the operation.
|
|
379
443
|
* dst->op is `GGML_OP_IM2COL`.
|
|
380
444
|
*/
|
|
381
|
-
void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
445
|
+
void ggml_cann_im2col(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
382
446
|
|
|
383
447
|
/**
|
|
384
448
|
* @brief Computes time step embeddings using sine and cosine functions.
|
|
@@ -392,10 +456,10 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
392
456
|
* @param dst The destination tensor where the result of the embedding operation
|
|
393
457
|
* will be stored. dst->op is `GGML_OP_TIMESTEP_EMBEDDING`.
|
|
394
458
|
*/
|
|
395
|
-
void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
459
|
+
void ggml_cann_timestep_embedding(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
396
460
|
|
|
397
461
|
// @see ggml_cann_dup.
|
|
398
|
-
void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
462
|
+
void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
399
463
|
|
|
400
464
|
/**
|
|
401
465
|
* @brief Computes the softmax activation with optional masking.
|
|
@@ -417,7 +481,7 @@ void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
417
481
|
* @param dst The destination tensor where the result will be stored. dst->op is
|
|
418
482
|
* `GGML_OP_SOFTMAX`.
|
|
419
483
|
*/
|
|
420
|
-
void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
484
|
+
void ggml_cann_softmax(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
421
485
|
|
|
422
486
|
/**
|
|
423
487
|
* @brief Extracts specific rows from a tensor based on indices.
|
|
@@ -429,7 +493,7 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
429
493
|
* @param ctx The backend CANN context for executing operations.
|
|
430
494
|
* @param dst The destination tensor where the extracted rows will be stored.
|
|
431
495
|
*/
|
|
432
|
-
void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
496
|
+
void ggml_cann_get_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
433
497
|
|
|
434
498
|
/**
|
|
435
499
|
* @brief Writes specific rows into a tensor at positions specified by indices.
|
|
@@ -441,7 +505,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
441
505
|
* @param ctx The backend CANN context for executing operations.
|
|
442
506
|
* @param dst The destination tensor where the specified rows will be updated.
|
|
443
507
|
*/
|
|
444
|
-
void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
508
|
+
void ggml_cann_set_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
445
509
|
|
|
446
510
|
/**
|
|
447
511
|
* @brief Executes matrix multiplication for the given tensor.
|
|
@@ -454,7 +518,7 @@ void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
454
518
|
* @param dst The destination tensor for storing the result of the matrix
|
|
455
519
|
* multiplication. dst->op is `GGML_OP_MUL_MAT`.
|
|
456
520
|
*/
|
|
457
|
-
void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
521
|
+
void ggml_cann_mul_mat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
458
522
|
|
|
459
523
|
/**
|
|
460
524
|
* @brief Applies Rotary Positional Embedding (RoPE) to the input tensor.
|
|
@@ -477,7 +541,7 @@ void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
477
541
|
* @note The function currently does not support cases where the freq_scale is
|
|
478
542
|
* not equal 1.
|
|
479
543
|
*/
|
|
480
|
-
void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
544
|
+
void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
481
545
|
|
|
482
546
|
/**
|
|
483
547
|
* @brief Computes the index of the maximum value along the specified dimension
|
|
@@ -492,7 +556,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
492
556
|
* @param dst The destination tensor where the indices of the maximum values will
|
|
493
557
|
* be stored. dst->op is `GGML_OP_ARGMAX`.
|
|
494
558
|
*/
|
|
495
|
-
void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
559
|
+
void ggml_cann_argmax(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
496
560
|
|
|
497
561
|
/**
|
|
498
562
|
* @brief Adds two tensors element-wise and stores the result in a destination
|
|
@@ -509,8 +573,10 @@ void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
509
573
|
* @param acl_src1 The second source tensor.
|
|
510
574
|
* @param acl_dst The destination tensor where the result will be stored.
|
|
511
575
|
*/
|
|
512
|
-
void aclnn_add(ggml_backend_cann_context& ctx,
|
|
513
|
-
|
|
576
|
+
void aclnn_add(ggml_backend_cann_context & ctx,
|
|
577
|
+
aclTensor * acl_src0,
|
|
578
|
+
aclTensor * acl_src1,
|
|
579
|
+
aclTensor * acl_dst = nullptr);
|
|
514
580
|
|
|
515
581
|
/**
|
|
516
582
|
* @brief Sub two tensors element-wise and stores the result in a destination
|
|
@@ -527,8 +593,10 @@ void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
|
|
|
527
593
|
* @param acl_src1 The second source tensor.
|
|
528
594
|
* @param acl_dst The destination tensor where the result will be stored.
|
|
529
595
|
*/
|
|
530
|
-
void aclnn_sub(ggml_backend_cann_context& ctx,
|
|
531
|
-
|
|
596
|
+
void aclnn_sub(ggml_backend_cann_context & ctx,
|
|
597
|
+
aclTensor * acl_src0,
|
|
598
|
+
aclTensor * acl_src1,
|
|
599
|
+
aclTensor * acl_dst = nullptr);
|
|
532
600
|
|
|
533
601
|
/**
|
|
534
602
|
* @brief Performs element-wise multiplication of two tensors and stores the
|
|
@@ -546,8 +614,10 @@ void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
|
|
|
546
614
|
* @param acl_other The second tensor for element-wise multiplication.
|
|
547
615
|
* @param acl_dst The destination tensor where the result will be stored.
|
|
548
616
|
*/
|
|
549
|
-
void aclnn_mul(ggml_backend_cann_context& ctx,
|
|
550
|
-
|
|
617
|
+
void aclnn_mul(ggml_backend_cann_context & ctx,
|
|
618
|
+
aclTensor * acl_src,
|
|
619
|
+
aclTensor * acl_other,
|
|
620
|
+
aclTensor * acl_dst = nullptr);
|
|
551
621
|
|
|
552
622
|
/**
|
|
553
623
|
* @brief Matrix division, optionally in-place.
|
|
@@ -567,8 +637,10 @@ void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
567
637
|
* @param inplace Flag indicating whether to perform the operation in-place on
|
|
568
638
|
* `acl_src`.
|
|
569
639
|
*/
|
|
570
|
-
void aclnn_div(ggml_backend_cann_context& ctx,
|
|
571
|
-
|
|
640
|
+
void aclnn_div(ggml_backend_cann_context & ctx,
|
|
641
|
+
aclTensor * acl_src,
|
|
642
|
+
aclTensor * acl_other,
|
|
643
|
+
aclTensor * acl_dst = nullptr);
|
|
572
644
|
|
|
573
645
|
/**
|
|
574
646
|
* @brief Applies element-wise cosine function to the elements of a tensor.
|
|
@@ -584,8 +656,7 @@ void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
584
656
|
* @param acl_dst The destination tensor where the cosine results will be
|
|
585
657
|
* stored.
|
|
586
658
|
*/
|
|
587
|
-
void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
588
|
-
aclTensor* acl_dst);
|
|
659
|
+
void aclnn_cos(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst);
|
|
589
660
|
|
|
590
661
|
/**
|
|
591
662
|
* @brief Applies element-wise sine function to the elements of a tensor.
|
|
@@ -602,8 +673,7 @@ void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
602
673
|
* @param acl_src The source tensor on which the sine function will be applied.
|
|
603
674
|
* @param acl_dst The destination tensor where the sine results will be stored.
|
|
604
675
|
*/
|
|
605
|
-
void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
606
|
-
aclTensor* acl_dst);
|
|
676
|
+
void aclnn_sin(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst);
|
|
607
677
|
|
|
608
678
|
/**
|
|
609
679
|
* @brief Prepares broadcast-compatible ACL tensors for two input tensors and one
|
|
@@ -621,8 +691,12 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
621
691
|
* @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
|
|
622
692
|
* @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
|
|
623
693
|
*/
|
|
624
|
-
void bcast_shape(ggml_tensor *
|
|
625
|
-
|
|
694
|
+
void bcast_shape(ggml_tensor * src0,
|
|
695
|
+
ggml_tensor * src1,
|
|
696
|
+
ggml_tensor * dst,
|
|
697
|
+
acl_tensor_ptr & acl_src0,
|
|
698
|
+
acl_tensor_ptr & acl_src1,
|
|
699
|
+
acl_tensor_ptr & acl_dst);
|
|
626
700
|
|
|
627
701
|
/**
|
|
628
702
|
* @brief Computes the 1D transposed convolution (deconvolution) of a ggml
|
|
@@ -637,7 +711,7 @@ void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
|
|
|
637
711
|
* @param dst The destination tensor where the transposed convolution result
|
|
638
712
|
* will be stored. dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
|
|
639
713
|
*/
|
|
640
|
-
void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
714
|
+
void ggml_cann_conv_transpose_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
641
715
|
|
|
642
716
|
/**
|
|
643
717
|
* @brief Applies the ELU (Exponential Linear Unit) activation to a ggml tensor
|
|
@@ -662,7 +736,7 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds
|
|
|
662
736
|
* @param dst The destination tensor where the ELU-activated result will be stored.
|
|
663
737
|
* dst->op is expected to be `GGML_OP_ELU`.
|
|
664
738
|
*/
|
|
665
|
-
void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
739
|
+
void ggml_cann_elu(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
666
740
|
|
|
667
741
|
/**
|
|
668
742
|
* @brief Computes the mean of a ggml tensor element-wise using the CANN backend.
|
|
@@ -677,7 +751,7 @@ void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
677
751
|
* @param dst The destination tensor where the mean result will be stored.
|
|
678
752
|
* dst->op is expected to be `GGML_OP_MEAN`.
|
|
679
753
|
*/
|
|
680
|
-
void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
754
|
+
void ggml_cann_mean(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
681
755
|
|
|
682
756
|
/**
|
|
683
757
|
* @brief Applies 1D reflect padding to a ggml tensor using the CANN backend.
|
|
@@ -692,7 +766,7 @@ void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
692
766
|
* @param dst The destination tensor where the padded result will be stored.
|
|
693
767
|
* dst->op is expected to be `GGML_OP_PAD_REFLECT_1D`.
|
|
694
768
|
*/
|
|
695
|
-
void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
769
|
+
void ggml_cann_pad_reflect_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
696
770
|
|
|
697
771
|
/**
|
|
698
772
|
* @brief Counts the number of equal elements in two ggml tensors using the CANN backend.
|
|
@@ -708,7 +782,7 @@ void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
708
782
|
* @param dst The destination tensor where the result will be stored.
|
|
709
783
|
* dst->op is expected to be `GGML_OP_COUNT_EQUAL`.
|
|
710
784
|
*/
|
|
711
|
-
void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
785
|
+
void ggml_cann_count_equal(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
712
786
|
|
|
713
787
|
/**
|
|
714
788
|
* @brief Applies the Step activation function to a ggml tensor using the CANN backend.
|
|
@@ -723,7 +797,7 @@ void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
723
797
|
* @param dst The destination tensor where the result will be stored.
|
|
724
798
|
* dst->op is expected to be `GGML_OP_STEP`.
|
|
725
799
|
*/
|
|
726
|
-
void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
800
|
+
void ggml_cann_step(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
727
801
|
|
|
728
802
|
/**
|
|
729
803
|
* @brief Performs the Flash Attention extended operator using the CANN backend.
|
|
@@ -738,59 +812,46 @@ void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
738
812
|
* @param dst The destination tensor where the result will be stored.
|
|
739
813
|
* dst->op is expected to be `GGML_OP_FLASH_ATTN_EXT`.
|
|
740
814
|
*/
|
|
741
|
-
void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
815
|
+
void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
742
816
|
|
|
743
817
|
/**
|
|
744
818
|
* @brief A generic wrapper for ACL resources with custom deleter support.
|
|
745
819
|
*/
|
|
746
|
-
using any_acl_resource = std::unique_ptr<void, std::function<void(void*)>>;
|
|
820
|
+
using any_acl_resource = std::unique_ptr<void, std::function<void(void *)>>;
|
|
747
821
|
|
|
748
822
|
/**
|
|
749
823
|
* @brief Trait structure used to define how to destroy a given ACL resource type.
|
|
750
824
|
*
|
|
751
825
|
* @tparam T ACL resource type.
|
|
752
826
|
*/
|
|
753
|
-
template<typename T>
|
|
754
|
-
struct acl_resource_traits;
|
|
827
|
+
template <typename T> struct acl_resource_traits;
|
|
755
828
|
|
|
756
829
|
/**
|
|
757
830
|
* @brief Specialization for aclTensor, defines how to destroy an aclTensor resource.
|
|
758
831
|
*/
|
|
759
|
-
template<>
|
|
760
|
-
|
|
761
|
-
static void destroy(void* p) {
|
|
762
|
-
ACL_CHECK(aclDestroyTensor(static_cast<aclTensor*>(p)));
|
|
763
|
-
}
|
|
832
|
+
template <> struct acl_resource_traits<aclTensor> {
|
|
833
|
+
static void destroy(void * p) { ACL_CHECK(aclDestroyTensor(static_cast<aclTensor *>(p))); }
|
|
764
834
|
};
|
|
765
835
|
|
|
766
836
|
/**
|
|
767
837
|
* @brief Specialization for aclIntArray, defines how to destroy an aclIntArray resource.
|
|
768
838
|
*/
|
|
769
|
-
template<>
|
|
770
|
-
|
|
771
|
-
static void destroy(void* p) {
|
|
772
|
-
ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray*>(p)));
|
|
773
|
-
}
|
|
839
|
+
template <> struct acl_resource_traits<aclIntArray> {
|
|
840
|
+
static void destroy(void * p) { ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray *>(p))); }
|
|
774
841
|
};
|
|
775
842
|
|
|
776
843
|
/**
|
|
777
844
|
* @brief Specialization for aclScalar, defines how to destroy an aclScalar resource.
|
|
778
845
|
*/
|
|
779
|
-
template<>
|
|
780
|
-
|
|
781
|
-
static void destroy(void* p) {
|
|
782
|
-
ACL_CHECK(aclDestroyScalar(static_cast<aclScalar*>(p)));
|
|
783
|
-
}
|
|
846
|
+
template <> struct acl_resource_traits<aclScalar> {
|
|
847
|
+
static void destroy(void * p) { ACL_CHECK(aclDestroyScalar(static_cast<aclScalar *>(p))); }
|
|
784
848
|
};
|
|
785
849
|
|
|
786
850
|
/**
|
|
787
851
|
* @brief Specialization for aclTensorList, defines how to destroy an aclTensorList resource.
|
|
788
852
|
*/
|
|
789
|
-
template<>
|
|
790
|
-
|
|
791
|
-
static void destroy(void* p) {
|
|
792
|
-
ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList*>(p)));
|
|
793
|
-
}
|
|
853
|
+
template <> struct acl_resource_traits<aclTensorList> {
|
|
854
|
+
static void destroy(void * p) { ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList *>(p))); }
|
|
794
855
|
};
|
|
795
856
|
|
|
796
857
|
/**
|
|
@@ -800,14 +861,8 @@ struct acl_resource_traits<aclTensorList> {
|
|
|
800
861
|
* @param ptr Raw pointer to ACL resource.
|
|
801
862
|
* @return any_acl_resource Smart pointer that handles destruction.
|
|
802
863
|
*/
|
|
803
|
-
template<typename T>
|
|
804
|
-
any_acl_resource
|
|
805
|
-
return any_acl_resource(
|
|
806
|
-
static_cast<void*>(ptr),
|
|
807
|
-
[](void* p) {
|
|
808
|
-
acl_resource_traits<T>::destroy(p);
|
|
809
|
-
}
|
|
810
|
-
);
|
|
864
|
+
template <typename T> any_acl_resource make_acl_resource(T * ptr) {
|
|
865
|
+
return any_acl_resource(static_cast<void *>(ptr), [](void * p) { acl_resource_traits<T>::destroy(p); });
|
|
811
866
|
}
|
|
812
867
|
|
|
813
868
|
/**
|
|
@@ -817,89 +872,10 @@ any_acl_resource make_acl_resource(T* ptr) {
|
|
|
817
872
|
* @param vec Target vector to hold ACL resources.
|
|
818
873
|
* @param args Raw pointers to ACL resources.
|
|
819
874
|
*/
|
|
820
|
-
template<typename... Args>
|
|
821
|
-
void register_acl_resources(std::vector<any_acl_resource>& vec, Args*... args) {
|
|
875
|
+
template <typename... Args> void register_acl_resources(std::vector<any_acl_resource> & vec, Args *... args) {
|
|
822
876
|
(vec.emplace_back(make_acl_resource(args)), ...);
|
|
823
877
|
}
|
|
824
878
|
|
|
825
|
-
/**
|
|
826
|
-
* @brief Task class that wraps the execution of an aclnn function call.
|
|
827
|
-
*/
|
|
828
|
-
class aclnn_task : public cann_task {
|
|
829
|
-
public:
|
|
830
|
-
aclnn_task(aclnn_func_t aclnn_func, void * workspace_addr,
|
|
831
|
-
uint64_t workspace_size, aclOpExecutor * executor,
|
|
832
|
-
aclrtStream stream) :
|
|
833
|
-
aclnn_func_(aclnn_func),
|
|
834
|
-
workspace_addr_(workspace_addr),
|
|
835
|
-
workspace_size_(workspace_size),
|
|
836
|
-
executor_(executor),
|
|
837
|
-
stream_(stream) {}
|
|
838
|
-
virtual void run_task() override {
|
|
839
|
-
ACL_CHECK(aclnn_func_(workspace_addr_, workspace_size_, executor_, stream_));
|
|
840
|
-
}
|
|
841
|
-
private:
|
|
842
|
-
aclnn_func_t aclnn_func_;
|
|
843
|
-
void * workspace_addr_;
|
|
844
|
-
uint64_t workspace_size_;
|
|
845
|
-
aclOpExecutor * executor_;
|
|
846
|
-
aclrtStream stream_;
|
|
847
|
-
};
|
|
848
|
-
|
|
849
|
-
/**
|
|
850
|
-
* @brief Task class that releases ACL resources after usage.
|
|
851
|
-
*/
|
|
852
|
-
class release_resource_task : public cann_task {
|
|
853
|
-
public:
|
|
854
|
-
release_resource_task(std::vector<any_acl_resource>&& resources){
|
|
855
|
-
resource_ = std::move(resources);
|
|
856
|
-
}
|
|
857
|
-
|
|
858
|
-
virtual void run_task() override {
|
|
859
|
-
resource_.clear();
|
|
860
|
-
}
|
|
861
|
-
private:
|
|
862
|
-
std::vector<any_acl_resource> resource_;
|
|
863
|
-
};
|
|
864
|
-
|
|
865
|
-
/**
|
|
866
|
-
* @brief Task class for performing asynchronous memory copy operations.
|
|
867
|
-
*/
|
|
868
|
-
class async_memcpy_task : public cann_task {
|
|
869
|
-
public:
|
|
870
|
-
async_memcpy_task(void* dst, const void* src, size_t size,
|
|
871
|
-
aclrtMemcpyKind kind, aclrtStream stream)
|
|
872
|
-
: dst_(dst), src_(src), size_(size), kind_(kind), stream_(stream) {}
|
|
873
|
-
|
|
874
|
-
virtual void run_task() override {
|
|
875
|
-
ACL_CHECK(aclrtMemcpyAsync(dst_, size_, src_, size_, kind_, stream_));
|
|
876
|
-
}
|
|
877
|
-
private:
|
|
878
|
-
void* dst_;
|
|
879
|
-
const void* src_;
|
|
880
|
-
size_t size_;
|
|
881
|
-
aclrtMemcpyKind kind_;
|
|
882
|
-
aclrtStream stream_;
|
|
883
|
-
};
|
|
884
|
-
|
|
885
|
-
/**
|
|
886
|
-
* @brief Task class for performing asynchronous memory set operations.
|
|
887
|
-
*/
|
|
888
|
-
class async_memset_task : public cann_task {
|
|
889
|
-
public:
|
|
890
|
-
async_memset_task(void* buffer, size_t size, int32_t value, aclrtStream stream)
|
|
891
|
-
: buffer_(buffer), size_(size), value_(value), stream_(stream) {}
|
|
892
|
-
|
|
893
|
-
virtual void run_task() override {
|
|
894
|
-
ACL_CHECK(aclrtMemsetAsync(buffer_, size_, value_, size_, stream_));
|
|
895
|
-
}
|
|
896
|
-
private:
|
|
897
|
-
void* buffer_;
|
|
898
|
-
size_t size_;
|
|
899
|
-
int32_t value_;
|
|
900
|
-
aclrtStream stream_;
|
|
901
|
-
};
|
|
902
|
-
|
|
903
879
|
/**
|
|
904
880
|
* @brief Launches an asynchronous task using the memory allocator.
|
|
905
881
|
*
|
|
@@ -918,92 +894,20 @@ class async_memset_task : public cann_task {
|
|
|
918
894
|
* same stream are executed in queue order.
|
|
919
895
|
*/
|
|
920
896
|
|
|
921
|
-
#define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...)
|
|
922
|
-
do {
|
|
923
|
-
uint64_t workspaceSize = 0;
|
|
924
|
-
aclOpExecutor * executor;
|
|
925
|
-
void * workspaceAddr = nullptr;
|
|
926
|
-
ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor))
|
|
927
|
-
/* The workspace should be allocated in the main thread to keep malloc order when using vmm. */
|
|
928
|
-
if (workspaceSize > 0) {
|
|
929
|
-
ggml_cann_pool_alloc workspace_allocator(CTX.pool(), workspaceSize);
|
|
930
|
-
workspaceAddr = workspace_allocator.get();
|
|
931
|
-
}
|
|
932
|
-
|
|
933
|
-
auto task = \
|
|
934
|
-
std::make_unique<aclnn_task>(aclnn##OP_NAME, workspaceAddr, workspaceSize, \
|
|
935
|
-
executor, CTX.stream()); \
|
|
936
|
-
CTX.task_queue.submit_task(std::move(task)); \
|
|
937
|
-
} else { \
|
|
938
|
-
ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream()));\
|
|
939
|
-
} \
|
|
897
|
+
#define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...) \
|
|
898
|
+
do { \
|
|
899
|
+
uint64_t workspaceSize = 0; \
|
|
900
|
+
aclOpExecutor * executor; \
|
|
901
|
+
void * workspaceAddr = nullptr; \
|
|
902
|
+
ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
|
|
903
|
+
/* The workspace should be allocated in the main thread to keep malloc order when using vmm. */ \
|
|
904
|
+
if (workspaceSize > 0) { \
|
|
905
|
+
ggml_cann_pool_alloc workspace_allocator(CTX.pool(), workspaceSize); \
|
|
906
|
+
workspaceAddr = workspace_allocator.get(); \
|
|
907
|
+
} \
|
|
908
|
+
ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream())); \
|
|
940
909
|
} while (0)
|
|
941
910
|
|
|
942
|
-
/**
|
|
943
|
-
* @brief Registers and releases multiple ACL resources, optionally deferring the release
|
|
944
|
-
* using a task.
|
|
945
|
-
*
|
|
946
|
-
* @tparam Args Types of the ACL resources.
|
|
947
|
-
* @param ctx Backend context which manages task submission and async mode.
|
|
948
|
-
* @param args Pointers to ACL resources to be released.
|
|
949
|
-
*/
|
|
950
|
-
template <typename... Args>
|
|
951
|
-
void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... args) {
|
|
952
|
-
std::vector<any_acl_resource> resources;
|
|
953
|
-
register_acl_resources(resources, std::forward<Args>(args)...);
|
|
954
|
-
if(ctx.async_mode) {
|
|
955
|
-
auto task = std::make_unique<release_resource_task>(std::move(resources));
|
|
956
|
-
ctx.task_queue.submit_task(std::move(task));
|
|
957
|
-
}
|
|
958
|
-
}
|
|
959
|
-
|
|
960
|
-
/**
|
|
961
|
-
* @brief Performs an asynchronous memory copy operation, optionally deferred via task submission.
|
|
962
|
-
*
|
|
963
|
-
* @param ctx Backend context containing stream and async configuration.
|
|
964
|
-
* @param dst Destination memory address.
|
|
965
|
-
* @param src Source memory address.
|
|
966
|
-
* @param len Size of memory to copy (in bytes).
|
|
967
|
-
* @param kind Type of memory copy (host-to-device, device-to-host, etc).
|
|
968
|
-
*/
|
|
969
|
-
inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx, void * dst,
|
|
970
|
-
const void * src, size_t len, aclrtMemcpyKind kind) {
|
|
971
|
-
if (ctx.async_mode) {
|
|
972
|
-
auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx.stream());
|
|
973
|
-
ctx.task_queue.submit_task(std::move(task));
|
|
974
|
-
} else {
|
|
975
|
-
ACL_CHECK(aclrtMemcpyAsync(dst, len, src, len, kind, ctx.stream()));
|
|
976
|
-
}
|
|
977
|
-
}
|
|
978
|
-
|
|
979
|
-
inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, void * dst,
|
|
980
|
-
const void * src, size_t len, aclrtMemcpyKind kind) {
|
|
981
|
-
if (ctx->async_mode) {
|
|
982
|
-
auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx->stream());
|
|
983
|
-
ctx->task_queue.submit_task(std::move(task));
|
|
984
|
-
} else {
|
|
985
|
-
ACL_CHECK(aclrtMemcpyAsync(dst, len, src, len, kind, ctx->stream()));
|
|
986
|
-
}
|
|
987
|
-
}
|
|
988
|
-
|
|
989
|
-
/**
|
|
990
|
-
* @brief Performs an asynchronous memory set operation, optionally deferred via task submission.
|
|
991
|
-
*
|
|
992
|
-
* @param ctx Backend context containing stream and async configuration.
|
|
993
|
-
* @param buffer Memory buffer to be set.
|
|
994
|
-
* @param size Size of the memory buffer (in bytes).
|
|
995
|
-
* @param value Value to set in the buffer.
|
|
996
|
-
*/
|
|
997
|
-
inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffer,
|
|
998
|
-
size_t size, int value) {
|
|
999
|
-
if (ctx.async_mode) {
|
|
1000
|
-
auto task = std::make_unique<async_memset_task>(buffer, size, value, ctx.stream());
|
|
1001
|
-
ctx.task_queue.submit_task(std::move(task));
|
|
1002
|
-
} else {
|
|
1003
|
-
ACL_CHECK(aclrtMemsetAsync(buffer, size, value, size, ctx.stream()));
|
|
1004
|
-
}
|
|
1005
|
-
}
|
|
1006
|
-
|
|
1007
911
|
/**
|
|
1008
912
|
* @brief Performs sparse expert-based matrix multiplication using the CANN backend.
|
|
1009
913
|
*
|
|
@@ -1029,7 +933,21 @@ inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffe
|
|
|
1029
933
|
* @param dst The destination tensor where the expert-weighted token outputs are stored.
|
|
1030
934
|
* Expected to be of shape [M, K, N, 1].
|
|
1031
935
|
*/
|
|
1032
|
-
void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
936
|
+
void ggml_cann_mul_mat_id(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
937
|
+
|
|
938
|
+
/**
|
|
939
|
+
* @brief Performs fused ADD + RMS_NORM operation using the CANN backend.
|
|
940
|
+
*
|
|
941
|
+
* This function fuses the ADD and RMS_NORM operations into a single kernel call
|
|
942
|
+
* for better performance. It first adds two input tensors (x1 + x2), then applies
|
|
943
|
+
* RMS normalization to the result.
|
|
944
|
+
*
|
|
945
|
+
* @param ctx The context for the CANN backend operations.
|
|
946
|
+
* @param add_node The ADD operation node, contains the two input tensors to be added.
|
|
947
|
+
* @param rms_norm_node The RMS_NORM operation node, contains the gamma weights
|
|
948
|
+
* and epsilon parameter.
|
|
949
|
+
*/
|
|
950
|
+
void ggml_cann_op_add_rms_norm_fused(ggml_backend_cann_context & ctx, ggml_tensor * add_node, ggml_tensor * rms_norm_node);
|
|
1033
951
|
|
|
1034
952
|
/**
|
|
1035
953
|
* @brief Check whether a tensor is a weight tensor for matrix multiplication.
|
|
@@ -1041,20 +959,14 @@ void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|
|
1041
959
|
*
|
|
1042
960
|
* @param tensor Pointer to the target ggml_tensor object (const-qualified).
|
|
1043
961
|
*/
|
|
1044
|
-
static bool is_matmul_weight(const ggml_tensor* tensor) {
|
|
1045
|
-
std::string
|
|
1046
|
-
static const std::unordered_set<std::string> weight_suffixes{
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
"ffn_gate.weight",
|
|
1053
|
-
"ffn_up.weight",
|
|
1054
|
-
"ffn_down.weight"
|
|
1055
|
-
};
|
|
1056
|
-
|
|
1057
|
-
for (const auto& suffix : weight_suffixes) {
|
|
962
|
+
static bool is_matmul_weight(const ggml_tensor * tensor) {
|
|
963
|
+
std::string name = ggml_get_name(tensor);
|
|
964
|
+
static const std::unordered_set<std::string> weight_suffixes{ "output.weight", "attn_q.weight",
|
|
965
|
+
"attn_k.weight", "attn_v.weight",
|
|
966
|
+
"attn_output.weight", "ffn_gate.weight",
|
|
967
|
+
"ffn_up.weight", "ffn_down.weight" };
|
|
968
|
+
|
|
969
|
+
for (const auto & suffix : weight_suffixes) {
|
|
1058
970
|
if (name.find(suffix) != std::string::npos) {
|
|
1059
971
|
return true;
|
|
1060
972
|
}
|
|
@@ -1078,23 +990,17 @@ static bool is_matmul_weight(const ggml_tensor* tensor) {
|
|
|
1078
990
|
* @param ctx The CANN backend context used to manage execution and resources.
|
|
1079
991
|
* @param dst The destination tensor.
|
|
1080
992
|
*/
|
|
1081
|
-
template <auto binary_op>
|
|
1082
|
-
|
|
1083
|
-
ggml_tensor*
|
|
1084
|
-
ggml_tensor* src1 = dst->src[1];
|
|
993
|
+
template <auto binary_op> void ggml_cann_binary_op(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
|
|
994
|
+
ggml_tensor * src0 = dst->src[0];
|
|
995
|
+
ggml_tensor * src1 = dst->src[1];
|
|
1085
996
|
|
|
1086
|
-
|
|
1087
|
-
aclTensor* acl_src1;
|
|
1088
|
-
aclTensor* acl_dst;
|
|
997
|
+
acl_tensor_ptr acl_src0, acl_src1, acl_dst;
|
|
1089
998
|
|
|
1090
999
|
// Need bcast
|
|
1091
|
-
bcast_shape(src0, src1, dst,
|
|
1092
|
-
binary_op(ctx, acl_src0, acl_src1, acl_dst);
|
|
1093
|
-
|
|
1094
|
-
ggml_cann_release_resources(ctx, acl_src0, acl_src1, acl_dst);
|
|
1000
|
+
bcast_shape(src0, src1, dst, acl_src0, acl_src1, acl_dst);
|
|
1001
|
+
binary_op(ctx, acl_src0.get(), acl_src1.get(), acl_dst.get());
|
|
1095
1002
|
}
|
|
1096
1003
|
|
|
1097
|
-
|
|
1098
1004
|
/**
|
|
1099
1005
|
* @brief Applies a unary operation to an input tensor using the CANN backend.
|
|
1100
1006
|
*
|
|
@@ -1102,20 +1008,19 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
1102
1008
|
* and stores the result in the destination tensor.
|
|
1103
1009
|
*
|
|
1104
1010
|
* @tparam unary_op A callable with the signature:
|
|
1105
|
-
* void(ggml_backend_cann_context&, aclTensor*, aclTensor*)
|
|
1011
|
+
* void(ggml_backend_cann_context&, aclTensor *, aclTensor *)
|
|
1106
1012
|
* where the first aclTensor is the source and the second is the destination.
|
|
1107
1013
|
* @param ctx The CANN backend context for managing resources and execution.
|
|
1108
1014
|
* @param dst The destination tensor. Its src[0] is treated as the input tensor.
|
|
1109
1015
|
*/
|
|
1110
|
-
template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
|
|
1111
|
-
|
|
1112
|
-
ggml_tensor* src = dst->src[0];
|
|
1016
|
+
template <void unary_op(ggml_backend_cann_context &, aclTensor *, aclTensor *)>
|
|
1017
|
+
void ggml_cann_op_unary(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
|
|
1018
|
+
ggml_tensor * src = dst->src[0];
|
|
1113
1019
|
|
|
1114
|
-
|
|
1115
|
-
|
|
1020
|
+
acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
|
|
1021
|
+
acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
|
|
1116
1022
|
|
|
1117
|
-
unary_op(ctx, acl_src, acl_dst);
|
|
1118
|
-
ggml_cann_release_resources(ctx, acl_src, acl_dst);
|
|
1023
|
+
unary_op(ctx, acl_src.get(), acl_dst.get());
|
|
1119
1024
|
}
|
|
1120
1025
|
|
|
1121
1026
|
/**
|
|
@@ -1138,9 +1043,11 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
|
|
|
1138
1043
|
*
|
|
1139
1044
|
* @see GGML_CANN_CALL_OP_UNARY
|
|
1140
1045
|
*/
|
|
1141
|
-
void ggml_cann_op_unary(
|
|
1142
|
-
|
|
1143
|
-
|
|
1046
|
+
void ggml_cann_op_unary(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op,
|
|
1047
|
+
ggml_backend_cann_context & ctx,
|
|
1048
|
+
ggml_tensor * dst);
|
|
1049
|
+
|
|
1050
|
+
void ggml_cann_ssm_conv(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
1144
1051
|
|
|
1145
1052
|
/**
|
|
1146
1053
|
* @brief Applies a gated (GLU-style) unary operation using the CANN backend.
|
|
@@ -1172,9 +1079,9 @@ void ggml_cann_op_unary(
|
|
|
1172
1079
|
*
|
|
1173
1080
|
* @see GGML_CANN_CALL_OP_UNARY_GATED
|
|
1174
1081
|
*/
|
|
1175
|
-
void ggml_cann_op_unary_gated(
|
|
1176
|
-
|
|
1177
|
-
|
|
1082
|
+
void ggml_cann_op_unary_gated(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op,
|
|
1083
|
+
ggml_backend_cann_context & ctx,
|
|
1084
|
+
ggml_tensor * dst);
|
|
1178
1085
|
|
|
1179
1086
|
/**
|
|
1180
1087
|
* @brief Helper macro to call a unary ACL operator via ggml_cann_op_unary.
|
|
@@ -1197,16 +1104,13 @@ void ggml_cann_op_unary_gated(
|
|
|
1197
1104
|
* @see ggml_cann_op_unary
|
|
1198
1105
|
* @see GGML_CANN_CALL_ACLNN_OP
|
|
1199
1106
|
*/
|
|
1200
|
-
#define GGML_CANN_CALL_OP_UNARY(OP_NAME)
|
|
1201
|
-
do {
|
|
1202
|
-
auto lambda = [](ggml_backend_cann_context& ctx,
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
ggml_cann_op_unary(lambda, ctx, dst); \
|
|
1208
|
-
} \
|
|
1209
|
-
while (0)
|
|
1107
|
+
#define GGML_CANN_CALL_OP_UNARY(OP_NAME) \
|
|
1108
|
+
do { \
|
|
1109
|
+
auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \
|
|
1110
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
|
|
1111
|
+
}; \
|
|
1112
|
+
ggml_cann_op_unary(lambda, ctx, dst); \
|
|
1113
|
+
} while (0)
|
|
1210
1114
|
|
|
1211
1115
|
/**
|
|
1212
1116
|
* @brief Helper macro to call a gated unary ACL operator via ggml_cann_op_unary_gated.
|
|
@@ -1229,15 +1133,32 @@ void ggml_cann_op_unary_gated(
|
|
|
1229
1133
|
* @see ggml_cann_op_unary_gated
|
|
1230
1134
|
* @see GGML_CANN_CALL_ACLNN_OP
|
|
1231
1135
|
*/
|
|
1232
|
-
#define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME)
|
|
1233
|
-
do {
|
|
1234
|
-
auto lambda = [](ggml_backend_cann_context& ctx,
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
ggml_cann_op_unary_gated(lambda, ctx, dst); \
|
|
1240
|
-
} \
|
|
1241
|
-
while (0)
|
|
1136
|
+
#define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME) \
|
|
1137
|
+
do { \
|
|
1138
|
+
auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \
|
|
1139
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
|
|
1140
|
+
}; \
|
|
1141
|
+
ggml_cann_op_unary_gated(lambda, ctx, dst); \
|
|
1142
|
+
} while (0)
|
|
1242
1143
|
|
|
1243
1144
|
#endif // CANN_ACLNN_OPS
|
|
1145
|
+
|
|
1146
|
+
/**
|
|
1147
|
+
* @brief Performs outer product operation on two ggml tensors using the CANN backend.
|
|
1148
|
+
*
|
|
1149
|
+
* @details This function computes the outer product of two input tensors (src0 and src1)
|
|
1150
|
+
* and stores the result in the destination tensor. The outer product operation is defined as:
|
|
1151
|
+
* dst[i,j,k,l] = sum_m (src0[i,m,k,l] * src1[j,m,k,l])
|
|
1152
|
+
*
|
|
1153
|
+
* The function supports multiple data types including F32, F16. For floating-point
|
|
1154
|
+
* types, it uses batch matrix multiplication for efficient computation.
|
|
1155
|
+
*
|
|
1156
|
+
* The implementation handles 4D tensor broadcasting and batch processing automatically.
|
|
1157
|
+
*
|
|
1158
|
+
* @param ctx The CANN backend context for operation execution and memory management.
|
|
1159
|
+
* @param dst The destination ggml_tensor where the outer product result will be stored.
|
|
1160
|
+
* The input tensors are assumed to be `dst->src[0]` and `dst->src[1]`.
|
|
1161
|
+
*
|
|
1162
|
+
* @see GGML_CANN_CALL_ACLNN_OP for CANN operator invocation
|
|
1163
|
+
*/
|
|
1164
|
+
void ggml_cann_out_prod(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|