whispercpp 1.3.4 → 1.3.5
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +60 -43
- data/ext/extconf.rb +2 -2
- data/ext/ruby_whisper.c +14 -2
- data/ext/ruby_whisper.h +39 -0
- data/ext/ruby_whisper_context.c +22 -22
- data/ext/ruby_whisper_model.c +12 -12
- data/ext/ruby_whisper_params.c +47 -23
- data/ext/ruby_whisper_segment.c +84 -19
- data/ext/ruby_whisper_token.c +351 -0
- data/ext/ruby_whisper_transcribe.cpp +1 -1
- data/ext/ruby_whisper_vad_context.c +75 -0
- data/ext/ruby_whisper_vad_context_detect.cpp +50 -0
- data/ext/ruby_whisper_vad_segment.c +139 -0
- data/ext/ruby_whisper_vad_segments.c +106 -0
- data/ext/sources/CMakeLists.txt +4 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/arm64-apple-clang.cmake +16 -0
- data/ext/sources/cmake/arm64-windows-llvm.cmake +16 -0
- data/ext/sources/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- data/ext/sources/cmake/x64-windows-llvm.cmake +5 -0
- data/ext/sources/examples/addon.node/vad-example.js +2 -2
- data/ext/sources/examples/cli/cli.cpp +121 -112
- data/ext/sources/examples/lsp/CMakeLists.txt +2 -1
- data/ext/sources/examples/quantize/CMakeLists.txt +2 -1
- data/ext/sources/examples/server/server.cpp +10 -11
- data/ext/sources/examples/talk-llama/CMakeLists.txt +5 -1
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +12 -3
- data/ext/sources/examples/talk-llama/llama-adapter.h +7 -1
- data/ext/sources/examples/talk-llama/llama-arch.cpp +2046 -1974
- data/ext/sources/examples/talk-llama/llama-arch.h +67 -2
- data/ext/sources/examples/talk-llama/llama-batch.cpp +75 -33
- data/ext/sources/examples/talk-llama/llama-batch.h +17 -4
- data/ext/sources/examples/talk-llama/llama-chat.cpp +79 -3
- data/ext/sources/examples/talk-llama/llama-chat.h +4 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +775 -78
- data/ext/sources/examples/talk-llama/llama-context.h +57 -9
- data/ext/sources/examples/talk-llama/llama-cparams.h +1 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +288 -53
- data/ext/sources/examples/talk-llama/llama-grammar.h +22 -1
- data/ext/sources/examples/talk-llama/llama-graph.cpp +381 -64
- data/ext/sources/examples/talk-llama/llama-graph.h +103 -13
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +26 -2
- data/ext/sources/examples/talk-llama/llama-hparams.h +41 -10
- data/ext/sources/examples/talk-llama/llama-impl.cpp +7 -3
- data/ext/sources/examples/talk-llama/llama-impl.h +1 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +5 -3
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +145 -65
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +22 -7
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +44 -2
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +12 -10
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +32 -19
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +2 -2
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +172 -37
- data/ext/sources/examples/talk-llama/llama-mmap.h +8 -3
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +91 -9
- data/ext/sources/examples/talk-llama/llama-model-loader.h +6 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +3 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +1529 -13134
- data/ext/sources/examples/talk-llama/llama-model.h +44 -3
- data/ext/sources/examples/talk-llama/llama-quant.cpp +8 -23
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +1294 -198
- data/ext/sources/examples/talk-llama/llama-sampling.h +19 -7
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +133 -37
- data/ext/sources/examples/talk-llama/llama-vocab.h +45 -40
- data/ext/sources/examples/talk-llama/llama.cpp +729 -2
- data/ext/sources/examples/talk-llama/llama.h +152 -14
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +191 -0
- data/ext/sources/examples/talk-llama/models/apertus.cpp +125 -0
- data/ext/sources/examples/talk-llama/models/arcee.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/arctic.cpp +138 -0
- data/ext/sources/examples/talk-llama/models/arwkv7.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/bert.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/bloom.cpp +101 -0
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +111 -0
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +102 -0
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/command-r.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/deci.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +259 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/dream.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +114 -0
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +113 -0
- data/ext/sources/examples/talk-llama/models/falcon.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/gemma.cpp +112 -0
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +155 -0
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +384 -0
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +170 -0
- data/ext/sources/examples/talk-llama/models/glm4.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +196 -0
- data/ext/sources/examples/talk-llama/models/granite.cpp +211 -0
- data/ext/sources/examples/talk-llama/models/graph-context-mamba.cpp +283 -0
- data/ext/sources/examples/talk-llama/models/grok.cpp +159 -0
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +154 -0
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/jais.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +106 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +175 -0
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/llada.cpp +99 -0
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/llama.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/mamba.cpp +55 -0
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +199 -0
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/models.h +569 -0
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/mpt.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +104 -0
- data/ext/sources/examples/talk-llama/models/olmo.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +127 -0
- data/ext/sources/examples/talk-llama/models/openelm.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/orion.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi3.cpp +152 -0
- data/ext/sources/examples/talk-llama/models/plamo.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +316 -0
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/plm.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/qwen.cpp +108 -0
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +151 -0
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +873 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/refact.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +162 -0
- data/ext/sources/examples/talk-llama/models/rwkv6.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/rwkv7.cpp +90 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +146 -0
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +100 -0
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +166 -0
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +96 -0
- data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/xverse.cpp +108 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +102 -16
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +1 -1
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +1 -1
- data/ext/sources/ggml/CMakeLists.txt +82 -54
- data/ext/sources/ggml/include/ggml-alloc.h +9 -0
- data/ext/sources/ggml/include/ggml-backend.h +4 -1
- data/ext/sources/ggml/include/ggml-cpu.h +1 -0
- data/ext/sources/ggml/include/ggml-hexagon.h +19 -0
- data/ext/sources/ggml/include/ggml-rpc.h +8 -11
- data/ext/sources/ggml/include/ggml-zendnn.h +22 -0
- data/ext/sources/ggml/include/ggml.h +190 -12
- data/ext/sources/ggml/src/CMakeLists.txt +82 -11
- data/ext/sources/ggml/src/ggml-alloc.c +124 -41
- data/ext/sources/ggml/src/ggml-backend-impl.h +1 -4
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +27 -3
- data/ext/sources/ggml/src/ggml-backend.cpp +71 -21
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +17 -3
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -9
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +57 -45
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +138 -47
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +2179 -1696
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +238 -317
- data/ext/sources/ggml/src/ggml-cann/common.h +283 -208
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +626 -776
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +156 -86
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +1 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1004 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +50 -2
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +195 -71
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +573 -106
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +33 -44
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +298 -112
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +819 -125
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +708 -431
- data/ext/sources/ggml/src/ggml-cpu/ops.h +5 -4
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +671 -31
- data/ext/sources/ggml/src/ggml-cpu/repack.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +41 -43
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +3 -2
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +7 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +124 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.h +261 -146
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +72 -1
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +2 -2
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +123 -6
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +16 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +353 -80
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +339 -246
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +1 -5
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cu +307 -0
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cu +77 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +31 -21
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +663 -596
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +35 -741
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +1241 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +30 -37
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +14 -13
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +83 -37
- data/ext/sources/ggml/src/ggml-cuda/fill.cu +37 -0
- data/ext/sources/ggml/src/ggml-cuda/fill.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1155 -164
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +5 -4
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +741 -48
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +60 -12
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +381 -42
- data/ext/sources/ggml/src/ggml-cuda/mmid.cu +164 -0
- data/ext/sources/ggml/src/ggml-cuda/mmid.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +69 -176
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +498 -171
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +375 -79
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +3 -2
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +241 -95
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +64 -33
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +151 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +192 -77
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +101 -47
- data/ext/sources/ggml/src/ggml-cuda/set.cu +39 -0
- data/ext/sources/ggml/src/ggml-cuda/set.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +203 -6
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +14 -20
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +49 -84
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +19 -1
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +96 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +168 -76
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +11 -4
- data/ext/sources/ggml/src/ggml-cuda/tri.cu +136 -0
- data/ext/sources/ggml/src/ggml-cuda/tri.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +105 -11
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +36 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +163 -7
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +12 -1
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +6 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +80 -0
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3151 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +44 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +682 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +566 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +112 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.c +63 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.h +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +165 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +92 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +94 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +72 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +1020 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +1353 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +1001 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2503 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +487 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +168 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +287 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +454 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +221 -0
- data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +153 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +8 -13
- data/ext/sources/ggml/src/ggml-impl.h +67 -6
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +2 -2
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +29 -20
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +652 -285
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +103 -56
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +496 -118
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +231 -9
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +1227 -224
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +12 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +14 -8
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1972 -704
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +3 -1
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +11 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +1430 -120
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +82 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +4 -3
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +35 -16
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +13 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +438 -156
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +48 -3
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +77 -0
- data/ext/sources/ggml/src/ggml-sycl/add-id.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +0 -9
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +0 -6
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +117 -15
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +55 -44
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +34 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +79 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +0 -3
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +18 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +76 -3
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +333 -300
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +10 -2
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +335 -110
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +156 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.cpp +97 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.hpp +24 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.cpp +122 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +30 -17
- data/ext/sources/ggml/src/ggml-sycl/set.cpp +73 -0
- data/ext/sources/ggml/src/ggml-sycl/set.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +327 -162
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +4 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +58 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +5013 -2859
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +47 -49
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +9 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +39 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +19 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +45 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +50 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +17 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +2 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +19 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mat_vec_base.comp → mul_mat_vec_base.glsl} +70 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +71 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +41 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +44 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +39 -36
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +78 -103
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +34 -23
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mm_funcs.comp → mul_mm_funcs.glsl} +69 -59
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +72 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +88 -228
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +21 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +10 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +50 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +234 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +6 -50
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +6 -33
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +6 -33
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +6 -39
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +2 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +345 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +90 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +335 -151
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +28 -2
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +169 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1964 -435
- data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +33 -10
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +591 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +1 -1
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +6 -6
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +83 -17
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +112 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +483 -0
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +466 -0
- data/ext/sources/ggml/src/ggml.c +425 -33
- data/ext/sources/include/whisper.h +1 -0
- data/ext/sources/src/CMakeLists.txt +3 -1
- data/ext/sources/src/whisper.cpp +101 -35
- data/ext/sources/tests/CMakeLists.txt +2 -2
- data/ext/sources/tests/test-vad-full.cpp +4 -2
- data/ext/sources/tests/test-vad.cpp +1 -1
- data/extsources.rb +1 -0
- data/lib/whisper/model/uri.rb +17 -18
- data/sig/whisper.rbs +119 -2
- data/test/test_params.rb +16 -8
- data/test/test_segment.rb +0 -1
- data/test/test_token.rb +70 -0
- data/test/test_vad.rb +1 -1
- data/test/test_vad_context.rb +50 -0
- data/test/test_vad_segment.rb +19 -0
- data/test/test_vad_segments.rb +16 -0
- data/test/test_whisper.rb +7 -0
- data/whispercpp.gemspec +1 -1
- metadata +287 -34
- data/ext/sources/build-xcframework.sh +0 -571
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +0 -2579
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +0 -48
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
|
@@ -23,28 +23,28 @@
|
|
|
23
23
|
#ifndef CANN_COMMON_H
|
|
24
24
|
#define CANN_COMMON_H
|
|
25
25
|
|
|
26
|
+
#include "../ggml-impl.h"
|
|
27
|
+
#include "../include/ggml-cann.h"
|
|
28
|
+
#include "../include/ggml.h"
|
|
29
|
+
|
|
26
30
|
#include <acl/acl.h>
|
|
31
|
+
#include <unistd.h>
|
|
27
32
|
|
|
33
|
+
#include <atomic>
|
|
34
|
+
#include <condition_variable>
|
|
28
35
|
#include <cstdio>
|
|
36
|
+
#include <functional>
|
|
29
37
|
#include <iostream>
|
|
38
|
+
#include <list>
|
|
30
39
|
#include <map>
|
|
31
40
|
#include <memory>
|
|
32
|
-
#include <string>
|
|
33
|
-
#include <vector>
|
|
34
|
-
#include <atomic>
|
|
35
|
-
#include <condition_variable>
|
|
36
41
|
#include <mutex>
|
|
37
|
-
#include <thread>
|
|
38
|
-
#include <unistd.h>
|
|
39
|
-
#include <functional>
|
|
40
42
|
#include <optional>
|
|
41
|
-
#include <
|
|
42
|
-
|
|
43
|
-
#include
|
|
44
|
-
#include "../include/ggml.h"
|
|
45
|
-
#include "../ggml-impl.h"
|
|
43
|
+
#include <string>
|
|
44
|
+
#include <thread>
|
|
45
|
+
#include <vector>
|
|
46
46
|
|
|
47
|
-
#define MATRIX_ROW_PADDING
|
|
47
|
+
#define MATRIX_ROW_PADDING 512
|
|
48
48
|
#define GGML_CANN_MAX_STREAMS 8
|
|
49
49
|
|
|
50
50
|
/**
|
|
@@ -56,8 +56,7 @@
|
|
|
56
56
|
* @param line The line number at which the error occurred.
|
|
57
57
|
* @param msg The error message.
|
|
58
58
|
*/
|
|
59
|
-
[[noreturn]] void ggml_cann_error(const char* stmt, const char* func,
|
|
60
|
-
const char* file, int line, const char* msg);
|
|
59
|
+
[[noreturn]] void ggml_cann_error(const char * stmt, const char * func, const char * file, int line, const char * msg);
|
|
61
60
|
|
|
62
61
|
/**
|
|
63
62
|
* @brief Checks the result of a CANN function call and invokes the error
|
|
@@ -89,25 +88,24 @@ struct ggml_cann_device_info {
|
|
|
89
88
|
* @brief Information about a single CANN device.
|
|
90
89
|
*/
|
|
91
90
|
struct cann_device_info {
|
|
92
|
-
int
|
|
91
|
+
int cc; /**< Compute capability. */
|
|
93
92
|
size_t smpb; /**< Maximum shared memory per block. */
|
|
94
|
-
bool
|
|
93
|
+
bool vmm; /**< Virtual memory support. */
|
|
95
94
|
size_t vmm_granularity; /**< Granularity of virtual memory. */
|
|
96
95
|
size_t total_vram; /**< Total video RAM available on the device. */
|
|
97
96
|
};
|
|
98
97
|
|
|
99
|
-
cann_device_info devices[GGML_CANN_MAX_DEVICES] =
|
|
100
|
-
{}; /**< Array of CANN device information. */
|
|
98
|
+
cann_device_info devices[GGML_CANN_MAX_DEVICES] = {}; /**< Array of CANN device information. */
|
|
101
99
|
};
|
|
102
100
|
|
|
103
|
-
const ggml_cann_device_info& ggml_cann_info();
|
|
101
|
+
const ggml_cann_device_info & ggml_cann_info();
|
|
104
102
|
|
|
105
|
-
void
|
|
103
|
+
void ggml_cann_set_device(int32_t device);
|
|
106
104
|
int32_t ggml_cann_get_device();
|
|
107
105
|
|
|
108
|
-
std::optional<std::string>
|
|
109
|
-
bool
|
|
110
|
-
int
|
|
106
|
+
std::optional<std::string> get_env_as_lowercase(const std::string & name);
|
|
107
|
+
bool parse_bool(const std::string & value);
|
|
108
|
+
int parse_integer(const std::string & value);
|
|
111
109
|
|
|
112
110
|
/**
|
|
113
111
|
* @brief Abstract base class for memory pools used by CANN.
|
|
@@ -126,7 +124,7 @@ struct ggml_cann_pool {
|
|
|
126
124
|
* will be stored.
|
|
127
125
|
* @return Pointer to the allocated memory block.
|
|
128
126
|
*/
|
|
129
|
-
virtual void* alloc(size_t size, size_t* actual_size) = 0;
|
|
127
|
+
virtual void * alloc(size_t size, size_t * actual_size) = 0;
|
|
130
128
|
|
|
131
129
|
/**
|
|
132
130
|
* @brief Frees a previously allocated memory block.
|
|
@@ -136,16 +134,16 @@ struct ggml_cann_pool {
|
|
|
136
134
|
* @note Note that all CANN operators are running async. Make sure memory is
|
|
137
135
|
* still available until this operator has finished.
|
|
138
136
|
*/
|
|
139
|
-
virtual void free(void* ptr, size_t size) = 0;
|
|
137
|
+
virtual void free(void * ptr, size_t size) = 0;
|
|
140
138
|
};
|
|
141
139
|
|
|
142
140
|
/**
|
|
143
141
|
* @brief RAII wrapper for managing memory allocations from a CANN memory pool.
|
|
144
142
|
*/
|
|
145
143
|
struct ggml_cann_pool_alloc {
|
|
146
|
-
ggml_cann_pool* pool
|
|
147
|
-
void*
|
|
148
|
-
size_t
|
|
144
|
+
ggml_cann_pool * pool = nullptr; /**< Pointer to the memory pool. */
|
|
145
|
+
void * ptr = nullptr; /**< Pointer to the allocated memory block. */
|
|
146
|
+
size_t actual_size = 0; /**< Actual size of the allocated memory block. */
|
|
149
147
|
|
|
150
148
|
/**
|
|
151
149
|
* @brief Default constructor.
|
|
@@ -156,16 +154,14 @@ struct ggml_cann_pool_alloc {
|
|
|
156
154
|
* @brief Constructor that initializes the memory pool.
|
|
157
155
|
* @param pool Reference to the memory pool.
|
|
158
156
|
*/
|
|
159
|
-
explicit ggml_cann_pool_alloc(ggml_cann_pool& pool) : pool(&pool) {}
|
|
157
|
+
explicit ggml_cann_pool_alloc(ggml_cann_pool & pool) : pool(&pool) {}
|
|
160
158
|
|
|
161
159
|
/**
|
|
162
160
|
* @brief Constructor that initializes the memory pool and allocates memory.
|
|
163
161
|
* @param pool Reference to the memory pool.
|
|
164
162
|
* @param size Size of the memory block to allocate.
|
|
165
163
|
*/
|
|
166
|
-
ggml_cann_pool_alloc(ggml_cann_pool& pool, size_t size) : pool(&pool) {
|
|
167
|
-
alloc(size);
|
|
168
|
-
}
|
|
164
|
+
ggml_cann_pool_alloc(ggml_cann_pool & pool, size_t size) : pool(&pool) { alloc(size); }
|
|
169
165
|
|
|
170
166
|
/**
|
|
171
167
|
* @brief Destructor that frees the allocated memory block.
|
|
@@ -181,7 +177,7 @@ struct ggml_cann_pool_alloc {
|
|
|
181
177
|
* @param size Size of the memory block to allocate.
|
|
182
178
|
* @return Pointer to the allocated memory block.
|
|
183
179
|
*/
|
|
184
|
-
void* alloc(size_t size) {
|
|
180
|
+
void * alloc(size_t size) {
|
|
185
181
|
GGML_ASSERT(pool != nullptr);
|
|
186
182
|
GGML_ASSERT(ptr == nullptr);
|
|
187
183
|
ptr = pool->alloc(size, &this->actual_size);
|
|
@@ -194,7 +190,7 @@ struct ggml_cann_pool_alloc {
|
|
|
194
190
|
* @param size Size of the memory block to allocate.
|
|
195
191
|
* @return Pointer to the allocated memory block.
|
|
196
192
|
*/
|
|
197
|
-
void* alloc(ggml_cann_pool& pool, size_t size) {
|
|
193
|
+
void * alloc(ggml_cann_pool & pool, size_t size) {
|
|
198
194
|
this->pool = &pool;
|
|
199
195
|
return alloc(size);
|
|
200
196
|
}
|
|
@@ -203,162 +199,175 @@ struct ggml_cann_pool_alloc {
|
|
|
203
199
|
* @brief Gets the pointer to the allocated memory block.
|
|
204
200
|
* @return Pointer to the allocated memory block.
|
|
205
201
|
*/
|
|
206
|
-
void* get() { return ptr; }
|
|
202
|
+
void * get() { return ptr; }
|
|
207
203
|
|
|
208
204
|
// Deleted copy constructor
|
|
209
|
-
ggml_cann_pool_alloc(const ggml_cann_pool_alloc&) = delete;
|
|
205
|
+
ggml_cann_pool_alloc(const ggml_cann_pool_alloc &) = delete;
|
|
210
206
|
|
|
211
207
|
// Deleted move constructor
|
|
212
|
-
ggml_cann_pool_alloc(ggml_cann_pool_alloc&&) = delete;
|
|
208
|
+
ggml_cann_pool_alloc(ggml_cann_pool_alloc &&) = delete;
|
|
213
209
|
|
|
214
210
|
// Deleted copy assignment operator
|
|
215
|
-
ggml_cann_pool_alloc& operator=(const ggml_cann_pool_alloc&) = delete;
|
|
211
|
+
ggml_cann_pool_alloc & operator=(const ggml_cann_pool_alloc &) = delete;
|
|
216
212
|
|
|
217
213
|
// Deleted move assignment operator
|
|
218
|
-
ggml_cann_pool_alloc& operator=(ggml_cann_pool_alloc&&) = delete;
|
|
214
|
+
ggml_cann_pool_alloc & operator=(ggml_cann_pool_alloc &&) = delete;
|
|
219
215
|
};
|
|
220
216
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
217
|
+
#ifdef USE_ACL_GRAPH
|
|
218
|
+
struct ggml_graph_node_properties {
|
|
219
|
+
// dst tensor
|
|
220
|
+
void * node_address;
|
|
221
|
+
int64_t ne[GGML_MAX_DIMS];
|
|
222
|
+
size_t nb[GGML_MAX_DIMS];
|
|
225
223
|
|
|
226
|
-
|
|
227
|
-
*
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
*/
|
|
231
|
-
class cann_task {
|
|
232
|
-
public:
|
|
233
|
-
virtual void run_task() {}
|
|
234
|
-
};
|
|
224
|
+
// src tensor
|
|
225
|
+
void * src_address[GGML_MAX_SRC];
|
|
226
|
+
int64_t src_ne[GGML_MAX_SRC][GGML_MAX_DIMS];
|
|
227
|
+
size_t src_nb[GGML_MAX_SRC][GGML_MAX_DIMS];
|
|
235
228
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
class cann_task_queue {
|
|
240
|
-
public:
|
|
241
|
-
/**
|
|
242
|
-
* @brief Constructs a task queue with a fixed power-of-two capacity for a specific device.
|
|
243
|
-
*
|
|
244
|
-
* @param capacity Queue capacity. Must be a power of 2.
|
|
245
|
-
* @param device Target device ID (used for context setting).
|
|
246
|
-
*/
|
|
247
|
-
explicit cann_task_queue(size_t capacity, int32_t device)
|
|
248
|
-
: buffer_(capacity), capacity_(capacity), head_(0), tail_(0),
|
|
249
|
-
running_(false), device_(device) {
|
|
250
|
-
GGML_ASSERT((capacity & (capacity - 1)) == 0 && "capacity must be power of 2");
|
|
251
|
-
mask_ = capacity_ - 1;
|
|
252
|
-
}
|
|
229
|
+
// op
|
|
230
|
+
ggml_op node_op;
|
|
231
|
+
int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
|
|
253
232
|
|
|
254
233
|
/**
|
|
255
|
-
* @brief
|
|
234
|
+
* @brief Check if a ggml tensor node matches this property set.
|
|
256
235
|
*
|
|
257
|
-
*
|
|
258
|
-
*
|
|
236
|
+
* This function compares all relevant fields (address, op type, shape, source inputs, op params)
|
|
237
|
+
* to determine whether the current node matches these previously recorded properties.
|
|
238
|
+
*
|
|
239
|
+
* @param node The current ggml tensor node.
|
|
240
|
+
* @return true if all fields match (excluding GGML_OP_VIEW); false otherwise.
|
|
259
241
|
*/
|
|
260
|
-
bool
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
if (next_tail == head_) {
|
|
242
|
+
bool has_matching_properties(ggml_tensor * node) {
|
|
243
|
+
if (node->data != this->node_address && node->op != GGML_OP_VIEW) {
|
|
264
244
|
return false;
|
|
265
245
|
}
|
|
266
246
|
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
return true;
|
|
272
|
-
}
|
|
247
|
+
if (node->op != this->node_op) {
|
|
248
|
+
return false;
|
|
249
|
+
}
|
|
273
250
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
std::this_thread::yield();
|
|
282
|
-
continue;
|
|
251
|
+
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
|
252
|
+
if (node->ne[i] != this->ne[i]) {
|
|
253
|
+
return false;
|
|
254
|
+
}
|
|
255
|
+
if (node->nb[i] != this->nb[i]) {
|
|
256
|
+
return false;
|
|
257
|
+
}
|
|
283
258
|
}
|
|
284
259
|
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
260
|
+
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
|
261
|
+
if (node->src[i]) {
|
|
262
|
+
if (node->src[i]->data != this->src_address[i] && node->op != GGML_OP_VIEW) {
|
|
263
|
+
return false;
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
for (int d = 0; d < GGML_MAX_DIMS; d++) {
|
|
267
|
+
if (node->src[i]->ne[d] != this->src_ne[i][d]) {
|
|
268
|
+
return false;
|
|
269
|
+
}
|
|
270
|
+
if (node->src[i]->nb[d] != this->src_nb[i][d]) {
|
|
271
|
+
return false;
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
} else {
|
|
275
|
+
if (this->src_address[i] != nullptr) {
|
|
276
|
+
return false;
|
|
277
|
+
}
|
|
278
|
+
}
|
|
288
279
|
}
|
|
289
280
|
|
|
281
|
+
if (node->op == GGML_OP_SCALE || node->op == GGML_OP_UNARY || node->op == GGML_OP_GLU) {
|
|
282
|
+
return memcmp(this->op_params, node->op_params, GGML_MAX_OP_PARAMS) == 0;
|
|
283
|
+
}
|
|
284
|
+
return true;
|
|
290
285
|
}
|
|
286
|
+
};
|
|
291
287
|
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
while (running_ && head_ != tail_) {
|
|
297
|
-
std::this_thread::yield();
|
|
298
|
-
continue;
|
|
288
|
+
struct ggml_cann_graph {
|
|
289
|
+
~ggml_cann_graph() {
|
|
290
|
+
if (graph != nullptr) {
|
|
291
|
+
ACL_CHECK(aclmdlRIDestroy(graph));
|
|
299
292
|
}
|
|
300
293
|
}
|
|
301
294
|
|
|
295
|
+
aclmdlRI graph = nullptr;
|
|
296
|
+
|
|
297
|
+
std::vector<ggml_graph_node_properties> ggml_graph_properties;
|
|
298
|
+
|
|
302
299
|
/**
|
|
303
|
-
* @brief
|
|
300
|
+
* @brief Create a new CANN graph from a ggml computation graph.
|
|
301
|
+
*
|
|
302
|
+
* This function creates a new ggml_cann_graph object and fills its node properties
|
|
303
|
+
* (operation type, dimensions, strides, input sources, and operation parameters)
|
|
304
|
+
* based on the current ggml computation graph.
|
|
305
|
+
*
|
|
306
|
+
* Each node in the ggml graph is mapped to a property entry in the new CANN graph:
|
|
307
|
+
* - node address
|
|
308
|
+
* - operation type
|
|
309
|
+
* - shape (ne) and strides (nb)
|
|
310
|
+
* - source tensor addresses
|
|
311
|
+
* - operation parameters
|
|
312
|
+
*
|
|
313
|
+
* @param cgraph The current ggml computation graph.
|
|
314
|
+
* @return Pointer to the newly created ggml_cann_graph object.
|
|
304
315
|
*/
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
316
|
+
static ggml_cann_graph * create_from_cgraph(ggml_cgraph * cgraph) {
|
|
317
|
+
ggml_cann_graph * new_graph = new ggml_cann_graph();
|
|
318
|
+
new_graph->ggml_graph_properties.resize(cgraph->n_nodes);
|
|
319
|
+
|
|
320
|
+
for (int node_idx = 0; node_idx < cgraph->n_nodes; ++node_idx) {
|
|
321
|
+
ggml_tensor * node = cgraph->nodes[node_idx];
|
|
322
|
+
auto & prop = new_graph->ggml_graph_properties[node_idx];
|
|
323
|
+
|
|
324
|
+
prop.node_address = node->data;
|
|
325
|
+
prop.node_op = node->op;
|
|
326
|
+
|
|
327
|
+
std::copy_n(node->ne, GGML_MAX_DIMS, prop.ne);
|
|
328
|
+
std::copy_n(node->nb, GGML_MAX_DIMS, prop.nb);
|
|
329
|
+
|
|
330
|
+
for (int src = 0; src < GGML_MAX_SRC; ++src) {
|
|
331
|
+
if (node->src[src]) {
|
|
332
|
+
prop.src_address[src] = node->src[src]->data;
|
|
333
|
+
std::copy_n(node->src[src]->ne, GGML_MAX_DIMS, prop.src_ne[src]);
|
|
334
|
+
std::copy_n(node->src[src]->nb, GGML_MAX_DIMS, prop.src_nb[src]);
|
|
335
|
+
} else {
|
|
336
|
+
prop.src_address[src] = nullptr;
|
|
337
|
+
std::fill_n(prop.src_ne[src], GGML_MAX_DIMS, 0);
|
|
338
|
+
std::fill_n(prop.src_nb[src], GGML_MAX_DIMS, 0);
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
memcpy(prop.op_params, node->op_params, GGML_MAX_OP_PARAMS);
|
|
309
343
|
}
|
|
344
|
+
|
|
345
|
+
return new_graph;
|
|
310
346
|
}
|
|
311
347
|
|
|
312
|
-
private:
|
|
313
348
|
/**
|
|
314
|
-
* @brief
|
|
349
|
+
* @brief Check whether this CANN graph matches the given ggml computation graph.
|
|
350
|
+
*
|
|
351
|
+
* This function compares the number of nodes and each node's properties
|
|
352
|
+
* (operation type, dimensions, strides, inputs, and operation parameters)
|
|
353
|
+
* to determine whether this CANN graph matches the given ggml graph.
|
|
354
|
+
*
|
|
355
|
+
* @param cgraph The current ggml computation graph.
|
|
356
|
+
* @return true if this CANN graph matches the ggml graph; false otherwise.
|
|
315
357
|
*/
|
|
316
|
-
|
|
317
|
-
|
|
358
|
+
bool matches_cgraph(ggml_cgraph * cgraph) {
|
|
359
|
+
if (this->ggml_graph_properties.size() != static_cast<size_t>(cgraph->n_nodes)) {
|
|
360
|
+
return false;
|
|
361
|
+
}
|
|
318
362
|
|
|
319
|
-
|
|
320
|
-
if(
|
|
321
|
-
|
|
322
|
-
continue;
|
|
363
|
+
for (int i = 0; i < cgraph->n_nodes; ++i) {
|
|
364
|
+
if (!this->ggml_graph_properties[i].has_matching_properties(cgraph->nodes[i])) {
|
|
365
|
+
return false;
|
|
323
366
|
}
|
|
324
|
-
|
|
325
|
-
std::atomic_thread_fence(std::memory_order_acquire);
|
|
326
|
-
buffer_[head_]->run_task();
|
|
327
|
-
buffer_[head_].reset();
|
|
328
|
-
head_ = (head_ + 1) & mask_;
|
|
329
367
|
}
|
|
330
|
-
}
|
|
331
|
-
|
|
332
|
-
std::vector<std::unique_ptr<cann_task>> buffer_;
|
|
333
|
-
const size_t capacity_;
|
|
334
|
-
size_t mask_;
|
|
335
|
-
size_t head_;
|
|
336
|
-
size_t tail_;
|
|
337
|
-
bool running_;
|
|
338
|
-
std::thread thread_;
|
|
339
|
-
int32_t device_;
|
|
340
|
-
};
|
|
341
368
|
|
|
342
|
-
|
|
343
|
-
struct ggml_graph_node_properties {
|
|
344
|
-
void * node_address;
|
|
345
|
-
ggml_op node_op;
|
|
346
|
-
int64_t ne[GGML_MAX_DIMS];
|
|
347
|
-
size_t nb[GGML_MAX_DIMS];
|
|
348
|
-
void * src_address[GGML_MAX_SRC];
|
|
349
|
-
int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
|
|
350
|
-
};
|
|
351
|
-
|
|
352
|
-
struct ggml_cann_graph {
|
|
353
|
-
~ggml_cann_graph() {
|
|
354
|
-
if (graph != nullptr) {
|
|
355
|
-
ACL_CHECK(aclmdlRIDestroy(graph));
|
|
356
|
-
}
|
|
369
|
+
return true;
|
|
357
370
|
}
|
|
358
|
-
|
|
359
|
-
aclmdlRI graph = nullptr;
|
|
360
|
-
|
|
361
|
-
std::vector<ggml_graph_node_properties> ggml_graph_properties;
|
|
362
371
|
};
|
|
363
372
|
|
|
364
373
|
/**
|
|
@@ -369,13 +378,11 @@ struct ggml_cann_graph {
|
|
|
369
378
|
* move existing graphs to the front (most recently used), and clear the cache.
|
|
370
379
|
*/
|
|
371
380
|
struct ggml_cann_graph_lru_cache {
|
|
372
|
-
size_t capacity;
|
|
381
|
+
size_t capacity; /**< Maximum number of graphs in the cache. */
|
|
373
382
|
|
|
374
|
-
std::list<ggml_cann_graph*> cache_list; /**< List storing cached graphs as raw pointers. */
|
|
383
|
+
std::list<ggml_cann_graph *> cache_list; /**< List storing cached graphs as raw pointers. */
|
|
375
384
|
|
|
376
|
-
ggml_cann_graph_lru_cache() {
|
|
377
|
-
capacity = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12"));
|
|
378
|
-
}
|
|
385
|
+
ggml_cann_graph_lru_cache() { capacity = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12")); }
|
|
379
386
|
|
|
380
387
|
/**
|
|
381
388
|
* @brief Push a new graph to the front of the cache.
|
|
@@ -383,24 +390,15 @@ struct ggml_cann_graph_lru_cache {
|
|
|
383
390
|
* @param new_node Pointer to the new ggml_cann_graph to cache.
|
|
384
391
|
* Ownership is transferred to the cache (cache will delete it).
|
|
385
392
|
*/
|
|
386
|
-
void push(ggml_cann_graph* new_node) {
|
|
393
|
+
void push(ggml_cann_graph * new_node) {
|
|
387
394
|
if (cache_list.size() >= capacity) {
|
|
388
|
-
ggml_cann_graph* old = cache_list.back();
|
|
395
|
+
ggml_cann_graph * old = cache_list.back();
|
|
389
396
|
cache_list.pop_back();
|
|
390
|
-
delete old;
|
|
397
|
+
delete old; // free the old graph
|
|
391
398
|
}
|
|
392
399
|
cache_list.push_front(new_node);
|
|
393
400
|
}
|
|
394
401
|
|
|
395
|
-
/**
|
|
396
|
-
* @brief Move an existing graph to the front of the cache.
|
|
397
|
-
* @param node Pointer to the ggml_cann_graph to move.
|
|
398
|
-
*/
|
|
399
|
-
void move_to_front(ggml_cann_graph* node) {
|
|
400
|
-
cache_list.remove(node);
|
|
401
|
-
cache_list.push_front(node);
|
|
402
|
-
}
|
|
403
|
-
|
|
404
402
|
/**
|
|
405
403
|
* @brief Clear all graphs from the cache (also frees memory).
|
|
406
404
|
*/
|
|
@@ -414,92 +412,171 @@ struct ggml_cann_graph_lru_cache {
|
|
|
414
412
|
/**
|
|
415
413
|
* @brief Destructor that clears the cache and frees all cached graphs.
|
|
416
414
|
*/
|
|
417
|
-
~ggml_cann_graph_lru_cache() {
|
|
418
|
-
|
|
415
|
+
~ggml_cann_graph_lru_cache() { clear(); }
|
|
416
|
+
|
|
417
|
+
/**
|
|
418
|
+
* @brief Find a cached CANN graph that matches the given ggml graph and move it to front.
|
|
419
|
+
*
|
|
420
|
+
* This function iterates through the cached CANN graphs stored in the LRU cache and
|
|
421
|
+
* compares them against the given ggml computation graph. If a matching graph is found,
|
|
422
|
+
* it is promoted to the front of the LRU cache and true is returned. Otherwise, the function
|
|
423
|
+
* returns false.
|
|
424
|
+
*
|
|
425
|
+
* @param cgraph The current ggml computation graph.
|
|
426
|
+
* @return true if found; false otherwise.
|
|
427
|
+
*/
|
|
428
|
+
bool find_and_move_to_front(ggml_cgraph * cgraph) {
|
|
429
|
+
for (auto & graph_ptr : this->cache_list) {
|
|
430
|
+
if (graph_ptr->matches_cgraph(cgraph)) {
|
|
431
|
+
cache_list.remove(graph_ptr);
|
|
432
|
+
cache_list.push_front(graph_ptr);
|
|
433
|
+
return true;
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
return false;
|
|
419
437
|
}
|
|
420
438
|
};
|
|
421
439
|
#endif // USE_ACL_GRAPH
|
|
422
440
|
|
|
423
441
|
struct ggml_cann_rope_cache {
|
|
424
442
|
~ggml_cann_rope_cache() {
|
|
425
|
-
if(theta_scale_cache
|
|
443
|
+
if (theta_scale_cache) {
|
|
426
444
|
ACL_CHECK(aclrtFree(theta_scale_cache));
|
|
427
445
|
}
|
|
428
|
-
if(sin_cache
|
|
446
|
+
if (sin_cache) {
|
|
429
447
|
ACL_CHECK(aclrtFree(sin_cache));
|
|
430
448
|
}
|
|
431
|
-
if(cos_cache
|
|
449
|
+
if (cos_cache) {
|
|
432
450
|
ACL_CHECK(aclrtFree(cos_cache));
|
|
433
451
|
}
|
|
452
|
+
if (position_select_index) {
|
|
453
|
+
ACL_CHECK(aclrtFree(position_select_index));
|
|
454
|
+
}
|
|
455
|
+
if (theta_scale_exp_host) {
|
|
456
|
+
free(theta_scale_exp_host);
|
|
457
|
+
}
|
|
458
|
+
if (position_select_index_host) {
|
|
459
|
+
free(position_select_index_host);
|
|
460
|
+
}
|
|
461
|
+
if (yarn_ramp_cache) {
|
|
462
|
+
ACL_CHECK(aclrtFree(yarn_ramp_cache));
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
bool equal(int64_t theta_scale_length,
|
|
467
|
+
int64_t position_length,
|
|
468
|
+
float ext_factor,
|
|
469
|
+
float theta_scale,
|
|
470
|
+
float freq_scale,
|
|
471
|
+
float attn_factor,
|
|
472
|
+
bool is_neox,
|
|
473
|
+
bool indep_sects,
|
|
474
|
+
bool mrope_used,
|
|
475
|
+
bool is_imrope,
|
|
476
|
+
int sections[4]) {
|
|
477
|
+
return this->theta_scale_length == theta_scale_length && this->position_length == position_length &&
|
|
478
|
+
this->ext_factor == ext_factor && this->theta_scale == theta_scale && this->freq_scale == freq_scale &&
|
|
479
|
+
this->attn_factor == attn_factor && this->is_neox == is_neox && this->indep_sects == indep_sects &&
|
|
480
|
+
this->mrope_used == mrope_used && this->is_imrope == is_imrope && this->sections[0] == sections[0] &&
|
|
481
|
+
this->sections[1] == sections[1] && this->sections[2] == sections[2] && this->sections[3] == sections[3];
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
void set(int64_t theta_scale_length,
|
|
485
|
+
int64_t position_length,
|
|
486
|
+
float ext_factor,
|
|
487
|
+
float theta_scale,
|
|
488
|
+
float freq_scale,
|
|
489
|
+
float attn_factor,
|
|
490
|
+
bool is_neox,
|
|
491
|
+
bool indep_sects,
|
|
492
|
+
bool mrope_used,
|
|
493
|
+
bool is_imrope,
|
|
494
|
+
int sections[4]) {
|
|
495
|
+
this->theta_scale_length = theta_scale_length;
|
|
496
|
+
this->position_length = position_length;
|
|
497
|
+
this->ext_factor = ext_factor;
|
|
498
|
+
this->theta_scale = theta_scale;
|
|
499
|
+
this->freq_scale = freq_scale;
|
|
500
|
+
this->attn_factor = attn_factor;
|
|
501
|
+
this->is_neox = is_neox;
|
|
502
|
+
this->indep_sects = indep_sects;
|
|
503
|
+
this->mrope_used = mrope_used;
|
|
504
|
+
this->is_imrope = is_imrope;
|
|
505
|
+
this->sections[0] = sections[0];
|
|
506
|
+
this->sections[1] = sections[1];
|
|
507
|
+
this->sections[2] = sections[2];
|
|
508
|
+
this->sections[3] = sections[3];
|
|
434
509
|
}
|
|
435
510
|
|
|
436
|
-
|
|
437
|
-
|
|
511
|
+
// memory cache, prepare before inferencing.
|
|
512
|
+
void * theta_scale_cache = nullptr;
|
|
513
|
+
float * theta_scale_exp_host = nullptr;
|
|
514
|
+
int * position_select_index_host = nullptr;
|
|
515
|
+
void * position_select_index = nullptr;
|
|
516
|
+
void * yarn_ramp_cache = nullptr;
|
|
438
517
|
// sin/cos cache, used only to accelerate first layer on each device
|
|
439
|
-
void*
|
|
440
|
-
void*
|
|
441
|
-
int64_t position_length = 0;
|
|
518
|
+
void * sin_cache = nullptr;
|
|
519
|
+
void * cos_cache = nullptr;
|
|
442
520
|
// Properties to check before reusing the sincos cache
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
float
|
|
447
|
-
float
|
|
448
|
-
|
|
521
|
+
int64_t theta_scale_length = 0;
|
|
522
|
+
int64_t position_length = 0;
|
|
523
|
+
bool cached = false;
|
|
524
|
+
float ext_factor = 0.0f;
|
|
525
|
+
float theta_scale = 0.0f;
|
|
526
|
+
float freq_scale = 0.0f;
|
|
527
|
+
float attn_factor = 0.0f;
|
|
528
|
+
bool is_neox = false;
|
|
529
|
+
bool indep_sects = false;
|
|
530
|
+
bool mrope_used = false;
|
|
531
|
+
int sections[4] = { 0, 0, 0, 0 };
|
|
532
|
+
bool is_imrope = false;
|
|
449
533
|
};
|
|
450
534
|
|
|
451
535
|
struct ggml_cann_tensor_cache {
|
|
452
536
|
~ggml_cann_tensor_cache() {
|
|
453
|
-
if(cache != nullptr) {
|
|
537
|
+
if (cache != nullptr) {
|
|
454
538
|
ACL_CHECK(aclrtFree(cache));
|
|
455
539
|
}
|
|
456
540
|
}
|
|
457
541
|
|
|
458
|
-
void*
|
|
459
|
-
int64_t size
|
|
542
|
+
void * cache = nullptr;
|
|
543
|
+
int64_t size = 0;
|
|
460
544
|
};
|
|
461
545
|
|
|
462
546
|
/**
|
|
463
547
|
* @brief Context for managing CANN backend operations.
|
|
464
548
|
*/
|
|
465
549
|
struct ggml_backend_cann_context {
|
|
466
|
-
int32_t
|
|
467
|
-
std::string name;
|
|
468
|
-
std::string description;
|
|
469
|
-
aclrtEvent
|
|
550
|
+
int32_t device; /**< Device ID. */
|
|
551
|
+
std::string name; /**< Name of the device. */
|
|
552
|
+
std::string description; /**< Description of the device. */
|
|
553
|
+
aclrtEvent copy_event = nullptr; /**< Event for managing copy operations. */
|
|
470
554
|
#ifdef USE_ACL_GRAPH
|
|
471
555
|
/// Cached CANN ACL graph used for executing the current ggml computation graph.
|
|
472
556
|
ggml_cann_graph_lru_cache graph_lru_cache;
|
|
473
|
-
bool
|
|
557
|
+
bool acl_graph_mode = true;
|
|
474
558
|
#endif
|
|
475
|
-
|
|
476
|
-
bool async_mode;
|
|
559
|
+
bool async_mode;
|
|
477
560
|
// Rope Cache
|
|
478
|
-
ggml_cann_rope_cache
|
|
561
|
+
ggml_cann_rope_cache rope_cache;
|
|
479
562
|
// Constant Pool
|
|
480
563
|
ggml_cann_tensor_cache rms_norm_one_tensor_cache;
|
|
481
564
|
ggml_cann_tensor_cache rms_norm_zero_tensor_cache;
|
|
482
565
|
|
|
483
|
-
aclrtStream streams[GGML_CANN_MAX_STREAMS] = {nullptr}; /**< Array of streams for the device. */
|
|
566
|
+
aclrtStream streams[GGML_CANN_MAX_STREAMS] = { nullptr }; /**< Array of streams for the device. */
|
|
484
567
|
|
|
485
568
|
/**
|
|
486
569
|
* @brief Constructor for initializing the context with a given device.
|
|
487
570
|
* @param device Device ID.
|
|
488
571
|
*/
|
|
489
|
-
explicit ggml_backend_cann_context(int device)
|
|
490
|
-
: device(device), name("CANN" + std::to_string(device)), task_queue(1024, device) {
|
|
572
|
+
explicit ggml_backend_cann_context(int device) : device(device), name("CANN" + std::to_string(device)) {
|
|
491
573
|
ggml_cann_set_device(device);
|
|
492
574
|
description = aclrtGetSocName();
|
|
493
575
|
|
|
494
|
-
async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
|
|
495
|
-
GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
|
|
496
|
-
device, async_mode ? "ON" : "OFF");
|
|
497
576
|
#ifdef USE_ACL_GRAPH
|
|
498
577
|
acl_graph_mode = parse_bool(get_env("GGML_CANN_ACL_GRAPH").value_or("on"));
|
|
499
|
-
GGML_LOG_INFO("%s: device %d execution mode is %s (%s)\n",
|
|
500
|
-
|
|
501
|
-
acl_graph_mode ? "GRAPH" : "EAGER",
|
|
502
|
-
acl_graph_mode ? "acl graph enabled" : "acl graph disabled");
|
|
578
|
+
GGML_LOG_INFO("%s: device %d execution mode is %s (%s)\n", __func__, device, acl_graph_mode ? "GRAPH" : "EAGER",
|
|
579
|
+
acl_graph_mode ? "acl graph enabled" : "acl graph disabled");
|
|
503
580
|
#endif
|
|
504
581
|
}
|
|
505
582
|
|
|
@@ -508,7 +585,6 @@ struct ggml_backend_cann_context {
|
|
|
508
585
|
*/
|
|
509
586
|
~ggml_backend_cann_context() {
|
|
510
587
|
ggml_cann_set_device(device);
|
|
511
|
-
task_queue.stop();
|
|
512
588
|
if (copy_event != nullptr) {
|
|
513
589
|
ACL_CHECK(aclrtDestroyEvent(copy_event));
|
|
514
590
|
}
|
|
@@ -542,8 +618,7 @@ struct ggml_backend_cann_context {
|
|
|
542
618
|
// Convenience overload: forwards to stream(0) and returns its result.
aclrtStream stream() { return stream(0); }
|
|
543
619
|
|
|
544
620
|
// TODO: each stream should have a memory pool.
|
|
545
|
-
std::unique_ptr<ggml_cann_pool>
|
|
546
|
-
mem_pool; /**< Memory pool for the device. */
|
|
621
|
+
std::unique_ptr<ggml_cann_pool> mem_pool; /**< Memory pool for the device. */
|
|
547
622
|
|
|
548
623
|
/**
|
|
549
624
|
* @brief Create a new memory pool for a given device.
|
|
@@ -556,7 +631,7 @@ struct ggml_backend_cann_context {
|
|
|
556
631
|
* @brief Get or create the memory pool for the context.
|
|
557
632
|
* @return Reference to the memory pool.
|
|
558
633
|
*/
|
|
559
|
-
ggml_cann_pool& pool() {
|
|
634
|
+
ggml_cann_pool & pool() {
|
|
560
635
|
if (mem_pool == nullptr) {
|
|
561
636
|
mem_pool = new_pool_for_device(device);
|
|
562
637
|
}
|