whispercpp 1.3.5 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.md +99 -2
- data/ext/extconf.rb +1 -0
- data/ext/ruby_whisper.c +20 -4
- data/ext/ruby_whisper.h +30 -2
- data/ext/ruby_whisper_context.c +216 -124
- data/ext/ruby_whisper_context_params.c +163 -0
- data/ext/ruby_whisper_model.c +0 -1
- data/ext/ruby_whisper_params.c +0 -1
- data/ext/ruby_whisper_segment.c +0 -1
- data/ext/ruby_whisper_token.c +29 -9
- data/ext/ruby_whisper_transcribe.cpp +4 -1
- data/ext/ruby_whisper_vad_context.c +48 -1
- data/ext/ruby_whisper_vad_context_detect.cpp +6 -5
- data/ext/ruby_whisper_vad_params.c +0 -1
- data/ext/ruby_whisper_vad_segment.c +0 -1
- data/ext/ruby_whisper_vad_segments.c +0 -1
- data/ext/sources/CMakeLists.txt +1 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/whisper-config.cmake.in +5 -40
- data/ext/sources/examples/bench/bench.cpp +23 -18
- data/ext/sources/examples/cli/cli.cpp +8 -0
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/miniaudio.h +4507 -2131
- data/ext/sources/examples/server/server.cpp +18 -4
- data/ext/sources/examples/talk-llama/CMakeLists.txt +3 -2
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +7 -13
- data/ext/sources/examples/talk-llama/llama-adapter.h +4 -3
- data/ext/sources/examples/talk-llama/llama-arch.cpp +335 -17
- data/ext/sources/examples/talk-llama/llama-arch.h +42 -0
- data/ext/sources/examples/talk-llama/llama-batch.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-chat.cpp +21 -1
- data/ext/sources/examples/talk-llama/llama-chat.h +1 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +508 -520
- data/ext/sources/examples/talk-llama/llama-context.h +27 -28
- data/ext/sources/examples/talk-llama/llama-cparams.h +5 -0
- data/ext/sources/examples/talk-llama/llama-ext.h +12 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +8 -8
- data/ext/sources/examples/talk-llama/llama-graph.cpp +583 -130
- data/ext/sources/examples/talk-llama/llama-graph.h +131 -10
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +57 -40
- data/ext/sources/examples/talk-llama/llama-hparams.h +79 -10
- data/ext/sources/examples/talk-llama/llama-impl.cpp +4 -4
- data/ext/sources/examples/talk-llama/llama-impl.h +13 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +274 -89
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +2 -3
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +275 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +140 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +11 -13
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +28 -11
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +527 -119
- data/ext/sources/examples/talk-llama/llama-model-loader.h +35 -5
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +60 -46
- data/ext/sources/examples/talk-llama/llama-model-saver.h +5 -2
- data/ext/sources/examples/talk-llama/llama-model.cpp +1365 -647
- data/ext/sources/examples/talk-llama/llama-model.h +72 -19
- data/ext/sources/examples/talk-llama/llama-quant.cpp +578 -346
- data/ext/sources/examples/talk-llama/{llama-sampling.cpp → llama-sampler.cpp} +190 -76
- data/ext/sources/examples/talk-llama/{llama-sampling.h → llama-sampler.h} +0 -2
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +118 -48
- data/ext/sources/examples/talk-llama/llama-vocab.h +5 -0
- data/ext/sources/examples/talk-llama/llama.cpp +76 -22
- data/ext/sources/examples/talk-llama/llama.h +63 -30
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +2 -3
- data/ext/sources/examples/talk-llama/models/apertus.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arcee.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arctic.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +4 -3
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +3 -5
- data/ext/sources/examples/talk-llama/models/bert.cpp +13 -7
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +9 -24
- data/ext/sources/examples/talk-llama/models/bloom.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/command-r.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/deci.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +24 -21
- data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +445 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/dream.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/eurobert.cpp +97 -0
- data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +145 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +2 -4
- data/ext/sources/examples/talk-llama/models/falcon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +7 -7
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/glm4.cpp +14 -7
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/granite.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/grok.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +5 -7
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/jais.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/jais2.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +145 -124
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llada.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llama.cpp +18 -11
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/{graph-context-mamba.cpp → mamba-base.cpp} +9 -3
- data/ext/sources/examples/talk-llama/models/mamba.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +11 -5
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +14 -13
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/models.h +181 -46
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +2 -9
- data/ext/sources/examples/talk-llama/models/mpt.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +26 -14
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/olmo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/openelm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/orion.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/paddleocr.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/phi2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/phi3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +9 -5
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/plm.cpp +15 -14
- data/ext/sources/examples/talk-llama/models/qwen.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +12 -9
- data/ext/sources/examples/talk-llama/models/qwen35.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +422 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +15 -8
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +84 -432
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +9 -18
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +8 -17
- data/ext/sources/examples/talk-llama/models/refact.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +165 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/xverse.cpp +3 -3
- data/ext/sources/examples/talk-llama/unicode.cpp +21 -65
- data/ext/sources/ggml/CMakeLists.txt +9 -3
- data/ext/sources/ggml/include/ggml-backend.h +1 -1
- data/ext/sources/ggml/include/ggml-cann.h +1 -1
- data/ext/sources/ggml/include/ggml-cpu.h +5 -0
- data/ext/sources/ggml/include/ggml-openvino.h +37 -0
- data/ext/sources/ggml/include/ggml-opt.h +1 -1
- data/ext/sources/ggml/include/ggml-rpc.h +6 -1
- data/ext/sources/ggml/include/ggml-virtgpu.h +14 -0
- data/ext/sources/ggml/include/ggml.h +56 -9
- data/ext/sources/ggml/src/CMakeLists.txt +3 -0
- data/ext/sources/ggml/src/ggml-alloc.c +4 -9
- data/ext/sources/ggml/src/ggml-backend-dl.cpp +48 -0
- data/ext/sources/ggml/src/ggml-backend-dl.h +45 -0
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +28 -86
- data/ext/sources/ggml/src/ggml-backend.cpp +5 -2
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +6 -2
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +1 -1
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +348 -189
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +40 -85
- data/ext/sources/ggml/src/ggml-cann/common.h +3 -4
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +44 -62
- data/ext/sources/ggml/src/ggml-common.h +11 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +16 -11
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +42 -19
- data/ext/sources/ggml/src/ggml-cpu/amx/common.h +34 -10
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +85 -85
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +85 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2744 -548
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1653 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1391 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +8 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +9 -9
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +118 -18
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +107 -26
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +2 -6
- data/ext/sources/ggml/src/ggml-cpu/common.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +59 -12
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +15 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +21 -20
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +965 -252
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +584 -197
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +903 -188
- data/ext/sources/ggml/src/ggml-cpu/ops.h +1 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +40 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +2890 -679
- data/ext/sources/ggml/src/ggml-cpu/repack.h +119 -8
- data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +111 -3
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +17 -0
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +19 -10
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +32 -30
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +134 -18
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +41 -27
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +6 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +78 -64
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +384 -143
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +36 -22
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +3 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +26 -5
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +127 -12
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +263 -0
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +595 -200
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +9 -8
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +173 -6
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +30 -10
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +158 -85
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +34 -22
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +127 -67
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +157 -65
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +18 -76
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +13 -10
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +2 -16
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +233 -133
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +8 -83
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +56 -32
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +3 -3
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +0 -1
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +199 -135
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +20 -14
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +55 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +31 -17
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +10 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +82 -45
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +334 -160
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +7 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +328 -197
- data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +765 -234
- data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +252 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +412 -265
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +23 -23
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.c → hex-dma.c} +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.h → hex-dma.h} +28 -3
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +27 -37
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +6 -35
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +240 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +251 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +20 -1347
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +211 -13
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +1119 -952
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +254 -244
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +36 -36
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +155 -138
- data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +339 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +209 -114
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +1 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- data/ext/sources/ggml/src/ggml-hexagon/libdl.h +79 -0
- data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +6 -0
- data/ext/sources/ggml/src/ggml-impl.h +62 -0
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -10
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +13 -2
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +8 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +147 -17
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +274 -73
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +22 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +102 -36
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +174 -23
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +580 -280
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +5 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +320 -107
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1068 -825
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +19 -1
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +3108 -636
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +41 -99
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +45 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +204 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +87 -56
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mv_q6_k.cl → mul_mv_q6_k_f32.cl} +4 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +31 -32
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +14 -4
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -60
- data/ext/sources/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +94 -48
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +26 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- data/ext/sources/ggml/src/ggml-openvino/.clang-format +154 -0
- data/ext/sources/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +975 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +373 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +1110 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +884 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.h +153 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +39 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +293 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +226 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +85 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.cpp +823 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.h +123 -0
- data/ext/sources/ggml/src/ggml-quants.c +96 -5
- data/ext/sources/ggml/src/ggml-quants.h +3 -0
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +15 -88
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +5 -1
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +21 -20
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +315 -10
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +69 -1
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +22 -1
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +791 -47
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +78 -68
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +1179 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +55 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +1338 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +667 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +225 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.hpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +309 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +316 -51
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +65 -66
- data/ext/sources/ggml/src/ggml-sycl/outprod.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +450 -287
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +6 -6
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +13 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +1 -1
- data/ext/sources/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- data/ext/sources/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- data/ext/sources/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1250 -465
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +16 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +374 -170
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +66 -22
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +389 -201
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +106 -58
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +9 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +128 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +12 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +20 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +11 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +8 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +3 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +5 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +36 -63
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +10 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +16 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +55 -35
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1314 -109
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1660 -1371
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +141 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +65 -72
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +6 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +40 -5
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +105 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{get_rows.tmpl.wgsl → get_rows.wgsl} +53 -259
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat.tmpl.wgsl → mul_mat.wgsl} +68 -257
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +692 -23
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_reg_tile.tmpl.wgsl → mul_mat_reg_tile.wgsl} +28 -128
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_subgroup_matrix.tmpl.wgsl → mul_mat_subgroup_matrix.wgsl} +31 -137
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +480 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{scale.tmpl.wgsl → scale.wgsl} +9 -36
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +40 -12
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +6 -1
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +31 -32
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +9 -6
- data/ext/sources/ggml/src/ggml.c +167 -33
- data/ext/sources/ggml/src/gguf.cpp +229 -44
- data/ext/sources/src/whisper.cpp +6 -28
- data/sig/whisper.rbs +43 -2
- data/test/test_context_params.rb +82 -0
- data/test/test_token.rb +11 -0
- data/test/test_vad_context.rb +58 -8
- data/test/test_whisper.rb +20 -0
- data/whispercpp.gemspec +1 -1
- metadata +240 -28
- data/ext/sources/ggml/cmake/BuildTypes.cmake +0 -54
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +0 -333
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +0 -94
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +0 -72
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +0 -49
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +0 -1020
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +0 -149
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +0 -454
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +0 -221
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +0 -188
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +0 -45
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +0 -267
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +0 -112
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +0 -483
|
@@ -259,7 +259,7 @@ extern "C" {
|
|
|
259
259
|
Example usage:
|
|
260
260
|
|
|
261
261
|
// operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned
|
|
262
|
-
//
|
|
262
|
+
// preferably to run on the same backend as the buffer
|
|
263
263
|
ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
|
|
264
264
|
|
|
265
265
|
sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false, true);
|
|
@@ -19,6 +19,9 @@ extern "C" {
|
|
|
19
19
|
// abort ggml_graph_compute when true
|
|
20
20
|
ggml_abort_callback abort_callback;
|
|
21
21
|
void * abort_callback_data;
|
|
22
|
+
|
|
23
|
+
// use only reference implementations
|
|
24
|
+
bool use_ref;
|
|
22
25
|
};
|
|
23
26
|
|
|
24
27
|
// numa strategies
|
|
@@ -132,6 +135,8 @@ extern "C" {
|
|
|
132
135
|
GGML_BACKEND_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
|
|
133
136
|
GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
|
|
134
137
|
|
|
138
|
+
GGML_BACKEND_API void ggml_backend_cpu_set_use_ref(ggml_backend_t backend_cpu, bool use_ref);
|
|
139
|
+
|
|
135
140
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
|
|
136
141
|
|
|
137
142
|
GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float *, float *, int64_t);
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "ggml-backend.h"
|
|
4
|
+
|
|
5
|
+
#include <cstring>
|
|
6
|
+
|
|
7
|
+
#ifdef __cplusplus
|
|
8
|
+
extern "C" {
|
|
9
|
+
#endif
|
|
10
|
+
|
|
11
|
+
#define GGML_OPENVINO_NAME "OPENVINO"
|
|
12
|
+
|
|
13
|
+
// backend API
|
|
14
|
+
GGML_BACKEND_API ggml_backend_t ggml_backend_openvino_init(int device);
|
|
15
|
+
|
|
16
|
+
GGML_BACKEND_API bool ggml_backend_is_openvino(ggml_backend_t backend);
|
|
17
|
+
|
|
18
|
+
GGML_BACKEND_API bool ggml_backend_buffer_is_openvino(ggml_backend_buffer_t buffer);
|
|
19
|
+
|
|
20
|
+
GGML_BACKEND_API bool ggml_backend_buft_is_openvino(ggml_backend_buffer_type_t buft);
|
|
21
|
+
|
|
22
|
+
GGML_BACKEND_API bool ggml_backend_buft_is_openvino_host(ggml_backend_buffer_type_t buft);
|
|
23
|
+
|
|
24
|
+
GGML_BACKEND_API size_t ggml_backend_openvino_buffer_get_ctx_id(ggml_backend_buffer_t buffer);
|
|
25
|
+
|
|
26
|
+
// device buffer
|
|
27
|
+
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_openvino_buffer_type(int device);
|
|
28
|
+
|
|
29
|
+
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_openvino_host_buffer_type(int device);
|
|
30
|
+
|
|
31
|
+
GGML_BACKEND_API int ggml_backend_openvino_get_device_count(void);
|
|
32
|
+
|
|
33
|
+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_openvino_reg(void);
|
|
34
|
+
|
|
35
|
+
#ifdef __cplusplus
|
|
36
|
+
}
|
|
37
|
+
#endif
|
|
@@ -138,7 +138,7 @@ extern "C" {
|
|
|
138
138
|
GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params);
|
|
139
139
|
GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx);
|
|
140
140
|
|
|
141
|
-
// set gradients to zero,
|
|
141
|
+
// set gradients to zero, initialize loss, and optionally reset the optimizer
|
|
142
142
|
GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer);
|
|
143
143
|
|
|
144
144
|
GGML_API bool ggml_opt_static_graphs(ggml_opt_context_t opt_ctx); // whether the graphs are allocated_statically
|
|
@@ -8,7 +8,12 @@ extern "C" {
|
|
|
8
8
|
|
|
9
9
|
#define RPC_PROTO_MAJOR_VERSION 3
|
|
10
10
|
#define RPC_PROTO_MINOR_VERSION 6
|
|
11
|
-
#define RPC_PROTO_PATCH_VERSION
|
|
11
|
+
#define RPC_PROTO_PATCH_VERSION 1
|
|
12
|
+
|
|
13
|
+
#ifdef __cplusplus
|
|
14
|
+
static_assert(GGML_OP_COUNT == 96, "GGML_OP_COUNT has changed - update RPC_PROTO_PATCH_VERSION");
|
|
15
|
+
#endif
|
|
16
|
+
|
|
12
17
|
#define GGML_RPC_MAX_SERVERS 16
|
|
13
18
|
|
|
14
19
|
// backend API
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
// This documentation is still a work in progress.
|
|
7
7
|
// If you wish some specific topics to be covered, feel free to drop a comment:
|
|
8
8
|
//
|
|
9
|
-
// https://github.com/
|
|
9
|
+
// https://github.com/ggml-org/whisper.cpp/issues/40
|
|
10
10
|
//
|
|
11
11
|
// ## Overview
|
|
12
12
|
//
|
|
@@ -427,7 +427,8 @@ extern "C" {
|
|
|
427
427
|
// GGML_TYPE_IQ4_NL_4_8 = 37,
|
|
428
428
|
// GGML_TYPE_IQ4_NL_8_8 = 38,
|
|
429
429
|
GGML_TYPE_MXFP4 = 39, // MXFP4 (1 block)
|
|
430
|
-
|
|
430
|
+
GGML_TYPE_NVFP4 = 40, // NVFP4 (4 blocks, E4M3 scale)
|
|
431
|
+
GGML_TYPE_COUNT = 41,
|
|
431
432
|
};
|
|
432
433
|
|
|
433
434
|
// precision
|
|
@@ -463,6 +464,7 @@ extern "C" {
|
|
|
463
464
|
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
|
|
464
465
|
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
|
|
465
466
|
GGML_FTYPE_MOSTLY_MXFP4 = 25, // except 1d tensors
|
|
467
|
+
GGML_FTYPE_MOSTLY_NVFP4 = 26, // except 1d tensors
|
|
466
468
|
};
|
|
467
469
|
|
|
468
470
|
// available tensor operations:
|
|
@@ -556,6 +558,7 @@ extern "C" {
|
|
|
556
558
|
GGML_OP_GATED_LINEAR_ATTN,
|
|
557
559
|
GGML_OP_RWKV_WKV7,
|
|
558
560
|
GGML_OP_SOLVE_TRI,
|
|
561
|
+
GGML_OP_GATED_DELTA_NET,
|
|
559
562
|
|
|
560
563
|
GGML_OP_UNARY,
|
|
561
564
|
|
|
@@ -630,10 +633,11 @@ extern "C" {
|
|
|
630
633
|
|
|
631
634
|
// this tensor...
|
|
632
635
|
enum ggml_tensor_flag {
|
|
633
|
-
GGML_TENSOR_FLAG_INPUT
|
|
634
|
-
GGML_TENSOR_FLAG_OUTPUT
|
|
635
|
-
GGML_TENSOR_FLAG_PARAM
|
|
636
|
-
GGML_TENSOR_FLAG_LOSS
|
|
636
|
+
GGML_TENSOR_FLAG_INPUT = 1, // ...is an input for the GGML compute graph
|
|
637
|
+
GGML_TENSOR_FLAG_OUTPUT = 2, // ...is an output for the GGML compute graph
|
|
638
|
+
GGML_TENSOR_FLAG_PARAM = 4, // ...contains trainable parameters
|
|
639
|
+
GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
|
|
640
|
+
GGML_TENSOR_FLAG_COMPUTE = 16, // ...must be computed
|
|
637
641
|
};
|
|
638
642
|
|
|
639
643
|
enum ggml_tri_type {
|
|
@@ -751,6 +755,7 @@ extern "C" {
|
|
|
751
755
|
GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
|
|
752
756
|
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
|
|
753
757
|
GGML_API bool ggml_is_empty (const struct ggml_tensor * tensor);
|
|
758
|
+
GGML_API bool ggml_is_view (const struct ggml_tensor * tensor);
|
|
754
759
|
GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
|
|
755
760
|
GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor);
|
|
756
761
|
GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor);
|
|
@@ -2465,6 +2470,17 @@ extern "C" {
|
|
|
2465
2470
|
bool lower,
|
|
2466
2471
|
bool uni);
|
|
2467
2472
|
|
|
2473
|
+
// TODO: add ggml_gated_delta_net_set_bcast() to be able to configure Q, K broadcast type: tiled vs interleaved [TAG_GGML_GDN_BCAST]
|
|
2474
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/19468#discussion_r2786394306
|
|
2475
|
+
GGML_API struct ggml_tensor * ggml_gated_delta_net(
|
|
2476
|
+
struct ggml_context * ctx,
|
|
2477
|
+
struct ggml_tensor * q,
|
|
2478
|
+
struct ggml_tensor * k,
|
|
2479
|
+
struct ggml_tensor * v,
|
|
2480
|
+
struct ggml_tensor * g,
|
|
2481
|
+
struct ggml_tensor * beta,
|
|
2482
|
+
struct ggml_tensor * state);
|
|
2483
|
+
|
|
2468
2484
|
// custom operators
|
|
2469
2485
|
|
|
2470
2486
|
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
|
|
@@ -2577,11 +2593,42 @@ extern "C" {
|
|
|
2577
2593
|
struct ggml_tensor * grad,
|
|
2578
2594
|
struct ggml_tensor * sgd_params); // alpha, weight decay
|
|
2579
2595
|
|
|
2596
|
+
// build forward multiple tensors and select one of them for computing
|
|
2597
|
+
// this is useful for creating graphs that have constant topology but compute different things based on the input
|
|
2598
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/18550
|
|
2580
2599
|
//
|
|
2581
|
-
//
|
|
2600
|
+
// nodes:
|
|
2601
|
+
// | - build forward into the graph but do not compute
|
|
2602
|
+
// c - build forward into the graph and compute
|
|
2582
2603
|
//
|
|
2604
|
+
// | | ... c ... |
|
|
2605
|
+
// | | ... c ... |
|
|
2606
|
+
// | | ... c ... |
|
|
2607
|
+
// [0 1 ... idx ... n-1] <-- ggml_build_forward_select(..., n, idx)
|
|
2608
|
+
// c
|
|
2609
|
+
// c
|
|
2610
|
+
//
|
|
2611
|
+
// example:
|
|
2612
|
+
// struct ggml_tensor * curs[3];
|
|
2613
|
+
//
|
|
2614
|
+
// curs[0] = compute0(...);
|
|
2615
|
+
// curs[1] = compute1(...);
|
|
2616
|
+
// curs[2] = compute2(...);
|
|
2617
|
+
//
|
|
2618
|
+
// int idx = select_branch(some_input);
|
|
2619
|
+
//
|
|
2620
|
+
// struct ggml_tensor * out = ggml_build_forward_select(cgraph, curs, 3, idx);
|
|
2621
|
+
//
|
|
2622
|
+
GGML_API struct ggml_tensor * ggml_build_forward_select(
|
|
2623
|
+
struct ggml_cgraph * cgraph,
|
|
2624
|
+
struct ggml_tensor ** tensors,
|
|
2625
|
+
int n_tensors,
|
|
2626
|
+
int idx);
|
|
2627
|
+
|
|
2628
|
+
GGML_API void ggml_build_forward_expand(
|
|
2629
|
+
struct ggml_cgraph * cgraph,
|
|
2630
|
+
struct ggml_tensor * tensor);
|
|
2583
2631
|
|
|
2584
|
-
GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
|
|
2585
2632
|
GGML_API void ggml_build_backward_expand(
|
|
2586
2633
|
struct ggml_context * ctx, // context for gradient computation
|
|
2587
2634
|
struct ggml_cgraph * cgraph,
|
|
@@ -2613,7 +2660,7 @@ extern "C" {
|
|
|
2613
2660
|
GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
|
|
2614
2661
|
|
|
2615
2662
|
// dump the graph into a file using the dot format
|
|
2616
|
-
GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph *
|
|
2663
|
+
GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * cgraph, const char * filename);
|
|
2617
2664
|
|
|
2618
2665
|
// TODO these functions were sandwiched in the old optimization interface, is there a better place for them?
|
|
2619
2666
|
typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
|
|
@@ -222,6 +222,7 @@ if (GGML_SCHED_NO_REALLOC)
|
|
|
222
222
|
endif()
|
|
223
223
|
|
|
224
224
|
add_library(ggml
|
|
225
|
+
ggml-backend-dl.cpp
|
|
225
226
|
ggml-backend-reg.cpp)
|
|
226
227
|
add_library(ggml::ggml ALIAS ggml)
|
|
227
228
|
|
|
@@ -451,6 +452,7 @@ ggml_add_backend(HIP)
|
|
|
451
452
|
ggml_add_backend(METAL)
|
|
452
453
|
ggml_add_backend(MUSA)
|
|
453
454
|
ggml_add_backend(RPC)
|
|
455
|
+
ggml_add_backend(VirtGPU)
|
|
454
456
|
ggml_add_backend(SYCL)
|
|
455
457
|
ggml_add_backend(Vulkan)
|
|
456
458
|
ggml_add_backend(WebGPU)
|
|
@@ -458,6 +460,7 @@ ggml_add_backend(zDNN)
|
|
|
458
460
|
ggml_add_backend(OpenCL)
|
|
459
461
|
ggml_add_backend(Hexagon)
|
|
460
462
|
ggml_add_backend(ZenDNN)
|
|
463
|
+
ggml_add_backend(OPENVINO)
|
|
461
464
|
|
|
462
465
|
foreach (target ggml-base ggml)
|
|
463
466
|
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
|
|
@@ -17,11 +17,6 @@
|
|
|
17
17
|
//#define AT_PRINTF(...) GGML_LOG_DEBUG(__VA_ARGS__)
|
|
18
18
|
#define AT_PRINTF(...)
|
|
19
19
|
|
|
20
|
-
|
|
21
|
-
static bool ggml_is_view(const struct ggml_tensor * t) {
|
|
22
|
-
return t->view_src != NULL;
|
|
23
|
-
}
|
|
24
|
-
|
|
25
20
|
// ops that return true for this function must not use restrict pointers for their backend implementations
|
|
26
21
|
bool ggml_op_can_inplace(enum ggml_op op) {
|
|
27
22
|
switch (op) {
|
|
@@ -627,7 +622,7 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
|
|
|
627
622
|
GGML_ASSERT(buffer_id >= 0);
|
|
628
623
|
struct hash_node * hn = ggml_gallocr_hash_get(galloc, node);
|
|
629
624
|
|
|
630
|
-
if (!ggml_gallocr_is_allocated(galloc, node) && !
|
|
625
|
+
if (!ggml_gallocr_is_allocated(galloc, node) && !ggml_impl_is_view(node)) {
|
|
631
626
|
hn->allocated = true;
|
|
632
627
|
assert(hn->addr.offset == 0);
|
|
633
628
|
|
|
@@ -658,7 +653,7 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
|
|
|
658
653
|
|
|
659
654
|
struct hash_node * p_hn = ggml_gallocr_hash_get(galloc, parent);
|
|
660
655
|
if (p_hn->n_children == 1 && p_hn->n_views == 0) {
|
|
661
|
-
if (
|
|
656
|
+
if (ggml_impl_is_view(parent)) {
|
|
662
657
|
struct ggml_tensor * view_src = parent->view_src;
|
|
663
658
|
struct hash_node * view_src_hn = ggml_gallocr_hash_get(galloc, view_src);
|
|
664
659
|
if (view_src_hn->n_views == 1 && view_src_hn->n_children == 0 && view_src->data == parent->data) {
|
|
@@ -739,7 +734,7 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
|
|
|
739
734
|
// GGML_OP_NONE does not appear normally in the graph nodes, but is used by ggml-backend to add dependencies to
|
|
740
735
|
// control when some tensors are allocated and freed. in this case, the dependencies are in `src`, but the node
|
|
741
736
|
// itself is never used and should not be considered a dependency
|
|
742
|
-
if (
|
|
737
|
+
if (ggml_impl_is_view(node) && node->op != GGML_OP_NONE) {
|
|
743
738
|
struct ggml_tensor * view_src = node->view_src;
|
|
744
739
|
ggml_gallocr_hash_get(galloc, view_src)->n_views += 1;
|
|
745
740
|
}
|
|
@@ -806,7 +801,7 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
|
|
|
806
801
|
parent->name, p_hn->n_children, p_hn->n_views, p_hn->allocated);
|
|
807
802
|
|
|
808
803
|
if (p_hn->n_children == 0 && p_hn->n_views == 0) {
|
|
809
|
-
if (
|
|
804
|
+
if (ggml_impl_is_view(parent)) {
|
|
810
805
|
struct ggml_tensor * view_src = parent->view_src;
|
|
811
806
|
struct hash_node * view_src_hn = ggml_gallocr_hash_get(galloc, view_src);
|
|
812
807
|
view_src_hn->n_views -= 1;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#include "ggml-backend-dl.h"
|
|
2
|
+
|
|
3
|
+
#ifdef _WIN32
|
|
4
|
+
|
|
5
|
+
dl_handle * dl_load_library(const fs::path & path) {
|
|
6
|
+
// suppress error dialogs for missing DLLs
|
|
7
|
+
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
|
8
|
+
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
|
9
|
+
|
|
10
|
+
HMODULE handle = LoadLibraryW(path.wstring().c_str());
|
|
11
|
+
|
|
12
|
+
SetErrorMode(old_mode);
|
|
13
|
+
|
|
14
|
+
return handle;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
void * dl_get_sym(dl_handle * handle, const char * name) {
|
|
18
|
+
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
|
19
|
+
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
|
20
|
+
|
|
21
|
+
void * p = (void *) GetProcAddress(handle, name);
|
|
22
|
+
|
|
23
|
+
SetErrorMode(old_mode);
|
|
24
|
+
|
|
25
|
+
return p;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
const char * dl_error() {
|
|
29
|
+
return "";
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
#else
|
|
33
|
+
|
|
34
|
+
dl_handle * dl_load_library(const fs::path & path) {
|
|
35
|
+
dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
|
|
36
|
+
return handle;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
void * dl_get_sym(dl_handle * handle, const char * name) {
|
|
40
|
+
return dlsym(handle, name);
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
const char * dl_error() {
|
|
44
|
+
const char *rslt = dlerror();
|
|
45
|
+
return rslt != nullptr ? rslt : "";
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
#endif
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#ifdef _WIN32
|
|
4
|
+
# define WIN32_LEAN_AND_MEAN
|
|
5
|
+
# ifndef NOMINMAX
|
|
6
|
+
# define NOMINMAX
|
|
7
|
+
# endif
|
|
8
|
+
# include <windows.h>
|
|
9
|
+
# include <winevt.h>
|
|
10
|
+
#else
|
|
11
|
+
# include <dlfcn.h>
|
|
12
|
+
# include <unistd.h>
|
|
13
|
+
#endif
|
|
14
|
+
#include <filesystem>
|
|
15
|
+
|
|
16
|
+
namespace fs = std::filesystem;
|
|
17
|
+
|
|
18
|
+
#ifdef _WIN32
|
|
19
|
+
|
|
20
|
+
using dl_handle = std::remove_pointer_t<HMODULE>;
|
|
21
|
+
|
|
22
|
+
struct dl_handle_deleter {
|
|
23
|
+
void operator()(HMODULE handle) {
|
|
24
|
+
FreeLibrary(handle);
|
|
25
|
+
}
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
#else
|
|
29
|
+
|
|
30
|
+
using dl_handle = void;
|
|
31
|
+
|
|
32
|
+
struct dl_handle_deleter {
|
|
33
|
+
void operator()(void * handle) {
|
|
34
|
+
dlclose(handle);
|
|
35
|
+
}
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
#endif
|
|
39
|
+
|
|
40
|
+
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
|
|
41
|
+
|
|
42
|
+
dl_handle * dl_load_library(const fs::path & path);
|
|
43
|
+
void * dl_get_sym(dl_handle * handle, const char * name);
|
|
44
|
+
const char * dl_error();
|
|
45
|
+
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#include "ggml-backend-impl.h"
|
|
2
2
|
#include "ggml-backend.h"
|
|
3
|
+
#include "ggml-backend-dl.h"
|
|
3
4
|
#include "ggml-impl.h"
|
|
4
5
|
#include <algorithm>
|
|
5
6
|
#include <cstring>
|
|
@@ -69,6 +70,10 @@
|
|
|
69
70
|
#include "ggml-rpc.h"
|
|
70
71
|
#endif
|
|
71
72
|
|
|
73
|
+
#ifdef GGML_USE_VIRTGPU_FRONTEND
|
|
74
|
+
#include "ggml-virtgpu.h"
|
|
75
|
+
#endif
|
|
76
|
+
|
|
72
77
|
#ifdef GGML_USE_CANN
|
|
73
78
|
#include "ggml-cann.h"
|
|
74
79
|
#endif
|
|
@@ -77,105 +82,27 @@
|
|
|
77
82
|
#include "ggml-zendnn.h"
|
|
78
83
|
#endif
|
|
79
84
|
|
|
80
|
-
|
|
81
|
-
#
|
|
82
|
-
# pragma clang diagnostic push
|
|
83
|
-
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
|
84
|
-
#elif defined(__GNUC__)
|
|
85
|
-
# pragma GCC diagnostic push
|
|
86
|
-
# pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
|
85
|
+
#ifdef GGML_USE_OPENVINO
|
|
86
|
+
#include "ggml-openvino.h"
|
|
87
87
|
#endif
|
|
88
88
|
|
|
89
89
|
namespace fs = std::filesystem;
|
|
90
90
|
|
|
91
91
|
static std::string path_str(const fs::path & path) {
|
|
92
|
-
std::string u8path;
|
|
93
92
|
try {
|
|
94
93
|
#if defined(__cpp_lib_char8_t)
|
|
95
94
|
// C++20 and later: u8string() returns std::u8string
|
|
96
|
-
std::u8string u8str = path.u8string();
|
|
97
|
-
|
|
95
|
+
const std::u8string u8str = path.u8string();
|
|
96
|
+
return std::string(reinterpret_cast<const char *>(u8str.data()), u8str.size());
|
|
98
97
|
#else
|
|
99
98
|
// C++17: u8string() returns std::string
|
|
100
|
-
|
|
99
|
+
return path.u8string();
|
|
101
100
|
#endif
|
|
102
101
|
} catch (...) {
|
|
102
|
+
return std::string();
|
|
103
103
|
}
|
|
104
|
-
return u8path;
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
#if defined(__clang__)
|
|
108
|
-
# pragma clang diagnostic pop
|
|
109
|
-
#elif defined(__GNUC__)
|
|
110
|
-
# pragma GCC diagnostic pop
|
|
111
|
-
#endif
|
|
112
|
-
|
|
113
|
-
#ifdef _WIN32
|
|
114
|
-
|
|
115
|
-
using dl_handle = std::remove_pointer_t<HMODULE>;
|
|
116
|
-
|
|
117
|
-
struct dl_handle_deleter {
|
|
118
|
-
void operator()(HMODULE handle) {
|
|
119
|
-
FreeLibrary(handle);
|
|
120
|
-
}
|
|
121
|
-
};
|
|
122
|
-
|
|
123
|
-
static dl_handle * dl_load_library(const fs::path & path) {
|
|
124
|
-
// suppress error dialogs for missing DLLs
|
|
125
|
-
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
|
126
|
-
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
|
127
|
-
|
|
128
|
-
HMODULE handle = LoadLibraryW(path.wstring().c_str());
|
|
129
|
-
|
|
130
|
-
SetErrorMode(old_mode);
|
|
131
|
-
|
|
132
|
-
return handle;
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
|
136
|
-
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
|
137
|
-
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
|
138
|
-
|
|
139
|
-
void * p = (void *) GetProcAddress(handle, name);
|
|
140
|
-
|
|
141
|
-
SetErrorMode(old_mode);
|
|
142
|
-
|
|
143
|
-
return p;
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
static const char * dl_error() {
|
|
147
|
-
return "";
|
|
148
104
|
}
|
|
149
105
|
|
|
150
|
-
#else
|
|
151
|
-
|
|
152
|
-
using dl_handle = void;
|
|
153
|
-
|
|
154
|
-
struct dl_handle_deleter {
|
|
155
|
-
void operator()(void * handle) {
|
|
156
|
-
dlclose(handle);
|
|
157
|
-
}
|
|
158
|
-
};
|
|
159
|
-
|
|
160
|
-
static void * dl_load_library(const fs::path & path) {
|
|
161
|
-
dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
|
|
162
|
-
|
|
163
|
-
return handle;
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
|
167
|
-
return dlsym(handle, name);
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
static const char * dl_error() {
|
|
171
|
-
const char *rslt = dlerror();
|
|
172
|
-
return rslt != nullptr ? rslt : "";
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
#endif
|
|
176
|
-
|
|
177
|
-
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
|
|
178
|
-
|
|
179
106
|
struct ggml_backend_reg_entry {
|
|
180
107
|
ggml_backend_reg_t reg;
|
|
181
108
|
dl_handle_ptr handle;
|
|
@@ -196,7 +123,12 @@ struct ggml_backend_registry {
|
|
|
196
123
|
register_backend(ggml_backend_sycl_reg());
|
|
197
124
|
#endif
|
|
198
125
|
#ifdef GGML_USE_VULKAN
|
|
126
|
+
// Add runtime disable check
|
|
127
|
+
if (getenv("GGML_DISABLE_VULKAN") == nullptr) {
|
|
199
128
|
register_backend(ggml_backend_vk_reg());
|
|
129
|
+
} else {
|
|
130
|
+
GGML_LOG_DEBUG("Vulkan backend disabled by GGML_DISABLE_VULKAN environment variable\n");
|
|
131
|
+
}
|
|
200
132
|
#endif
|
|
201
133
|
#ifdef GGML_USE_WEBGPU
|
|
202
134
|
register_backend(ggml_backend_webgpu_reg());
|
|
@@ -204,6 +136,10 @@ struct ggml_backend_registry {
|
|
|
204
136
|
#ifdef GGML_USE_ZDNN
|
|
205
137
|
register_backend(ggml_backend_zdnn_reg());
|
|
206
138
|
#endif
|
|
139
|
+
#ifdef GGML_USE_VIRTGPU_FRONTEND
|
|
140
|
+
register_backend(ggml_backend_virtgpu_reg());
|
|
141
|
+
#endif
|
|
142
|
+
|
|
207
143
|
#ifdef GGML_USE_OPENCL
|
|
208
144
|
register_backend(ggml_backend_opencl_reg());
|
|
209
145
|
#endif
|
|
@@ -222,6 +158,9 @@ struct ggml_backend_registry {
|
|
|
222
158
|
#ifdef GGML_USE_RPC
|
|
223
159
|
register_backend(ggml_backend_rpc_reg());
|
|
224
160
|
#endif
|
|
161
|
+
#ifdef GGML_USE_OPENVINO
|
|
162
|
+
register_backend(ggml_backend_openvino_reg());
|
|
163
|
+
#endif
|
|
225
164
|
#ifdef GGML_USE_CPU
|
|
226
165
|
register_backend(ggml_backend_cpu_reg());
|
|
227
166
|
#endif
|
|
@@ -539,9 +478,10 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
|
|
|
539
478
|
|
|
540
479
|
int best_score = 0;
|
|
541
480
|
fs::path best_path;
|
|
481
|
+
std::error_code ec;
|
|
542
482
|
|
|
543
483
|
for (const auto & search_path : search_paths) {
|
|
544
|
-
if (
|
|
484
|
+
if (!fs::exists(search_path, ec)) {
|
|
545
485
|
if (ec) {
|
|
546
486
|
GGML_LOG_DEBUG("%s: posix_stat(%s) failure, error-message: %s\n", __func__, path_str(search_path).c_str(), ec.message().c_str());
|
|
547
487
|
} else {
|
|
@@ -551,7 +491,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
|
|
|
551
491
|
}
|
|
552
492
|
fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
|
|
553
493
|
for (const auto & entry : dir_it) {
|
|
554
|
-
if (entry.is_regular_file()) {
|
|
494
|
+
if (entry.is_regular_file(ec)) {
|
|
555
495
|
auto filename = entry.path().filename();
|
|
556
496
|
auto ext = entry.path().extension();
|
|
557
497
|
if (filename.native().find(file_prefix) == 0 && ext == file_extension) {
|
|
@@ -620,9 +560,11 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
|
|
|
620
560
|
ggml_backend_load_best("rpc", silent, dir_path);
|
|
621
561
|
ggml_backend_load_best("sycl", silent, dir_path);
|
|
622
562
|
ggml_backend_load_best("vulkan", silent, dir_path);
|
|
563
|
+
ggml_backend_load_best("virtgpu", silent, dir_path);
|
|
623
564
|
ggml_backend_load_best("opencl", silent, dir_path);
|
|
624
565
|
ggml_backend_load_best("hexagon", silent, dir_path);
|
|
625
566
|
ggml_backend_load_best("musa", silent, dir_path);
|
|
567
|
+
ggml_backend_load_best("openvino", silent, dir_path);
|
|
626
568
|
ggml_backend_load_best("cpu", silent, dir_path);
|
|
627
569
|
// check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
|
|
628
570
|
const char * backend_path = std::getenv("GGML_BACKEND_PATH");
|
|
@@ -258,6 +258,7 @@ void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor *
|
|
|
258
258
|
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
|
|
259
259
|
|
|
260
260
|
if (backend->iface.set_tensor_async == NULL) {
|
|
261
|
+
ggml_backend_synchronize(backend);
|
|
261
262
|
ggml_backend_tensor_set(tensor, data, offset, size);
|
|
262
263
|
} else {
|
|
263
264
|
backend->iface.set_tensor_async(backend, tensor, data, offset, size);
|
|
@@ -271,6 +272,7 @@ void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_ten
|
|
|
271
272
|
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
|
|
272
273
|
|
|
273
274
|
if (backend->iface.get_tensor_async == NULL) {
|
|
275
|
+
ggml_backend_synchronize(backend);
|
|
274
276
|
ggml_backend_tensor_get(tensor, data, offset, size);
|
|
275
277
|
} else {
|
|
276
278
|
backend->iface.get_tensor_async(backend, tensor, data, offset, size);
|
|
@@ -874,9 +876,9 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
|
|
|
874
876
|
}
|
|
875
877
|
if (sched->debug > 1) {
|
|
876
878
|
ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node);
|
|
877
|
-
GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s] use=%d:", i, ggml_op_name(node->op), node->name,
|
|
879
|
+
GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s] use=%d,c=%d:", i, ggml_op_name(node->op), node->name,
|
|
878
880
|
fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node),
|
|
879
|
-
graph->use_counts[ggml_hash_find(&graph->visited_hash_set, node)]);
|
|
881
|
+
graph->use_counts[ggml_hash_find(&graph->visited_hash_set, node)], node->flags & GGML_TENSOR_FLAG_COMPUTE ? 1 : 0);
|
|
880
882
|
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
|
881
883
|
struct ggml_tensor * src = node->src[j];
|
|
882
884
|
if (src == NULL) {
|
|
@@ -1922,6 +1924,7 @@ static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set,
|
|
|
1922
1924
|
dst->view_offs = src->view_offs;
|
|
1923
1925
|
}
|
|
1924
1926
|
dst->op = src->op;
|
|
1927
|
+
dst->flags = src->flags;
|
|
1925
1928
|
memcpy(dst->op_params, src->op_params, sizeof(dst->op_params));
|
|
1926
1929
|
ggml_set_name(dst, src->name);
|
|
1927
1930
|
|
|
@@ -93,7 +93,7 @@ if (BLAS_FOUND)
|
|
|
93
93
|
endif()
|
|
94
94
|
|
|
95
95
|
target_link_libraries (ggml-blas PRIVATE ${BLAS_LIBRARIES})
|
|
96
|
-
target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS})
|
|
96
|
+
target_include_directories(ggml-blas SYSTEM PRIVATE ${BLAS_INCLUDE_DIRS})
|
|
97
97
|
else()
|
|
98
98
|
message(FATAL_ERROR "BLAS not found, please refer to "
|
|
99
99
|
"https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
|
|
@@ -226,6 +226,10 @@ static enum ggml_status ggml_backend_blas_graph_compute(ggml_backend_t backend,
|
|
|
226
226
|
for (int i = 0; i < cgraph->n_nodes; i++) {
|
|
227
227
|
struct ggml_tensor * node = cgraph->nodes[i];
|
|
228
228
|
|
|
229
|
+
if ((node->flags & GGML_TENSOR_FLAG_COMPUTE) == 0) {
|
|
230
|
+
continue;
|
|
231
|
+
}
|
|
232
|
+
|
|
229
233
|
switch (node->op) {
|
|
230
234
|
case GGML_OP_MUL_MAT:
|
|
231
235
|
ggml_backend_blas_mul_mat(ctx, node);
|
|
@@ -335,8 +339,8 @@ static const char * ggml_backend_blas_device_get_description(ggml_backend_dev_t
|
|
|
335
339
|
}
|
|
336
340
|
|
|
337
341
|
static void ggml_backend_blas_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
|
338
|
-
//
|
|
339
|
-
*free
|
|
342
|
+
// no memory to report
|
|
343
|
+
*free = 0;
|
|
340
344
|
*total = 0;
|
|
341
345
|
|
|
342
346
|
GGML_UNUSED(dev);
|