whispercpp 1.3.5 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.md +99 -2
- data/ext/extconf.rb +1 -0
- data/ext/ruby_whisper.c +20 -4
- data/ext/ruby_whisper.h +30 -2
- data/ext/ruby_whisper_context.c +216 -124
- data/ext/ruby_whisper_context_params.c +163 -0
- data/ext/ruby_whisper_model.c +0 -1
- data/ext/ruby_whisper_params.c +0 -1
- data/ext/ruby_whisper_segment.c +0 -1
- data/ext/ruby_whisper_token.c +29 -9
- data/ext/ruby_whisper_transcribe.cpp +4 -1
- data/ext/ruby_whisper_vad_context.c +48 -1
- data/ext/ruby_whisper_vad_context_detect.cpp +6 -5
- data/ext/ruby_whisper_vad_params.c +0 -1
- data/ext/ruby_whisper_vad_segment.c +0 -1
- data/ext/ruby_whisper_vad_segments.c +0 -1
- data/ext/sources/CMakeLists.txt +1 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/whisper-config.cmake.in +5 -40
- data/ext/sources/examples/bench/bench.cpp +23 -18
- data/ext/sources/examples/cli/cli.cpp +8 -0
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/miniaudio.h +4507 -2131
- data/ext/sources/examples/server/server.cpp +18 -4
- data/ext/sources/examples/talk-llama/CMakeLists.txt +3 -2
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +7 -13
- data/ext/sources/examples/talk-llama/llama-adapter.h +4 -3
- data/ext/sources/examples/talk-llama/llama-arch.cpp +335 -17
- data/ext/sources/examples/talk-llama/llama-arch.h +42 -0
- data/ext/sources/examples/talk-llama/llama-batch.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-chat.cpp +21 -1
- data/ext/sources/examples/talk-llama/llama-chat.h +1 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +508 -520
- data/ext/sources/examples/talk-llama/llama-context.h +27 -28
- data/ext/sources/examples/talk-llama/llama-cparams.h +5 -0
- data/ext/sources/examples/talk-llama/llama-ext.h +12 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +8 -8
- data/ext/sources/examples/talk-llama/llama-graph.cpp +583 -130
- data/ext/sources/examples/talk-llama/llama-graph.h +131 -10
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +57 -40
- data/ext/sources/examples/talk-llama/llama-hparams.h +79 -10
- data/ext/sources/examples/talk-llama/llama-impl.cpp +4 -4
- data/ext/sources/examples/talk-llama/llama-impl.h +13 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +274 -89
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +2 -3
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +275 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +140 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +11 -13
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +28 -11
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +527 -119
- data/ext/sources/examples/talk-llama/llama-model-loader.h +35 -5
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +60 -46
- data/ext/sources/examples/talk-llama/llama-model-saver.h +5 -2
- data/ext/sources/examples/talk-llama/llama-model.cpp +1365 -647
- data/ext/sources/examples/talk-llama/llama-model.h +72 -19
- data/ext/sources/examples/talk-llama/llama-quant.cpp +578 -346
- data/ext/sources/examples/talk-llama/{llama-sampling.cpp → llama-sampler.cpp} +190 -76
- data/ext/sources/examples/talk-llama/{llama-sampling.h → llama-sampler.h} +0 -2
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +118 -48
- data/ext/sources/examples/talk-llama/llama-vocab.h +5 -0
- data/ext/sources/examples/talk-llama/llama.cpp +76 -22
- data/ext/sources/examples/talk-llama/llama.h +63 -30
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +2 -3
- data/ext/sources/examples/talk-llama/models/apertus.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arcee.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arctic.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +4 -3
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +3 -5
- data/ext/sources/examples/talk-llama/models/bert.cpp +13 -7
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +9 -24
- data/ext/sources/examples/talk-llama/models/bloom.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/command-r.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/deci.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +24 -21
- data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +445 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/dream.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/eurobert.cpp +97 -0
- data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +145 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +2 -4
- data/ext/sources/examples/talk-llama/models/falcon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +7 -7
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/glm4.cpp +14 -7
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/granite.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/grok.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +5 -7
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/jais.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/jais2.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +145 -124
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llada.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llama.cpp +18 -11
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/{graph-context-mamba.cpp → mamba-base.cpp} +9 -3
- data/ext/sources/examples/talk-llama/models/mamba.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +11 -5
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +14 -13
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/models.h +181 -46
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +2 -9
- data/ext/sources/examples/talk-llama/models/mpt.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +26 -14
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/olmo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/openelm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/orion.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/paddleocr.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/phi2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/phi3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +9 -5
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/plm.cpp +15 -14
- data/ext/sources/examples/talk-llama/models/qwen.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +12 -9
- data/ext/sources/examples/talk-llama/models/qwen35.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +422 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +15 -8
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +84 -432
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +9 -18
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +8 -17
- data/ext/sources/examples/talk-llama/models/refact.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +165 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/xverse.cpp +3 -3
- data/ext/sources/examples/talk-llama/unicode.cpp +21 -65
- data/ext/sources/ggml/CMakeLists.txt +9 -3
- data/ext/sources/ggml/include/ggml-backend.h +1 -1
- data/ext/sources/ggml/include/ggml-cann.h +1 -1
- data/ext/sources/ggml/include/ggml-cpu.h +5 -0
- data/ext/sources/ggml/include/ggml-openvino.h +37 -0
- data/ext/sources/ggml/include/ggml-opt.h +1 -1
- data/ext/sources/ggml/include/ggml-rpc.h +6 -1
- data/ext/sources/ggml/include/ggml-virtgpu.h +14 -0
- data/ext/sources/ggml/include/ggml.h +56 -9
- data/ext/sources/ggml/src/CMakeLists.txt +3 -0
- data/ext/sources/ggml/src/ggml-alloc.c +4 -9
- data/ext/sources/ggml/src/ggml-backend-dl.cpp +48 -0
- data/ext/sources/ggml/src/ggml-backend-dl.h +45 -0
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +28 -86
- data/ext/sources/ggml/src/ggml-backend.cpp +5 -2
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +6 -2
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +1 -1
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +348 -189
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +40 -85
- data/ext/sources/ggml/src/ggml-cann/common.h +3 -4
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +44 -62
- data/ext/sources/ggml/src/ggml-common.h +11 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +16 -11
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +42 -19
- data/ext/sources/ggml/src/ggml-cpu/amx/common.h +34 -10
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +85 -85
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +85 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2744 -548
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1653 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1391 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +8 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +9 -9
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +118 -18
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +107 -26
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +2 -6
- data/ext/sources/ggml/src/ggml-cpu/common.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +59 -12
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +15 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +21 -20
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +965 -252
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +584 -197
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +903 -188
- data/ext/sources/ggml/src/ggml-cpu/ops.h +1 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +40 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +2890 -679
- data/ext/sources/ggml/src/ggml-cpu/repack.h +119 -8
- data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +111 -3
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +17 -0
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +19 -10
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +32 -30
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +134 -18
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +41 -27
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +6 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +78 -64
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +384 -143
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +36 -22
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +3 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +26 -5
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +127 -12
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +263 -0
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +595 -200
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +9 -8
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +173 -6
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +30 -10
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +158 -85
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +34 -22
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +127 -67
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +157 -65
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +18 -76
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +13 -10
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +2 -16
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +233 -133
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +8 -83
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +56 -32
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +3 -3
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +0 -1
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +199 -135
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +20 -14
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +55 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +31 -17
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +10 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +82 -45
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +334 -160
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +7 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +328 -197
- data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +765 -234
- data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +252 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +412 -265
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +23 -23
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.c → hex-dma.c} +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.h → hex-dma.h} +28 -3
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +27 -37
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +6 -35
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +240 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +251 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +20 -1347
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +211 -13
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +1119 -952
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +254 -244
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +36 -36
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +155 -138
- data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +339 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +209 -114
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +1 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- data/ext/sources/ggml/src/ggml-hexagon/libdl.h +79 -0
- data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +6 -0
- data/ext/sources/ggml/src/ggml-impl.h +62 -0
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -10
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +13 -2
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +8 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +147 -17
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +274 -73
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +22 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +102 -36
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +174 -23
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +580 -280
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +5 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +320 -107
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1068 -825
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +19 -1
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +3108 -636
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +41 -99
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +45 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +204 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +87 -56
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mv_q6_k.cl → mul_mv_q6_k_f32.cl} +4 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +31 -32
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +14 -4
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -60
- data/ext/sources/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +94 -48
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +26 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- data/ext/sources/ggml/src/ggml-openvino/.clang-format +154 -0
- data/ext/sources/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +975 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +373 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +1110 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +884 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.h +153 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +39 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +293 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +226 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +85 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.cpp +823 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.h +123 -0
- data/ext/sources/ggml/src/ggml-quants.c +96 -5
- data/ext/sources/ggml/src/ggml-quants.h +3 -0
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +15 -88
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +5 -1
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +21 -20
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +315 -10
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +69 -1
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +22 -1
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +791 -47
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +78 -68
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +1179 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +55 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +1338 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +667 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +225 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.hpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +309 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +316 -51
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +65 -66
- data/ext/sources/ggml/src/ggml-sycl/outprod.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +450 -287
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +6 -6
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +13 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +1 -1
- data/ext/sources/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- data/ext/sources/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- data/ext/sources/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1250 -465
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +16 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +374 -170
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +66 -22
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +389 -201
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +106 -58
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +9 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +128 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +12 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +20 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +11 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +8 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +3 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +5 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +36 -63
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +10 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +16 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +55 -35
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1314 -109
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1660 -1371
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +141 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +65 -72
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +6 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +40 -5
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +105 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{get_rows.tmpl.wgsl → get_rows.wgsl} +53 -259
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat.tmpl.wgsl → mul_mat.wgsl} +68 -257
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +692 -23
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_reg_tile.tmpl.wgsl → mul_mat_reg_tile.wgsl} +28 -128
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_subgroup_matrix.tmpl.wgsl → mul_mat_subgroup_matrix.wgsl} +31 -137
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +480 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{scale.tmpl.wgsl → scale.wgsl} +9 -36
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +40 -12
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +6 -1
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +31 -32
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +9 -6
- data/ext/sources/ggml/src/ggml.c +167 -33
- data/ext/sources/ggml/src/gguf.cpp +229 -44
- data/ext/sources/src/whisper.cpp +6 -28
- data/sig/whisper.rbs +43 -2
- data/test/test_context_params.rb +82 -0
- data/test/test_token.rb +11 -0
- data/test/test_vad_context.rb +58 -8
- data/test/test_whisper.rb +20 -0
- data/whispercpp.gemspec +1 -1
- metadata +240 -28
- data/ext/sources/ggml/cmake/BuildTypes.cmake +0 -54
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +0 -333
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +0 -94
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +0 -72
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +0 -49
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +0 -1020
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +0 -149
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +0 -454
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +0 -221
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +0 -188
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +0 -45
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +0 -267
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +0 -112
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +0 -483
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
#include "../../include/ggml-virtgpu.h"
|
|
2
|
+
#include "ggml-remoting.h"
|
|
3
|
+
|
|
4
|
+
// Backend interface callback: returns the display name of this backend.
// The name is a fixed string; the backend argument is not consulted.
static const char * ggml_backend_remoting_get_name(ggml_backend_t backend) {
    static const char * const backend_name = "API Remoting backend";

    UNUSED(backend);

    return backend_name;
}
|
|
9
|
+
|
|
10
|
+
// Backend interface callback: destroys the backend object itself.
// Only the ggml_backend instance is deleted; the context pointer stored in it
// is left untouched by this function.
static void ggml_backend_remoting_free(ggml_backend_t backend) {
    ggml_backend_t doomed = backend;
    delete doomed;
}
|
|
13
|
+
|
|
14
|
+
// Backend interface callback: forwards the compute graph to the virtgpu
// attached to the backend's device and returns the status reported by
// apir_backend_graph_compute().
static ggml_status ggml_backend_remoting_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
    return apir_backend_graph_compute(DEV_TO_GPU(backend->device), cgraph);
}
|
|
19
|
+
|
|
20
|
+
// Backend interface callback for graph optimization.
// Currently a no-op: the forwarding call is compiled out because it is not
// working yet, so both the gpu handle and the graph are only marked as unused.
static void ggml_backend_remoting_graph_optimize(ggml_backend_t backend, ggml_cgraph * cgraph) {
    virtgpu * gpu = DEV_TO_GPU(backend->device);
#if false
    // not working yet

    apir_backend_graph_optimize(gpu, cgraph);
#else
    UNUSED(gpu);
    UNUSED(cgraph);
#endif
}
|
|
31
|
+
|
|
32
|
+
// Callback table of the remoting backend. Entries are positional, so their
// order must follow the field order named in the comments. Only get_name,
// free, graph_compute and graph_optimize are provided; every other slot is
// left empty.
static ggml_backend_i ggml_backend_remoting_interface = {
    /* .get_name           = */ ggml_backend_remoting_get_name,
    /* .free               = */ ggml_backend_remoting_free,
    /* .set_tensor_async   = */ nullptr,  // ggml_backend_remoting_set_tensor_async,
    /* .get_tensor_async   = */ nullptr,  // ggml_backend_remoting_get_tensor_async,
    /* .cpy_tensor_async   = */ nullptr,  // ggml_backend_remoting_cpy_tensor_async,
    /* .synchronize        = */ nullptr,  // ggml_backend_remoting_synchronize,
    /* .graph_plan_create  = */ nullptr,
    /* .graph_plan_free    = */ nullptr,
    /* .graph_plan_update  = */ nullptr,
    /* .graph_plan_compute = */ nullptr,
    /* .graph_compute      = */ ggml_backend_remoting_graph_compute,
    /* .event_record       = */ nullptr,
    /* .event_wait         = */ nullptr,
    /* .graph_optimize     = */ ggml_backend_remoting_graph_optimize,
};
|
|
48
|
+
|
|
49
|
+
// Returns a pointer to the fixed 16-byte GUID identifying this backend.
// The GUID lives in a function-local static, so the same address is returned
// on every call.
static ggml_guid_t ggml_backend_remoting_guid() {
    static ggml_guid guid = {
        0xb8, 0xf7, 0x4f, 0x86,
        0x14, 0x03, 0x86, 0x02,
        0x91, 0xc8, 0xdd, 0xe9,
        0x02, 0x3f, 0xc0, 0x2b,
    };

    return &guid;
}
|
|
55
|
+
|
|
56
|
+
// Creates a new backend object for the given remoting device.
//
// dev:    device whose context is a ggml_backend_remoting_device_context
// params: ignored
//
// The returned backend is heap-allocated and shares the device's context
// pointer. Its device field is looked up again in the virtgpu registry using
// the device index stored in that context.
ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params) {
    UNUSED(params);

    auto * device_ctx = static_cast<ggml_backend_remoting_device_context *>(dev->context);

    return new ggml_backend{
        /* .guid      = */ ggml_backend_remoting_guid(),
        /* .interface = */ ggml_backend_remoting_interface,
        /* .device    = */ ggml_backend_reg_dev_get(ggml_backend_virtgpu_reg(), device_ctx->device),
        /* .context   = */ device_ctx,
    };
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "ggml-backend-impl.h"
|
|
4
|
+
#include "ggml-backend.h"
|
|
5
|
+
#include "ggml-impl.h"
|
|
6
|
+
#include "virtgpu.h"
|
|
7
|
+
|
|
8
|
+
#include <memory>
#include <string>
#include <tuple>
#include <vector>
|
|
10
|
+
|
|
11
|
+
// Backend name, and the prefix prepended to this backend's log/abort messages.
#define GGML_VIRTGPU_NAME "ggml-virtgpu"
#define GGML_VIRTGPU "ggml-virtgpu: "

// USE_ALWAYS_TRUE_SUPPORTS_OP: 1 is fast, 0 avoids micro-benchmark crashes

#define USE_ALWAYS_TRUE_SUPPORTS_OP 1
#define USE_METAL_GUEST_SUPPORTS_OP 0

// Accessor macros. Each expansion is fully parenthesized so that it behaves as
// a single operand: without the outer parentheses, a trailing `->member` or
// `.member` at the use site would bind to the innermost expression instead of
// to the result of the cast / address-of.

// virtgpu handle stored in a device's remoting context
#define DEV_TO_GPU(name) (((ggml_backend_remoting_device_context *) (name)->context)->gpu)

// remoting context attached to a buffer
#define BUFFER_TO_GGML_CONTEXT(name) ((ggml_backend_remoting_buffer_context *) (name)->context)

// address of the APIR context embedded in a buffer's remoting context
#define BUFFER_TO_APIR_CONTEXT(name) (&((ggml_backend_remoting_buffer_context *) (name)->context)->apir_context)

// host handle recorded in a buffer's APIR context
#define BUFFER_TO_HOST_HANDLE(name) (((ggml_backend_remoting_buffer_context *) (name)->context)->apir_context.host_handle)

// remoting context of device 0
#define GET_DEVICE_CONTEXT() ((ggml_backend_remoting_device_context *) ggml_backend_remoting_get_device(0)->context)

// virtgpu handle of the device that owns a buffer type
#define BUFT_TO_GPU(name) (((ggml_backend_remoting_device_context *) (name)->device->context)->gpu)
|
|
30
|
+
|
|
31
|
+
struct ggml_backend_remoting_device_context {
|
|
32
|
+
size_t device;
|
|
33
|
+
std::string name;
|
|
34
|
+
std::string description;
|
|
35
|
+
|
|
36
|
+
std::vector<std::tuple<void *, size_t, virtgpu_shmem *>> shared_memory;
|
|
37
|
+
|
|
38
|
+
virtgpu * gpu;
|
|
39
|
+
};
|
|
40
|
+
|
|
41
|
+
struct ggml_backend_remoting_buffer_context {
|
|
42
|
+
apir_buffer_context_t apir_context;
|
|
43
|
+
|
|
44
|
+
virtgpu * gpu;
|
|
45
|
+
|
|
46
|
+
void * base;
|
|
47
|
+
|
|
48
|
+
bool is_from_ptr;
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface;
|
|
52
|
+
extern const ggml_backend_device_i ggml_backend_remoting_device_interface;
|
|
53
|
+
extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_interface;
|
|
54
|
+
extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface;
|
|
55
|
+
extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_from_ptr_interface;
|
|
56
|
+
|
|
57
|
+
ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device);
|
|
58
|
+
ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params);
|
|
59
|
+
ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev);
|
|
60
|
+
|
|
61
|
+
// Converts a buffer type into the handle sent to the host: the buffer type's
// context pointer, reinterpreted as apir_buffer_type_host_handle_t.
static inline apir_buffer_type_host_handle_t ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) {
    // in the backend, the buffer handle is the buffer pointer
    void * const type_context = buft->context;

    return (apir_buffer_type_host_handle_t) type_context;
}
|
|
65
|
+
|
|
66
|
+
// Returns the host handle recorded in the buffer's remoting context.
// Aborts if the buffer has no context attached.
static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
    if (buffer->context == nullptr) {
        GGML_ABORT(GGML_VIRTGPU "%s: no context available :/", __func__);
    }

    auto * buffer_ctx = static_cast<ggml_backend_remoting_buffer_context *>(buffer->context);

    return buffer_ctx->apir_context.host_handle;
}
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# YAML schema for GGML remoting API functions
|
|
2
|
+
# This defines the structure for generating the remoting layer code
|
|
3
|
+
|
|
4
|
+
# Configuration for the generated files
|
|
5
|
+
config:
|
|
6
|
+
# Base path for the generated files
|
|
7
|
+
base_path: "ggml/src"
|
|
8
|
+
|
|
9
|
+
# Header files to update
|
|
10
|
+
files:
|
|
11
|
+
apir_backend_header: "ggml-virtgpu-apir/backend/shared/apir_backend.gen.h"
|
|
12
|
+
backend_dispatched_header: "ggml-virtgpu-apir/backend/backend-dispatched.gen.h"
|
|
13
|
+
virtgpu_forward_header: "ggml-virtgpu-apir/virtgpu-forward.gen.h"
|
|
14
|
+
|
|
15
|
+
# Simplified function definitions with grouping and metadata combined
|
|
16
|
+
functions:
|
|
17
|
+
device:
|
|
18
|
+
group_description: "device"
|
|
19
|
+
functions:
|
|
20
|
+
get_device_count:
|
|
21
|
+
# No specific metadata - uses default void return and base params
|
|
22
|
+
|
|
23
|
+
get_count:
|
|
24
|
+
frontend_return: "int"
|
|
25
|
+
|
|
26
|
+
get_name:
|
|
27
|
+
frontend_return: "char *"
|
|
28
|
+
|
|
29
|
+
get_description:
|
|
30
|
+
frontend_return: "char *"
|
|
31
|
+
|
|
32
|
+
get_type:
|
|
33
|
+
frontend_return: "uint32_t"
|
|
34
|
+
|
|
35
|
+
get_memory:
|
|
36
|
+
frontend_return: "void"
|
|
37
|
+
frontend_extra_params:
|
|
38
|
+
- "size_t *free"
|
|
39
|
+
- "size_t *total"
|
|
40
|
+
|
|
41
|
+
supports_op:
|
|
42
|
+
frontend_return: "bool"
|
|
43
|
+
frontend_extra_params:
|
|
44
|
+
- "const ggml_tensor *op"
|
|
45
|
+
|
|
46
|
+
get_buffer_type:
|
|
47
|
+
frontend_return: "apir_buffer_type_host_handle_t"
|
|
48
|
+
|
|
49
|
+
get_props:
|
|
50
|
+
frontend_return: "void"
|
|
51
|
+
frontend_extra_params:
|
|
52
|
+
- "bool *async"
|
|
53
|
+
- "bool *host_buffer"
|
|
54
|
+
- "bool *buffer_from_host_ptr"
|
|
55
|
+
- "bool *events"
|
|
56
|
+
|
|
57
|
+
buffer_from_ptr:
|
|
58
|
+
frontend_return: "apir_buffer_context_t"
|
|
59
|
+
frontend_extra_params:
|
|
60
|
+
- "size_t size"
|
|
61
|
+
- "size_t max_tensor_size"
|
|
62
|
+
|
|
63
|
+
buffer_type:
|
|
64
|
+
group_description: "buffer-type"
|
|
65
|
+
functions:
|
|
66
|
+
get_name:
|
|
67
|
+
frontend_return: "char *"
|
|
68
|
+
frontend_extra_params:
|
|
69
|
+
- "apir_buffer_type_host_handle_t host_handle"
|
|
70
|
+
|
|
71
|
+
get_alignment:
|
|
72
|
+
frontend_return: "size_t"
|
|
73
|
+
frontend_extra_params:
|
|
74
|
+
- "apir_buffer_type_host_handle_t host_handle"
|
|
75
|
+
|
|
76
|
+
get_max_size:
|
|
77
|
+
frontend_return: "size_t"
|
|
78
|
+
frontend_extra_params:
|
|
79
|
+
- "apir_buffer_type_host_handle_t host_handle"
|
|
80
|
+
|
|
81
|
+
is_host:
|
|
82
|
+
deprecated: true
|
|
83
|
+
|
|
84
|
+
alloc_buffer:
|
|
85
|
+
frontend_return: "apir_buffer_context_t"
|
|
86
|
+
frontend_extra_params:
|
|
87
|
+
- "apir_buffer_type_host_handle_t host_handle"
|
|
88
|
+
- "size_t size"
|
|
89
|
+
|
|
90
|
+
get_alloc_size:
|
|
91
|
+
frontend_return: "size_t"
|
|
92
|
+
frontend_extra_params:
|
|
93
|
+
- "apir_buffer_type_host_handle_t host_handle"
|
|
94
|
+
- "const ggml_tensor *op"
|
|
95
|
+
|
|
96
|
+
buffer:
|
|
97
|
+
group_description: "buffer"
|
|
98
|
+
functions:
|
|
99
|
+
get_base:
|
|
100
|
+
frontend_return: "void *"
|
|
101
|
+
frontend_extra_params:
|
|
102
|
+
- "apir_buffer_context_t *buffer_context"
|
|
103
|
+
|
|
104
|
+
set_tensor:
|
|
105
|
+
frontend_return: "void"
|
|
106
|
+
frontend_extra_params:
|
|
107
|
+
- "apir_buffer_context_t *buffer_context"
|
|
108
|
+
- "ggml_tensor *tensor"
|
|
109
|
+
- "const void *data"
|
|
110
|
+
- "size_t offset"
|
|
111
|
+
- "size_t size"
|
|
112
|
+
|
|
113
|
+
get_tensor:
|
|
114
|
+
frontend_return: "void"
|
|
115
|
+
frontend_extra_params:
|
|
116
|
+
- "apir_buffer_context_t *buffer_context"
|
|
117
|
+
- "const ggml_tensor *tensor"
|
|
118
|
+
- "void *data"
|
|
119
|
+
- "size_t offset"
|
|
120
|
+
- "size_t size"
|
|
121
|
+
|
|
122
|
+
cpy_tensor:
|
|
123
|
+
frontend_return: "bool"
|
|
124
|
+
frontend_extra_params:
|
|
125
|
+
- "apir_buffer_context_t *buffer_context"
|
|
126
|
+
- "const ggml_tensor *src"
|
|
127
|
+
- "const ggml_tensor *dst"
|
|
128
|
+
|
|
129
|
+
clear:
|
|
130
|
+
frontend_return: "void"
|
|
131
|
+
frontend_extra_params:
|
|
132
|
+
- "apir_buffer_context_t *buffer_context"
|
|
133
|
+
- "uint8_t value"
|
|
134
|
+
|
|
135
|
+
free_buffer:
|
|
136
|
+
frontend_return: "void"
|
|
137
|
+
frontend_extra_params:
|
|
138
|
+
- "apir_buffer_context_t *buffer_context"
|
|
139
|
+
|
|
140
|
+
backend:
|
|
141
|
+
group_description: "backend"
|
|
142
|
+
functions:
|
|
143
|
+
graph_compute:
|
|
144
|
+
frontend_return: "ggml_status"
|
|
145
|
+
frontend_extra_params:
|
|
146
|
+
- "ggml_cgraph *cgraph"
|
|
147
|
+
|
|
148
|
+
graph_optimize:
|
|
149
|
+
frontend_return: "ggml_cgraph *"
|
|
150
|
+
frontend_extra_params:
|
|
151
|
+
- "ggml_cgraph *cgraph"
|
|
152
|
+
enabled: false
|
|
153
|
+
|
|
154
|
+
# Naming patterns used for code generation
|
|
155
|
+
naming_patterns:
|
|
156
|
+
# How to generate enum names
|
|
157
|
+
enum_prefix: "APIR_COMMAND_TYPE_"
|
|
158
|
+
|
|
159
|
+
# How to generate backend function names
|
|
160
|
+
backend_function_prefix: "backend_"
|
|
161
|
+
|
|
162
|
+
# How to generate frontend function names
|
|
163
|
+
frontend_function_prefix: "apir_"
|
|
164
|
+
|
|
165
|
+
# Standard frontend first parameter
|
|
166
|
+
frontend_base_param: "struct virtgpu *gpu"
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
# Generated by Claude AI
|
|
4
|
+
|
|
5
|
+
Script to completely regenerate the GGML remoting codebase from YAML configuration.
|
|
6
|
+
|
|
7
|
+
This script reads ggmlremoting_functions.yaml and regenerates all the header files and
|
|
8
|
+
implementation templates for the GGML remoting layer.
|
|
9
|
+
|
|
10
|
+
Usage:
|
|
11
|
+
python regenerate_remoting.py
|
|
12
|
+
|
|
13
|
+
The script will:
|
|
14
|
+
1. Read ggmlremoting_functions.yaml configuration
|
|
15
|
+
2. Generate updated header files
|
|
16
|
+
3. Generate implementation templates in dedicated files
|
|
17
|
+
4. Show a summary of what was generated
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import yaml
|
|
21
|
+
from typing import Dict, List, Any
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
import os
|
|
24
|
+
import subprocess
|
|
25
|
+
import shutil
|
|
26
|
+
import logging
|
|
27
|
+
|
|
28
|
+
NL = '\n' # can't have f"{'\n'}" in f-strings
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class RemotingCodebaseGenerator:
|
|
32
|
+
def __init__(self, yaml_path: str = "ggmlremoting_functions.yaml"):
    """Load the YAML configuration that drives the code generation.

    Args:
        yaml_path: Path of the YAML file describing the remoting functions.

    Raises:
        FileNotFoundError: If ``yaml_path`` does not exist.
        KeyError: If the configuration lacks the ``functions``,
            ``naming_patterns`` or ``config`` top-level section.
    """
    self.yaml_path = yaml_path

    if not Path(yaml_path).exists():
        raise FileNotFoundError(f"Configuration file {yaml_path} not found")

    # Decode explicitly as UTF-8 instead of relying on the locale's default
    # encoding, so the file is read the same way on every platform.
    with open(yaml_path, 'r', encoding='utf-8') as f:
        self.config = yaml.safe_load(f)

    self.functions = self.config['functions']
    self.naming_patterns = self.config['naming_patterns']
    self.config_data = self.config['config']

    # Check if clang-format is available
    self.clang_format_available = self._check_clang_format_available()
|
|
48
|
+
|
|
49
|
+
def _check_clang_format_available(self) -> bool:
    """Report whether a ``clang-format`` executable can be found on PATH."""
    clang_format_path = shutil.which("clang-format")
    return clang_format_path is not None
|
|
52
|
+
|
|
53
|
+
def _format_file_with_clang_format(self, file_path: Path) -> bool:
    """Format ``file_path`` in place with ``clang-format -i``.

    Formatting is best-effort: every failure is logged and reported through
    the return value instead of being raised.

    Args:
        file_path: File to reformat.

    Returns:
        True if clang-format ran successfully, False if it is not available
        or if running it failed.
    """
    if not self.clang_format_available:
        return False

    try:
        subprocess.run(
            ["clang-format", "-i", str(file_path)],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        # stderr is captured by the call above, so it has to be logged
        # explicitly or the actual reason of the failure is lost.
        details = (e.stderr or "").strip()
        suffix = f": {details}" if details else ""
        logging.exception(f" ⚠️ clang-format failed for {file_path}{suffix}")
        return False
    except Exception as e:
        logging.exception(f" ⚠️ Unexpected error formatting {file_path}: {e}")
        return False

    return True
|
|
72
|
+
|
|
73
|
+
def generate_enum_name(self, group_name: str, function_name: str) -> str:
    """Build the command enum identifier of a function.

    The result is the configured ``enum_prefix`` followed by the upper-cased
    ``<group_name>_<function_name>``.
    """
    enum_prefix = self.naming_patterns['enum_prefix']
    command = f"{group_name}_{function_name}".upper()
    return f"{enum_prefix}{command}"
|
|
77
|
+
|
|
78
|
+
def generate_backend_function_name(self, group_name: str, function_name: str) -> str:
    """Return the name of the backend handler of a function.

    An entry keyed ``<group_name>_<function_name>`` in the optional
    ``backend_function_overrides`` naming pattern takes precedence;
    otherwise the name is ``backend_function_prefix`` followed by that key.
    """
    patterns = self.naming_patterns
    qualified_name = f"{group_name}_{function_name}"
    custom_names = patterns.get('backend_function_overrides', {})

    if qualified_name not in custom_names:
        return f"{patterns['backend_function_prefix']}{qualified_name}"

    return custom_names[qualified_name]
|
|
88
|
+
|
|
89
|
+
def generate_frontend_function_name(self, group_name: str, function_name: str) -> str:
    """Return the frontend function name: the configured
    ``frontend_function_prefix`` followed by ``<group_name>_<function_name>``.
    """
    frontend_prefix = self.naming_patterns['frontend_function_prefix']
    qualified_name = f"{group_name}_{function_name}"
    return f"{frontend_prefix}{qualified_name}"
|
|
93
|
+
|
|
94
|
+
def get_enabled_functions(self) -> List[Dict[str, Any]]:
    """Collect the metadata of every enabled function, in configuration order.

    A function is skipped only when its metadata sets ``enabled`` to a false
    value. Enum values are assigned consecutively, starting at 0, to the
    functions that are kept.

    Returns:
        One dict per enabled function with the keys ``group_name``,
        ``function_name``, ``enum_name``, ``enum_value``,
        ``backend_function``, ``frontend_function``, ``frontend_return``
        (default ``'void'``), ``frontend_extra_params`` (default ``[]``),
        ``group_description`` and ``deprecated`` (default ``False``).

    Raises:
        KeyError: If a group lacks ``group_description`` or ``functions``.
    """
    enabled: List[Dict[str, Any]] = []

    for group_name, group_data in self.functions.items():
        group_description = group_data['group_description']

        # A `functions:` key that holds nothing (or only comments) is parsed
        # as None, just like a function without metadata below; treat such
        # a group as empty instead of failing on `.items()`.
        group_functions = group_data['functions'] or {}

        for function_name, func_metadata in group_functions.items():
            # Handle case where func_metadata is None or empty (functions with only comments)
            if func_metadata is None:
                func_metadata = {}

            # Functions are enabled by default unless explicitly disabled
            if not func_metadata.get('enabled', True):
                continue

            enabled.append({
                'group_name': group_name,
                'function_name': function_name,
                'enum_name': self.generate_enum_name(group_name, function_name),
                # number of functions kept so far == next consecutive value
                'enum_value': len(enabled),
                'backend_function': self.generate_backend_function_name(group_name, function_name),
                'frontend_function': self.generate_frontend_function_name(group_name, function_name),
                'frontend_return': func_metadata.get('frontend_return', 'void'),
                'frontend_extra_params': func_metadata.get('frontend_extra_params', []),
                'group_description': group_description,
                'deprecated': func_metadata.get('deprecated', False),
            })

    return enabled
|
|
124
|
+
|
|
125
|
+
def generate_apir_backend_header(self) -> str:
    """Build the text of the APIR backend command header.

    The text consists of the ``ApirBackendCommandType`` enum (one entry per
    enabled function plus the dispatch table count) followed by the
    ``apir_dispatch_command_name`` lookup function. In both parts a comment
    with the group description is emitted whenever the group changes.
    """
    enabled = self.get_enabled_functions()

    enum_section = ["typedef enum ApirBackendCommandType {"]
    lookup_section = [
        "static inline const char * apir_dispatch_command_name(ApirBackendCommandType type) {",
        " switch (type) {",
    ]

    previous_group = None
    for entry in enabled:
        group = entry['group_name']

        # Add comment for new group
        if group != previous_group:
            previous_group = group
            enum_section.append("")
            enum_section.append(f" /* {entry['group_description']} */")
            lookup_section.append(f" /* {entry['group_description']} */")

        enum_section.append(f" {entry['enum_name']} = {entry['enum_value']},")

        # Reported name is the plain "<group>_<function>", without backend_ prefix
        command_name = f"{group}_{entry['function_name']}"
        lookup_section.append(f" case {entry['enum_name']}:")
        lookup_section.append(f' return "{command_name}";')

    # The count closes the enum
    enum_section.extend([
        "\n // last command_type index + 1",
        f" APIR_BACKEND_DISPATCH_TABLE_COUNT = {len(enabled)},",
        "} ApirBackendCommandType;",
    ])

    lookup_section.extend([
        "",
        " default:",
        ' return "unknown";',
        " }",
        "}",
    ])

    return "\n".join(enum_section) + "\n\n" + "\n".join(lookup_section) + "\n"
|
|
175
|
+
|
|
176
|
+
def generate_backend_dispatched_header(self) -> str:
    """Build the text of the backend dispatch header.

    The text holds one handler declaration per enabled function, followed by
    the ``apir_backend_dispatch_table`` array listing the same handlers in
    the same order. Deprecated functions keep their handler and are only
    annotated as deprecated.
    """
    return_type = "uint32_t"
    handler_params = "apir_encoder *enc, apir_decoder *dec, virgl_apir_context *ctx"

    declarations = []
    table_entries = []
    previous_group = None

    for entry in self.get_enabled_functions():
        if entry['group_name'] != previous_group:
            previous_group = entry['group_name']
            declarations.append(f"\n/* {entry['group_description']} */")
            table_entries.append(f"\n /* {entry['group_description']} */")
            table_entries.append("")

        # Function declarations
        if entry['deprecated']:
            declarations.append(
                f"/* {entry['enum_name']} is deprecated. Keeping the handler for backward compatibility. */"
            )
            deprecation_mark = " /* DEPRECATED */"
        else:
            deprecation_mark = ""

        declarations.append(f"{return_type} {entry['backend_function']}({handler_params});")

        # Dispatch table
        table_entries.append(
            f" /* {entry['enum_name']} = */ {entry['backend_function']}{deprecation_mark},"
        )

    pieces = [
        "#pragma once",
        "",
        "\n".join(declarations),
        "",
        'extern "C" {',
        "static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = {",
        "\n".join(table_entries),
        "};",
        "}",
        "",  # trailing newline
    ]
    return "\n".join(pieces)
|
|
221
|
+
|
|
222
|
+
def generate_virtgpu_forward_header(self) -> str:
    """Build the text of the frontend forward-declaration header.

    Each enabled, non-deprecated function gets a prototype whose first
    parameter is the configured ``frontend_base_param``, followed by the
    function's extra parameters. A deprecated function is replaced by a
    comment saying so.
    """
    base_param = self.naming_patterns
    declarations = []
    previous_group = None

    for entry in self.get_enabled_functions():
        if entry['group_name'] != previous_group:
            previous_group = entry['group_name']
            declarations += ["", f"/* {entry['group_description']} */"]

        frontend_name = entry['frontend_function']

        if entry['deprecated']:
            declarations.append(f"/* {frontend_name} is deprecated. */")
        else:
            # Build parameter list
            all_params = [base_param['frontend_base_param'], *entry['frontend_extra_params']]
            signature = ', '.join(all_params)
            declarations.append(f"{entry['frontend_return']} {frontend_name}({signature});")

    return "#pragma once\n" + "\n".join(declarations) + "\n"
|
|
251
|
+
|
|
252
|
+
def regenerate_codebase(self) -> None:
    """Regenerate the three generated remoting headers and log a summary.

    Output location depends on the current working directory:

    * if its final path component is exactly ``ggml-virtgpu``, files are
      written relative to ``.``;
    * otherwise they are written under ``<base_path>/ggml-virtgpu``, where
      ``base_path`` is read from ``self.config_data`` (default ``ggml/src``).

    Files written, always encoded as UTF-8:

    * ``backend/shared/apir_backend.gen.h``
    * ``backend/backend-dispatched.gen.h``
    * ``virtgpu-forward.gen.h``

    When ``self.clang_format_available`` is true, each file is passed to
    ``self._format_file_with_clang_format``; a falsy result is logged as a
    warning and does not abort the run. When it is false, a warning is
    logged and the files are left unformatted.
    """
    logging.info("🔄 Regenerating GGML Remoting Codebase...")
    logging.info("=" * 50)

    # Detect whether we are running from inside the frontend directory.
    # Compare the final path component rather than a raw string suffix so
    # that look-alike directories (e.g. "my-ggml-virtgpu") are not mistaken
    # for the frontend directory.
    current_dir = os.getcwd()
    is_frontend_dir = Path(current_dir).name == 'ggml-virtgpu'

    if is_frontend_dir:
        # Running from inside the ggml-virtgpu directory itself.
        logging.info("📍 Detected frontend directory execution")
        frontend_base = Path(".")
    else:
        # Running from the project root (fallback to the configured base path).
        logging.info("📍 Detected project root execution")
        base_path = self.config_data.get('base_path', 'ggml/src')
        frontend_base = Path(base_path) / "ggml-virtgpu"

    # Compute final file paths, each paired with the method that renders it.
    backend_base = frontend_base / "backend"
    outputs = (
        (backend_base / "shared" / "apir_backend.gen.h", self.generate_apir_backend_header),
        (backend_base / "backend-dispatched.gen.h", self.generate_backend_dispatched_header),
        (frontend_base / "virtgpu-forward.gen.h", self.generate_virtgpu_forward_header),
    )

    # Create every output directory before any content is generated.
    for target, _ in outputs:
        target.parent.mkdir(parents=True, exist_ok=True)

    logging.info("📁 Generating header files...")

    for target, render in outputs:
        # An explicit encoding keeps the generated headers identical across
        # platforms regardless of the locale's default encoding.
        target.write_text(render(), encoding="utf-8")
        logging.info(f" ✅ {target.resolve()}")

    # Format generated files with clang-format.
    generated_files = [target for target, _ in outputs]

    if not self.clang_format_available:
        logging.warning("\n⚠️clang-format not found in PATH. Generated files will not be formatted.\n"
                        " Install clang-format to enable automatic code formatting.")
    else:
        logging.info("\n🎨 Formatting files with clang-format...")
        for file_path in generated_files:
            if self._format_file_with_clang_format(file_path):
                logging.info(f" ✅ Formatted {file_path.name}")
            else:
                logging.warning(f" ❌ Failed to format {file_path.name}")

    # Generate summary.
    total_functions = len(self.get_enabled_functions())

    logging.info("\n📊 Generation Summary:")
    logging.info("=" * 50)
    logging.info(f" Total functions: {total_functions}")
    logging.info(f" Function groups: {len(self.functions)}")
    logging.info(" Header files: 3")
    logging.info(f" Working directory: {current_dir}")
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def main():
    """Run the code generator as a script entry point.

    Constructs a ``RemotingCodebaseGenerator`` and calls its
    ``regenerate_codebase`` method. Any ``Exception`` raised by either step
    is logged together with its traceback and turned into a process exit
    with status 1.

    Raises:
        SystemExit: with code 1 when generation fails.
    """
    try:
        generator = RemotingCodebaseGenerator()
        generator.regenerate_codebase()
    except Exception as e:
        logging.exception(f"❌ Error: {e}")
        # The bare ``exit`` name is injected by the ``site`` module for
        # interactive sessions and is missing under ``python -S`` or in
        # frozen builds; raising SystemExit works everywhere.
        raise SystemExit(1) from e


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#include "backend/shared/apir_backend.h"
|
|
2
|
+
#include "ggml-alloc.h"
|
|
3
|
+
#include "ggml-impl.h"
|
|
4
|
+
#include "ggml.h"
|
|
5
|
+
#include "virtgpu-shm.h"
|
|
6
|
+
#include "virtgpu-utils.h"
|
|
7
|
+
|
|
8
|
+
/*
 * Context record bundling two host handles with a shared-memory descriptor.
 * NOTE(review): the handle types and `struct virtgpu_shmem` are declared
 * outside this chunk, presumably in the headers included above; confirm.
 * This definition precedes the include of "virtgpu-forward.gen.h",
 * presumably because the generated declarations refer to it; confirm.
 */
struct apir_buffer_context_t {
|
|
9
|
+
apir_buffer_host_handle_t host_handle; /* presumably the host-side handle of the buffer; verify */
|
|
10
|
+
|
|
11
|
+
struct virtgpu_shmem shmem; /* shared-memory descriptor held by value */
|
|
12
|
+
apir_buffer_type_host_handle_t buft_host_handle; /* presumably the host-side handle of the buffer type; verify */
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
#include "virtgpu-forward.gen.h"
|