whispercpp 1.3.5 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.md +99 -2
- data/ext/extconf.rb +1 -0
- data/ext/ruby_whisper.c +20 -4
- data/ext/ruby_whisper.h +30 -2
- data/ext/ruby_whisper_context.c +216 -124
- data/ext/ruby_whisper_context_params.c +163 -0
- data/ext/ruby_whisper_model.c +0 -1
- data/ext/ruby_whisper_params.c +0 -1
- data/ext/ruby_whisper_segment.c +0 -1
- data/ext/ruby_whisper_token.c +29 -9
- data/ext/ruby_whisper_transcribe.cpp +4 -1
- data/ext/ruby_whisper_vad_context.c +48 -1
- data/ext/ruby_whisper_vad_context_detect.cpp +6 -5
- data/ext/ruby_whisper_vad_params.c +0 -1
- data/ext/ruby_whisper_vad_segment.c +0 -1
- data/ext/ruby_whisper_vad_segments.c +0 -1
- data/ext/sources/CMakeLists.txt +1 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/whisper-config.cmake.in +5 -40
- data/ext/sources/examples/bench/bench.cpp +23 -18
- data/ext/sources/examples/cli/cli.cpp +8 -0
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/miniaudio.h +4507 -2131
- data/ext/sources/examples/server/server.cpp +18 -4
- data/ext/sources/examples/talk-llama/CMakeLists.txt +3 -2
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +7 -13
- data/ext/sources/examples/talk-llama/llama-adapter.h +4 -3
- data/ext/sources/examples/talk-llama/llama-arch.cpp +335 -17
- data/ext/sources/examples/talk-llama/llama-arch.h +42 -0
- data/ext/sources/examples/talk-llama/llama-batch.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-chat.cpp +21 -1
- data/ext/sources/examples/talk-llama/llama-chat.h +1 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +508 -520
- data/ext/sources/examples/talk-llama/llama-context.h +27 -28
- data/ext/sources/examples/talk-llama/llama-cparams.h +5 -0
- data/ext/sources/examples/talk-llama/llama-ext.h +12 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +8 -8
- data/ext/sources/examples/talk-llama/llama-graph.cpp +583 -130
- data/ext/sources/examples/talk-llama/llama-graph.h +131 -10
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +57 -40
- data/ext/sources/examples/talk-llama/llama-hparams.h +79 -10
- data/ext/sources/examples/talk-llama/llama-impl.cpp +4 -4
- data/ext/sources/examples/talk-llama/llama-impl.h +13 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +274 -89
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +2 -3
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +275 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +140 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +11 -13
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +28 -11
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +527 -119
- data/ext/sources/examples/talk-llama/llama-model-loader.h +35 -5
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +60 -46
- data/ext/sources/examples/talk-llama/llama-model-saver.h +5 -2
- data/ext/sources/examples/talk-llama/llama-model.cpp +1365 -647
- data/ext/sources/examples/talk-llama/llama-model.h +72 -19
- data/ext/sources/examples/talk-llama/llama-quant.cpp +578 -346
- data/ext/sources/examples/talk-llama/{llama-sampling.cpp → llama-sampler.cpp} +190 -76
- data/ext/sources/examples/talk-llama/{llama-sampling.h → llama-sampler.h} +0 -2
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +118 -48
- data/ext/sources/examples/talk-llama/llama-vocab.h +5 -0
- data/ext/sources/examples/talk-llama/llama.cpp +76 -22
- data/ext/sources/examples/talk-llama/llama.h +63 -30
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +2 -3
- data/ext/sources/examples/talk-llama/models/apertus.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arcee.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arctic.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +4 -3
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +3 -5
- data/ext/sources/examples/talk-llama/models/bert.cpp +13 -7
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +9 -24
- data/ext/sources/examples/talk-llama/models/bloom.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/command-r.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/deci.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +24 -21
- data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +445 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/dream.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/eurobert.cpp +97 -0
- data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +145 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +2 -4
- data/ext/sources/examples/talk-llama/models/falcon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +7 -7
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/glm4.cpp +14 -7
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/granite.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/grok.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +5 -7
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/jais.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/jais2.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +145 -124
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llada.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llama.cpp +18 -11
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/{graph-context-mamba.cpp → mamba-base.cpp} +9 -3
- data/ext/sources/examples/talk-llama/models/mamba.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +11 -5
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +14 -13
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/models.h +181 -46
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +2 -9
- data/ext/sources/examples/talk-llama/models/mpt.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +26 -14
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/olmo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/openelm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/orion.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/paddleocr.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/phi2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/phi3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +9 -5
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/plm.cpp +15 -14
- data/ext/sources/examples/talk-llama/models/qwen.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +12 -9
- data/ext/sources/examples/talk-llama/models/qwen35.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +422 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +15 -8
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +84 -432
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +9 -18
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +8 -17
- data/ext/sources/examples/talk-llama/models/refact.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +165 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/xverse.cpp +3 -3
- data/ext/sources/examples/talk-llama/unicode.cpp +21 -65
- data/ext/sources/ggml/CMakeLists.txt +9 -3
- data/ext/sources/ggml/include/ggml-backend.h +1 -1
- data/ext/sources/ggml/include/ggml-cann.h +1 -1
- data/ext/sources/ggml/include/ggml-cpu.h +5 -0
- data/ext/sources/ggml/include/ggml-openvino.h +37 -0
- data/ext/sources/ggml/include/ggml-opt.h +1 -1
- data/ext/sources/ggml/include/ggml-rpc.h +6 -1
- data/ext/sources/ggml/include/ggml-virtgpu.h +14 -0
- data/ext/sources/ggml/include/ggml.h +56 -9
- data/ext/sources/ggml/src/CMakeLists.txt +3 -0
- data/ext/sources/ggml/src/ggml-alloc.c +4 -9
- data/ext/sources/ggml/src/ggml-backend-dl.cpp +48 -0
- data/ext/sources/ggml/src/ggml-backend-dl.h +45 -0
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +28 -86
- data/ext/sources/ggml/src/ggml-backend.cpp +5 -2
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +6 -2
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +1 -1
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +348 -189
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +40 -85
- data/ext/sources/ggml/src/ggml-cann/common.h +3 -4
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +44 -62
- data/ext/sources/ggml/src/ggml-common.h +11 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +16 -11
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +42 -19
- data/ext/sources/ggml/src/ggml-cpu/amx/common.h +34 -10
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +85 -85
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +85 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2744 -548
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1653 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1391 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +8 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +9 -9
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +118 -18
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +107 -26
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +2 -6
- data/ext/sources/ggml/src/ggml-cpu/common.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +59 -12
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +15 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +21 -20
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +965 -252
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +584 -197
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +903 -188
- data/ext/sources/ggml/src/ggml-cpu/ops.h +1 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +40 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +2890 -679
- data/ext/sources/ggml/src/ggml-cpu/repack.h +119 -8
- data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +111 -3
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +17 -0
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +19 -10
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +32 -30
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +134 -18
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +41 -27
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +6 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +78 -64
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +384 -143
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +36 -22
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +3 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +26 -5
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +127 -12
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +263 -0
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +595 -200
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +9 -8
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +173 -6
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +30 -10
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +158 -85
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +34 -22
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +127 -67
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +157 -65
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +18 -76
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +13 -10
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +2 -16
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +233 -133
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +8 -83
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +56 -32
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +3 -3
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +0 -1
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +199 -135
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +20 -14
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +55 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +31 -17
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +10 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +82 -45
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +334 -160
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +7 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +328 -197
- data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +765 -234
- data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +252 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +412 -265
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +23 -23
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.c → hex-dma.c} +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.h → hex-dma.h} +28 -3
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +27 -37
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +6 -35
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +240 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +251 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +20 -1347
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +211 -13
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +1119 -952
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +254 -244
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +36 -36
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +155 -138
- data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +339 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +209 -114
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +1 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- data/ext/sources/ggml/src/ggml-hexagon/libdl.h +79 -0
- data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +6 -0
- data/ext/sources/ggml/src/ggml-impl.h +62 -0
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -10
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +13 -2
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +8 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +147 -17
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +274 -73
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +22 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +102 -36
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +174 -23
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +580 -280
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +5 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +320 -107
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1068 -825
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +19 -1
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +3108 -636
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +41 -99
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +45 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +204 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +87 -56
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mv_q6_k.cl → mul_mv_q6_k_f32.cl} +4 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +31 -32
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +14 -4
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -60
- data/ext/sources/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +94 -48
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +26 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- data/ext/sources/ggml/src/ggml-openvino/.clang-format +154 -0
- data/ext/sources/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +975 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +373 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +1110 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +884 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.h +153 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +39 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +293 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +226 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +85 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.cpp +823 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.h +123 -0
- data/ext/sources/ggml/src/ggml-quants.c +96 -5
- data/ext/sources/ggml/src/ggml-quants.h +3 -0
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +15 -88
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +5 -1
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +21 -20
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +315 -10
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +69 -1
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +22 -1
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +791 -47
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +78 -68
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +1179 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +55 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +1338 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +667 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +225 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.hpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +309 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +316 -51
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +65 -66
- data/ext/sources/ggml/src/ggml-sycl/outprod.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +450 -287
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +6 -6
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +13 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +1 -1
- data/ext/sources/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- data/ext/sources/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- data/ext/sources/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1250 -465
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +16 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +374 -170
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +66 -22
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +389 -201
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +106 -58
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +9 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +128 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +12 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +20 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +11 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +8 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +3 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +5 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +36 -63
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +10 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +16 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +55 -35
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1314 -109
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1660 -1371
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +141 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +65 -72
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +6 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +40 -5
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +105 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{get_rows.tmpl.wgsl → get_rows.wgsl} +53 -259
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat.tmpl.wgsl → mul_mat.wgsl} +68 -257
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +692 -23
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_reg_tile.tmpl.wgsl → mul_mat_reg_tile.wgsl} +28 -128
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_subgroup_matrix.tmpl.wgsl → mul_mat_subgroup_matrix.wgsl} +31 -137
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +480 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{scale.tmpl.wgsl → scale.wgsl} +9 -36
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +40 -12
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +6 -1
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +31 -32
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +9 -6
- data/ext/sources/ggml/src/ggml.c +167 -33
- data/ext/sources/ggml/src/gguf.cpp +229 -44
- data/ext/sources/src/whisper.cpp +6 -28
- data/sig/whisper.rbs +43 -2
- data/test/test_context_params.rb +82 -0
- data/test/test_token.rb +11 -0
- data/test/test_vad_context.rb +58 -8
- data/test/test_whisper.rb +20 -0
- data/whispercpp.gemspec +1 -1
- metadata +240 -28
- data/ext/sources/ggml/cmake/BuildTypes.cmake +0 -54
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +0 -333
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +0 -94
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +0 -72
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +0 -49
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +0 -1020
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +0 -149
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +0 -454
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +0 -221
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +0 -188
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +0 -45
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +0 -267
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +0 -112
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +0 -483
|
@@ -24,9 +24,6 @@
|
|
|
24
24
|
static const NSInteger MTLGPUFamilyMetal3_GGML = 5001;
|
|
25
25
|
static const NSInteger MTLGPUFamilyMetal4_GGML = 5002;
|
|
26
26
|
|
|
27
|
-
// virtual address for GPU memory allocations
|
|
28
|
-
static atomic_uintptr_t g_addr_device = 0x000000400ULL;
|
|
29
|
-
|
|
30
27
|
#if !GGML_METAL_EMBED_LIBRARY
|
|
31
28
|
// Here to assist with NSBundle Path Hack
|
|
32
29
|
@interface GGMLMetalClass : NSObject
|
|
@@ -349,10 +346,12 @@ struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline(ggml_meta
|
|
|
349
346
|
|
|
350
347
|
struct ggml_metal_pipeline_with_params res = {
|
|
351
348
|
/*.pipeline =*/ nil,
|
|
349
|
+
/*.nsg =*/ 0,
|
|
352
350
|
/*.nr0 =*/ 0,
|
|
353
351
|
/*.nr1 =*/ 0,
|
|
354
|
-
/*.nsg =*/ 0,
|
|
355
352
|
/*.smem =*/ 0,
|
|
353
|
+
/*.c4 =*/ false,
|
|
354
|
+
/*.cnt =*/ false,
|
|
356
355
|
};
|
|
357
356
|
|
|
358
357
|
res.pipeline = ggml_metal_pipelines_get(lib->pipelines, name);
|
|
@@ -365,10 +364,12 @@ struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline(ggml_meta
|
|
|
365
364
|
struct ggml_metal_pipeline_with_params ggml_metal_library_compile_pipeline(ggml_metal_library_t lib, const char * base, const char * name, ggml_metal_cv_t cv) {
|
|
366
365
|
struct ggml_metal_pipeline_with_params res = {
|
|
367
366
|
/*.pipeline =*/ nil,
|
|
367
|
+
/*.nsg =*/ 0,
|
|
368
368
|
/*.nr0 =*/ 0,
|
|
369
369
|
/*.nr1 =*/ 0,
|
|
370
|
-
/*.nsg =*/ 0,
|
|
371
370
|
/*.smem =*/ 0,
|
|
371
|
+
/*.c4 =*/ false,
|
|
372
|
+
/*.cnt =*/ false,
|
|
372
373
|
};
|
|
373
374
|
|
|
374
375
|
[lib->lock lock];
|
|
@@ -523,6 +524,9 @@ struct ggml_metal_device {
|
|
|
523
524
|
ggml_metal_library_t library;
|
|
524
525
|
|
|
525
526
|
struct ggml_metal_device_props props;
|
|
527
|
+
|
|
528
|
+
// virtual address for GPU memory allocations
|
|
529
|
+
atomic_uintptr_t addr_virt;
|
|
526
530
|
};
|
|
527
531
|
|
|
528
532
|
//
|
|
@@ -618,7 +622,7 @@ void ggml_metal_rsets_free(ggml_metal_rsets_t rsets) {
|
|
|
618
622
|
free(rsets);
|
|
619
623
|
}
|
|
620
624
|
|
|
621
|
-
ggml_metal_device_t ggml_metal_device_init(
|
|
625
|
+
ggml_metal_device_t ggml_metal_device_init(int device) {
|
|
622
626
|
ggml_metal_device_t dev = calloc(1, sizeof(struct ggml_metal_device));
|
|
623
627
|
|
|
624
628
|
assert(dev != NULL);
|
|
@@ -632,6 +636,9 @@ ggml_metal_device_t ggml_metal_device_init(void) {
|
|
|
632
636
|
GGML_LOG_ERROR("%s: error: failed to create command queue\n", __func__);
|
|
633
637
|
}
|
|
634
638
|
|
|
639
|
+
dev->addr_virt = 0x000000400ULL;
|
|
640
|
+
|
|
641
|
+
dev->props.device = device;
|
|
635
642
|
dev->props.has_simdgroup_reduction = [dev->mtl_device supportsFamily:MTLGPUFamilyApple7];
|
|
636
643
|
dev->props.has_simdgroup_reduction |= [dev->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];
|
|
637
644
|
|
|
@@ -785,10 +792,15 @@ ggml_metal_device_t ggml_metal_device_init(void) {
|
|
|
785
792
|
dev->props.op_offload_min_batch_size = getenv("GGML_OP_OFFLOAD_MIN_BATCH") ? atoi(getenv("GGML_OP_OFFLOAD_MIN_BATCH")) : 32;
|
|
786
793
|
|
|
787
794
|
dev->props.max_buffer_size = dev->mtl_device.maxBufferLength;
|
|
788
|
-
dev->props.max_working_set_size = dev->mtl_device.recommendedMaxWorkingSetSize;
|
|
789
795
|
dev->props.max_theadgroup_memory_size = dev->mtl_device.maxThreadgroupMemoryLength;
|
|
796
|
+
if (@available(macOS 10.12, iOS 16.0, *)) {
|
|
797
|
+
dev->props.max_working_set_size = dev->mtl_device.recommendedMaxWorkingSetSize;
|
|
798
|
+
} else {
|
|
799
|
+
dev->props.max_working_set_size = dev->mtl_device.maxBufferLength;
|
|
800
|
+
}
|
|
790
801
|
|
|
791
|
-
|
|
802
|
+
snprintf(dev->props.name, sizeof(dev->props.name), "%s%d", "MTL", device);
|
|
803
|
+
snprintf(dev->props.desc, sizeof(dev->props.desc), "%s", [[dev->mtl_device name] UTF8String]);
|
|
792
804
|
|
|
793
805
|
dev->library = ggml_metal_library_init(dev);
|
|
794
806
|
if (!dev->library) {
|
|
@@ -918,6 +930,59 @@ void ggml_metal_device_rsets_keep_alive(ggml_metal_device_t dev) {
|
|
|
918
930
|
atomic_store_explicit(&dev->rsets->d_loop, 2*dev->rsets->keep_alive_s, memory_order_relaxed);
|
|
919
931
|
}
|
|
920
932
|
|
|
933
|
+
struct ggml_metal_event {
|
|
934
|
+
void * obj; // id<MTLEvent>
|
|
935
|
+
|
|
936
|
+
atomic_int value;
|
|
937
|
+
};
|
|
938
|
+
|
|
939
|
+
void ggml_metal_event_encode_signal(ggml_metal_event_t ev, ggml_metal_cmd_buf_t cmd_buf_raw) {
|
|
940
|
+
id<MTLEvent> event = (id<MTLEvent>)ev->obj;
|
|
941
|
+
|
|
942
|
+
id<MTLCommandBuffer> cmd_buf = (id<MTLCommandBuffer>) cmd_buf_raw;
|
|
943
|
+
|
|
944
|
+
[cmd_buf encodeSignalEvent:event value:atomic_fetch_add_explicit(&ev->value, 1, memory_order_relaxed) + 1];
|
|
945
|
+
}
|
|
946
|
+
|
|
947
|
+
void ggml_metal_event_encode_wait(ggml_metal_event_t ev, ggml_metal_cmd_buf_t cmd_buf_raw) {
|
|
948
|
+
id<MTLEvent> event = (id<MTLEvent>)ev->obj;
|
|
949
|
+
|
|
950
|
+
id<MTLCommandBuffer> cmd_buf = (id<MTLCommandBuffer>) cmd_buf_raw;
|
|
951
|
+
|
|
952
|
+
[cmd_buf encodeWaitForEvent:event value:atomic_load_explicit(&ev->value, memory_order_relaxed)];
|
|
953
|
+
}
|
|
954
|
+
|
|
955
|
+
ggml_metal_event_t ggml_metal_device_event_init(ggml_metal_device_t dev) {
|
|
956
|
+
id<MTLEvent> event = [dev->mtl_device newEvent];
|
|
957
|
+
|
|
958
|
+
ggml_metal_event_t ev = calloc(1, sizeof(struct ggml_metal_event));
|
|
959
|
+
|
|
960
|
+
ev->obj = (__bridge void *)event;
|
|
961
|
+
ev->value = 0;
|
|
962
|
+
|
|
963
|
+
return ev;
|
|
964
|
+
}
|
|
965
|
+
|
|
966
|
+
void ggml_metal_device_event_free(ggml_metal_device_t dev, ggml_metal_event_t ev) {
|
|
967
|
+
id<MTLEvent> event = ev->obj;
|
|
968
|
+
[event release];
|
|
969
|
+
|
|
970
|
+
free(ev);
|
|
971
|
+
|
|
972
|
+
GGML_UNUSED(dev);
|
|
973
|
+
}
|
|
974
|
+
|
|
975
|
+
void ggml_metal_device_event_synchronize(ggml_metal_device_t dev, ggml_metal_event_t ev) {
|
|
976
|
+
@autoreleasepool {
|
|
977
|
+
id<MTLEvent> event = ev->obj;
|
|
978
|
+
|
|
979
|
+
id<MTLCommandBuffer> cmd_buf = [dev->mtl_queue commandBuffer];
|
|
980
|
+
[cmd_buf encodeWaitForEvent:event value:atomic_load_explicit(&ev->value, memory_order_relaxed)];
|
|
981
|
+
[cmd_buf commit];
|
|
982
|
+
[cmd_buf waitUntilCompleted];
|
|
983
|
+
}
|
|
984
|
+
}
|
|
985
|
+
|
|
921
986
|
void ggml_metal_device_get_memory(ggml_metal_device_t dev, size_t * free, size_t * total) {
|
|
922
987
|
if (@available(macOS 10.12, iOS 16.0, *)) {
|
|
923
988
|
*total = dev->mtl_device.recommendedMaxWorkingSetSize;
|
|
@@ -946,6 +1011,15 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
|
|
|
946
1011
|
}
|
|
947
1012
|
|
|
948
1013
|
switch (op->op) {
|
|
1014
|
+
case GGML_OP_SCALE:
|
|
1015
|
+
case GGML_OP_FILL:
|
|
1016
|
+
case GGML_OP_CLAMP:
|
|
1017
|
+
case GGML_OP_SQR:
|
|
1018
|
+
case GGML_OP_SQRT:
|
|
1019
|
+
case GGML_OP_SIN:
|
|
1020
|
+
case GGML_OP_COS:
|
|
1021
|
+
case GGML_OP_LOG:
|
|
1022
|
+
return ggml_is_contiguous_rows(op->src[0]) && (op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16);
|
|
949
1023
|
case GGML_OP_UNARY:
|
|
950
1024
|
switch (ggml_get_unary_op(op)) {
|
|
951
1025
|
case GGML_UNARY_OP_TANH:
|
|
@@ -965,7 +1039,7 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
|
|
|
965
1039
|
case GGML_UNARY_OP_EXP:
|
|
966
1040
|
case GGML_UNARY_OP_SOFTPLUS:
|
|
967
1041
|
case GGML_UNARY_OP_EXPM1:
|
|
968
|
-
return
|
|
1042
|
+
return ggml_is_contiguous_rows(op->src[0]) && (op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16);
|
|
969
1043
|
default:
|
|
970
1044
|
return false;
|
|
971
1045
|
}
|
|
@@ -993,11 +1067,9 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
|
|
|
993
1067
|
case GGML_OP_MUL:
|
|
994
1068
|
case GGML_OP_DIV:
|
|
995
1069
|
case GGML_OP_ADD_ID:
|
|
996
|
-
return op->src[0]->type == GGML_TYPE_F32;
|
|
997
1070
|
case GGML_OP_ACC:
|
|
1071
|
+
return ggml_is_contiguous_rows(op->src[0]) && ggml_is_contiguous_rows(op->src[1]) && op->src[0]->type == GGML_TYPE_F32;
|
|
998
1072
|
case GGML_OP_REPEAT:
|
|
999
|
-
case GGML_OP_SCALE:
|
|
1000
|
-
case GGML_OP_FILL:
|
|
1001
1073
|
case GGML_OP_CONV_TRANSPOSE_1D:
|
|
1002
1074
|
return true;
|
|
1003
1075
|
case GGML_OP_CONV_TRANSPOSE_2D:
|
|
@@ -1005,14 +1077,6 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
|
|
|
1005
1077
|
(op->src[0]->type == GGML_TYPE_F16 || op->src[0]->type == GGML_TYPE_F32) &&
|
|
1006
1078
|
op->src[1]->type == GGML_TYPE_F32 &&
|
|
1007
1079
|
op->type == GGML_TYPE_F32;
|
|
1008
|
-
case GGML_OP_CLAMP:
|
|
1009
|
-
return op->src[0]->type == GGML_TYPE_F32;
|
|
1010
|
-
case GGML_OP_SQR:
|
|
1011
|
-
case GGML_OP_SQRT:
|
|
1012
|
-
case GGML_OP_SIN:
|
|
1013
|
-
case GGML_OP_COS:
|
|
1014
|
-
case GGML_OP_LOG:
|
|
1015
|
-
return ggml_is_contiguous(op->src[0]) && op->src[0]->type == GGML_TYPE_F32;
|
|
1016
1080
|
case GGML_OP_SUM:
|
|
1017
1081
|
return has_simdgroup_reduction && ggml_is_contiguous(op->src[0]);
|
|
1018
1082
|
case GGML_OP_TRI:
|
|
@@ -1022,9 +1086,8 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
|
|
|
1022
1086
|
case GGML_OP_MEAN:
|
|
1023
1087
|
case GGML_OP_SOFT_MAX:
|
|
1024
1088
|
case GGML_OP_GROUP_NORM:
|
|
1025
|
-
return has_simdgroup_reduction && ggml_is_contiguous_rows(op->src[0]);
|
|
1026
1089
|
case GGML_OP_L2_NORM:
|
|
1027
|
-
return has_simdgroup_reduction && (op->
|
|
1090
|
+
return has_simdgroup_reduction && ggml_is_contiguous_rows(op->src[0]);
|
|
1028
1091
|
case GGML_OP_COUNT_EQUAL:
|
|
1029
1092
|
return has_simdgroup_reduction &&
|
|
1030
1093
|
op->src[0]->type == GGML_TYPE_I32 &&
|
|
@@ -1044,10 +1107,10 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
|
|
|
1044
1107
|
op->src[1]->type == GGML_TYPE_F32 &&
|
|
1045
1108
|
op->type == GGML_TYPE_F32 &&
|
|
1046
1109
|
(op->src[0]->type == GGML_TYPE_F16 || op->src[0]->type == GGML_TYPE_F32);
|
|
1047
|
-
case GGML_OP_POOL_1D:
|
|
1048
|
-
return false;
|
|
1049
1110
|
case GGML_OP_UPSCALE:
|
|
1050
|
-
return op->src[0]->type == GGML_TYPE_F32
|
|
1111
|
+
return op->src[0]->type == GGML_TYPE_F32;
|
|
1112
|
+
case GGML_OP_POOL_1D:
|
|
1113
|
+
return ggml_is_contiguous(op->src[0]) && op->src[0]->type == GGML_TYPE_F32;
|
|
1051
1114
|
case GGML_OP_POOL_2D:
|
|
1052
1115
|
return op->src[0]->type == GGML_TYPE_F32;
|
|
1053
1116
|
case GGML_OP_PAD:
|
|
@@ -1078,12 +1141,9 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
|
|
|
1078
1141
|
op->src[0]->ne[0] != 112 &&
|
|
1079
1142
|
op->src[0]->ne[0] != 128 &&
|
|
1080
1143
|
op->src[0]->ne[0] != 192 &&
|
|
1081
|
-
op->src[0]->ne[0] != 256
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
if (op->src[0]->ne[0] == 576) {
|
|
1085
|
-
// DeepSeek sizes
|
|
1086
|
-
// TODO: disabled for now, until optmized
|
|
1144
|
+
op->src[0]->ne[0] != 256 &&
|
|
1145
|
+
op->src[0]->ne[0] != 320 &&
|
|
1146
|
+
op->src[0]->ne[0] != 576) {
|
|
1087
1147
|
return false;
|
|
1088
1148
|
}
|
|
1089
1149
|
if (op->src[1]->type != op->src[2]->type) {
|
|
@@ -1096,9 +1156,13 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
|
|
|
1096
1156
|
case GGML_OP_RWKV_WKV6:
|
|
1097
1157
|
case GGML_OP_RWKV_WKV7:
|
|
1098
1158
|
return true;
|
|
1159
|
+
case GGML_OP_GATED_DELTA_NET:
|
|
1160
|
+
return has_simdgroup_reduction && op->src[2]->ne[0] % 32 == 0;
|
|
1161
|
+
case GGML_OP_SOLVE_TRI:
|
|
1099
1162
|
case GGML_OP_MUL_MAT:
|
|
1100
1163
|
case GGML_OP_MUL_MAT_ID:
|
|
1101
|
-
return has_simdgroup_reduction;
|
|
1164
|
+
return has_simdgroup_reduction && op->src[0]->type != GGML_TYPE_NVFP4;
|
|
1165
|
+
case GGML_OP_SET:
|
|
1102
1166
|
case GGML_OP_CPY:
|
|
1103
1167
|
case GGML_OP_DUP:
|
|
1104
1168
|
case GGML_OP_CONT:
|
|
@@ -1155,7 +1219,7 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
|
|
|
1155
1219
|
};
|
|
1156
1220
|
}
|
|
1157
1221
|
case GGML_OP_GET_ROWS:
|
|
1158
|
-
return
|
|
1222
|
+
return op->src[0]->type != GGML_TYPE_NVFP4;
|
|
1159
1223
|
case GGML_OP_SET_ROWS:
|
|
1160
1224
|
{
|
|
1161
1225
|
if (op->src[0]->type != GGML_TYPE_F32) {
|
|
@@ -1177,6 +1241,8 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
|
|
|
1177
1241
|
return false;
|
|
1178
1242
|
};
|
|
1179
1243
|
}
|
|
1244
|
+
case GGML_OP_DIAG:
|
|
1245
|
+
return true;
|
|
1180
1246
|
case GGML_OP_OPT_STEP_ADAMW:
|
|
1181
1247
|
case GGML_OP_OPT_STEP_SGD:
|
|
1182
1248
|
return has_simdgroup_reduction;
|
|
@@ -1218,7 +1284,7 @@ struct ggml_metal_buffer {
|
|
|
1218
1284
|
bool use_residency_sets;
|
|
1219
1285
|
|
|
1220
1286
|
// optional MTLResidencySet
|
|
1221
|
-
// note: cannot use
|
|
1287
|
+
// note: cannot use explicitly "id<MTLResidencySet>" here because it is not available on certain OSes
|
|
1222
1288
|
id rset;
|
|
1223
1289
|
|
|
1224
1290
|
// pointers to global device
|
|
@@ -1344,8 +1410,8 @@ ggml_metal_buffer_t ggml_metal_buffer_init(ggml_metal_device_t dev, size_t size,
|
|
|
1344
1410
|
res->all_data = ggml_metal_host_malloc(size_aligned);
|
|
1345
1411
|
res->is_shared = true;
|
|
1346
1412
|
} else {
|
|
1347
|
-
// use virtual address
|
|
1348
|
-
res->all_data = (void *) atomic_fetch_add_explicit(&
|
|
1413
|
+
// use virtual address
|
|
1414
|
+
res->all_data = (void *) atomic_fetch_add_explicit(&dev->addr_virt, size_aligned, memory_order_relaxed);
|
|
1349
1415
|
res->is_shared = false;
|
|
1350
1416
|
}
|
|
1351
1417
|
res->all_size = size_aligned;
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
#define N_R0_Q4_K 2
|
|
36
36
|
#define N_SG_Q4_K 2
|
|
37
37
|
|
|
38
|
-
#define N_R0_Q5_K
|
|
38
|
+
#define N_R0_Q5_K 1
|
|
39
39
|
#define N_SG_Q5_K 2
|
|
40
40
|
|
|
41
41
|
#define N_R0_Q6_K 2
|
|
@@ -78,15 +78,52 @@
|
|
|
78
78
|
#define FC_MUL_MM 700
|
|
79
79
|
#define FC_ROPE 800
|
|
80
80
|
#define FC_SSM_CONV 900
|
|
81
|
-
#define
|
|
81
|
+
#define FC_SOLVE_TRI 1000
|
|
82
|
+
#define FC_COUNT_EQUAL 1100
|
|
83
|
+
#define FC_UNARY 1200
|
|
84
|
+
#define FC_BIN 1300
|
|
85
|
+
#define FC_SUM_ROWS 1400
|
|
86
|
+
#define FC_UPSCALE 1500
|
|
87
|
+
#define FC_GATED_DELTA_NET 1600
|
|
82
88
|
|
|
83
89
|
// op-specific constants
|
|
84
|
-
#define
|
|
90
|
+
#define OP_FLASH_ATTN_EXT_NQPSG 8
|
|
85
91
|
#define OP_FLASH_ATTN_EXT_NCPSG 64
|
|
86
92
|
|
|
87
|
-
#define
|
|
93
|
+
#define OP_FLASH_ATTN_EXT_VEC_NQPSG 1
|
|
88
94
|
#define OP_FLASH_ATTN_EXT_VEC_NCPSG 32
|
|
89
95
|
|
|
96
|
+
#define OP_UNARY_NUM_SCALE 10
|
|
97
|
+
#define OP_UNARY_NUM_FILL 11
|
|
98
|
+
#define OP_UNARY_NUM_CLAMP 12
|
|
99
|
+
#define OP_UNARY_NUM_SQR 13
|
|
100
|
+
#define OP_UNARY_NUM_SQRT 14
|
|
101
|
+
#define OP_UNARY_NUM_SIN 15
|
|
102
|
+
#define OP_UNARY_NUM_COS 16
|
|
103
|
+
#define OP_UNARY_NUM_LOG 17
|
|
104
|
+
#define OP_UNARY_NUM_LEAKY_RELU 18
|
|
105
|
+
|
|
106
|
+
#define OP_UNARY_NUM_TANH 100
|
|
107
|
+
#define OP_UNARY_NUM_RELU 101
|
|
108
|
+
#define OP_UNARY_NUM_SIGMOID 102
|
|
109
|
+
#define OP_UNARY_NUM_GELU 103
|
|
110
|
+
#define OP_UNARY_NUM_GELU_ERF 104
|
|
111
|
+
#define OP_UNARY_NUM_GELU_QUICK 105
|
|
112
|
+
#define OP_UNARY_NUM_SILU 106
|
|
113
|
+
#define OP_UNARY_NUM_ELU 107
|
|
114
|
+
#define OP_UNARY_NUM_NEG 108
|
|
115
|
+
#define OP_UNARY_NUM_ABS 109
|
|
116
|
+
#define OP_UNARY_NUM_SGN 110
|
|
117
|
+
#define OP_UNARY_NUM_STEP 111
|
|
118
|
+
#define OP_UNARY_NUM_HARDSWISH 112
|
|
119
|
+
#define OP_UNARY_NUM_HARDSIGMOID 113
|
|
120
|
+
#define OP_UNARY_NUM_EXP 114
|
|
121
|
+
#define OP_UNARY_NUM_SOFTPLUS 115
|
|
122
|
+
#define OP_UNARY_NUM_EXPM1 116
|
|
123
|
+
|
|
124
|
+
#define OP_SUM_ROWS_NUM_SUM_ROWS 10
|
|
125
|
+
#define OP_SUM_ROWS_NUM_MEAN 11
|
|
126
|
+
|
|
90
127
|
// kernel argument structs
|
|
91
128
|
//
|
|
92
129
|
// - element counters (e.g. ne00) typically use int32_t to reduce register usage
|
|
@@ -122,6 +159,31 @@ typedef struct {
|
|
|
122
159
|
int32_t dim;
|
|
123
160
|
} ggml_metal_kargs_concat;
|
|
124
161
|
|
|
162
|
+
typedef struct {
|
|
163
|
+
int32_t ne00;
|
|
164
|
+
int32_t ne01;
|
|
165
|
+
int32_t ne02;
|
|
166
|
+
int32_t ne03;
|
|
167
|
+
uint64_t nb00;
|
|
168
|
+
uint64_t nb01;
|
|
169
|
+
uint64_t nb02;
|
|
170
|
+
uint64_t nb03;
|
|
171
|
+
int32_t ne0;
|
|
172
|
+
int32_t ne1;
|
|
173
|
+
int32_t ne2;
|
|
174
|
+
int32_t ne3;
|
|
175
|
+
uint64_t nb0;
|
|
176
|
+
uint64_t nb1;
|
|
177
|
+
uint64_t nb2;
|
|
178
|
+
uint64_t nb3;
|
|
179
|
+
float slope;
|
|
180
|
+
float scale;
|
|
181
|
+
float bias;
|
|
182
|
+
float val;
|
|
183
|
+
float min;
|
|
184
|
+
float max;
|
|
185
|
+
} ggml_metal_kargs_unary;
|
|
186
|
+
|
|
125
187
|
typedef struct {
|
|
126
188
|
int32_t ne00;
|
|
127
189
|
int32_t ne01;
|
|
@@ -179,20 +241,6 @@ typedef struct {
|
|
|
179
241
|
uint64_t nb3;
|
|
180
242
|
} ggml_metal_kargs_repeat;
|
|
181
243
|
|
|
182
|
-
typedef struct {
|
|
183
|
-
float scale;
|
|
184
|
-
float bias;
|
|
185
|
-
} ggml_metal_kargs_scale;
|
|
186
|
-
|
|
187
|
-
typedef struct {
|
|
188
|
-
float val;
|
|
189
|
-
} ggml_metal_kargs_fill;
|
|
190
|
-
|
|
191
|
-
typedef struct {
|
|
192
|
-
float min;
|
|
193
|
-
float max;
|
|
194
|
-
} ggml_metal_kargs_clamp;
|
|
195
|
-
|
|
196
244
|
typedef struct {
|
|
197
245
|
int64_t nk0;
|
|
198
246
|
int64_t ne00;
|
|
@@ -496,8 +544,21 @@ typedef struct {
|
|
|
496
544
|
|
|
497
545
|
typedef struct {
|
|
498
546
|
int32_t ne00;
|
|
499
|
-
int32_t
|
|
547
|
+
int32_t ne01;
|
|
548
|
+
int32_t ne02;
|
|
549
|
+
int32_t ne03;
|
|
550
|
+
uint64_t nb00;
|
|
500
551
|
uint64_t nb01;
|
|
552
|
+
uint64_t nb02;
|
|
553
|
+
uint64_t nb03;
|
|
554
|
+
int32_t ne0;
|
|
555
|
+
int32_t ne1;
|
|
556
|
+
int32_t ne2;
|
|
557
|
+
int32_t ne3;
|
|
558
|
+
uint64_t nb0;
|
|
559
|
+
uint64_t nb1;
|
|
560
|
+
uint64_t nb2;
|
|
561
|
+
uint64_t nb3;
|
|
501
562
|
float eps;
|
|
502
563
|
} ggml_metal_kargs_l2_norm;
|
|
503
564
|
|
|
@@ -733,6 +794,71 @@ typedef struct {
|
|
|
733
794
|
uint64_t nb0;
|
|
734
795
|
} ggml_metal_kargs_ssm_scan;
|
|
735
796
|
|
|
797
|
+
typedef struct {
|
|
798
|
+
int32_t ne00;
|
|
799
|
+
int32_t ne01;
|
|
800
|
+
int32_t ne02;
|
|
801
|
+
int32_t ne03;
|
|
802
|
+
uint64_t nb00;
|
|
803
|
+
uint64_t nb01;
|
|
804
|
+
uint64_t nb02;
|
|
805
|
+
uint64_t nb03;
|
|
806
|
+
int32_t ne10;
|
|
807
|
+
int32_t ne11;
|
|
808
|
+
int32_t ne12;
|
|
809
|
+
int32_t ne13;
|
|
810
|
+
uint64_t nb10;
|
|
811
|
+
uint64_t nb11;
|
|
812
|
+
uint64_t nb12;
|
|
813
|
+
uint64_t nb13;
|
|
814
|
+
int32_t ne20;
|
|
815
|
+
int32_t ne21;
|
|
816
|
+
int32_t ne22;
|
|
817
|
+
int32_t ne23;
|
|
818
|
+
uint64_t nb20;
|
|
819
|
+
uint64_t nb21;
|
|
820
|
+
uint64_t nb22;
|
|
821
|
+
uint64_t nb23;
|
|
822
|
+
int32_t ns02;
|
|
823
|
+
int32_t ns12;
|
|
824
|
+
int32_t ns22;
|
|
825
|
+
int32_t ne0;
|
|
826
|
+
int32_t ne1;
|
|
827
|
+
int32_t ne2;
|
|
828
|
+
int32_t ne3;
|
|
829
|
+
uint64_t nb0;
|
|
830
|
+
uint64_t nb1;
|
|
831
|
+
uint64_t nb2;
|
|
832
|
+
uint64_t nb3;
|
|
833
|
+
} ggml_metal_kargs_gated_delta_net;
|
|
834
|
+
|
|
835
|
+
typedef struct {
|
|
836
|
+
int32_t ne00;
|
|
837
|
+
int32_t ne01;
|
|
838
|
+
int32_t ne02;
|
|
839
|
+
int32_t ne03;
|
|
840
|
+
uint64_t nb00;
|
|
841
|
+
uint64_t nb01;
|
|
842
|
+
uint64_t nb02;
|
|
843
|
+
uint64_t nb03;
|
|
844
|
+
int32_t ne10;
|
|
845
|
+
int32_t ne11;
|
|
846
|
+
int32_t ne12;
|
|
847
|
+
int32_t ne13;
|
|
848
|
+
uint64_t nb10;
|
|
849
|
+
uint64_t nb11;
|
|
850
|
+
uint64_t nb12;
|
|
851
|
+
uint64_t nb13;
|
|
852
|
+
int32_t ne0;
|
|
853
|
+
int32_t ne1;
|
|
854
|
+
int32_t ne2;
|
|
855
|
+
int32_t ne3;
|
|
856
|
+
uint64_t nb0;
|
|
857
|
+
uint64_t nb1;
|
|
858
|
+
uint64_t nb2;
|
|
859
|
+
uint64_t nb3;
|
|
860
|
+
} ggml_metal_kargs_solve_tri;
|
|
861
|
+
|
|
736
862
|
typedef struct {
|
|
737
863
|
int32_t ne00t;
|
|
738
864
|
int32_t ne00;
|
|
@@ -764,6 +890,25 @@ typedef struct {
|
|
|
764
890
|
uint64_t nb3;
|
|
765
891
|
} ggml_metal_kargs_set_rows;
|
|
766
892
|
|
|
893
|
+
typedef struct {
|
|
894
|
+
int32_t ne00;
|
|
895
|
+
int32_t ne01;
|
|
896
|
+
int32_t ne02;
|
|
897
|
+
int32_t ne03;
|
|
898
|
+
uint64_t nb00;
|
|
899
|
+
uint64_t nb01;
|
|
900
|
+
uint64_t nb02;
|
|
901
|
+
uint64_t nb03;
|
|
902
|
+
int32_t ne0;
|
|
903
|
+
int32_t ne1;
|
|
904
|
+
int32_t ne2;
|
|
905
|
+
int32_t ne3;
|
|
906
|
+
uint64_t nb0;
|
|
907
|
+
uint64_t nb1;
|
|
908
|
+
uint64_t nb2;
|
|
909
|
+
uint64_t nb3;
|
|
910
|
+
} ggml_metal_kargs_diag;
|
|
911
|
+
|
|
767
912
|
typedef struct {
|
|
768
913
|
int64_t ne00;
|
|
769
914
|
int64_t ne01;
|
|
@@ -785,6 +930,7 @@ typedef struct {
|
|
|
785
930
|
float sf1;
|
|
786
931
|
float sf2;
|
|
787
932
|
float sf3;
|
|
933
|
+
float poffs;
|
|
788
934
|
} ggml_metal_kargs_upscale;
|
|
789
935
|
|
|
790
936
|
typedef struct {
|
|
@@ -833,10 +979,6 @@ typedef struct {
|
|
|
833
979
|
int max_period;
|
|
834
980
|
} ggml_metal_kargs_timestep_embedding;
|
|
835
981
|
|
|
836
|
-
typedef struct {
|
|
837
|
-
float slope;
|
|
838
|
-
} ggml_metal_kargs_leaky_relu;
|
|
839
|
-
|
|
840
982
|
typedef struct {
|
|
841
983
|
int32_t ne00;
|
|
842
984
|
int32_t ne01;
|
|
@@ -928,6 +1070,15 @@ typedef struct {
|
|
|
928
1070
|
int64_t np;
|
|
929
1071
|
} ggml_metal_kargs_pool_2d;
|
|
930
1072
|
|
|
1073
|
+
typedef struct {
|
|
1074
|
+
int32_t k0;
|
|
1075
|
+
int32_t s0;
|
|
1076
|
+
int32_t p0;
|
|
1077
|
+
int64_t IW;
|
|
1078
|
+
int64_t OW;
|
|
1079
|
+
int64_t np;
|
|
1080
|
+
} ggml_metal_kargs_pool_1d;
|
|
1081
|
+
|
|
931
1082
|
typedef struct {
|
|
932
1083
|
int64_t ne00;
|
|
933
1084
|
uint64_t nb01;
|