whispercpp 1.3.5 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.md +99 -2
- data/ext/extconf.rb +1 -0
- data/ext/ruby_whisper.c +20 -4
- data/ext/ruby_whisper.h +30 -2
- data/ext/ruby_whisper_context.c +216 -124
- data/ext/ruby_whisper_context_params.c +163 -0
- data/ext/ruby_whisper_model.c +0 -1
- data/ext/ruby_whisper_params.c +0 -1
- data/ext/ruby_whisper_segment.c +0 -1
- data/ext/ruby_whisper_token.c +29 -9
- data/ext/ruby_whisper_transcribe.cpp +4 -1
- data/ext/ruby_whisper_vad_context.c +48 -1
- data/ext/ruby_whisper_vad_context_detect.cpp +6 -5
- data/ext/ruby_whisper_vad_params.c +0 -1
- data/ext/ruby_whisper_vad_segment.c +0 -1
- data/ext/ruby_whisper_vad_segments.c +0 -1
- data/ext/sources/CMakeLists.txt +1 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/whisper-config.cmake.in +5 -40
- data/ext/sources/examples/bench/bench.cpp +23 -18
- data/ext/sources/examples/cli/cli.cpp +8 -0
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/miniaudio.h +4507 -2131
- data/ext/sources/examples/server/server.cpp +18 -4
- data/ext/sources/examples/talk-llama/CMakeLists.txt +3 -2
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +7 -13
- data/ext/sources/examples/talk-llama/llama-adapter.h +4 -3
- data/ext/sources/examples/talk-llama/llama-arch.cpp +335 -17
- data/ext/sources/examples/talk-llama/llama-arch.h +42 -0
- data/ext/sources/examples/talk-llama/llama-batch.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-chat.cpp +21 -1
- data/ext/sources/examples/talk-llama/llama-chat.h +1 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +508 -520
- data/ext/sources/examples/talk-llama/llama-context.h +27 -28
- data/ext/sources/examples/talk-llama/llama-cparams.h +5 -0
- data/ext/sources/examples/talk-llama/llama-ext.h +12 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +8 -8
- data/ext/sources/examples/talk-llama/llama-graph.cpp +583 -130
- data/ext/sources/examples/talk-llama/llama-graph.h +131 -10
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +57 -40
- data/ext/sources/examples/talk-llama/llama-hparams.h +79 -10
- data/ext/sources/examples/talk-llama/llama-impl.cpp +4 -4
- data/ext/sources/examples/talk-llama/llama-impl.h +13 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +274 -89
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +2 -3
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +275 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +140 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +11 -13
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +28 -11
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +527 -119
- data/ext/sources/examples/talk-llama/llama-model-loader.h +35 -5
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +60 -46
- data/ext/sources/examples/talk-llama/llama-model-saver.h +5 -2
- data/ext/sources/examples/talk-llama/llama-model.cpp +1365 -647
- data/ext/sources/examples/talk-llama/llama-model.h +72 -19
- data/ext/sources/examples/talk-llama/llama-quant.cpp +578 -346
- data/ext/sources/examples/talk-llama/{llama-sampling.cpp → llama-sampler.cpp} +190 -76
- data/ext/sources/examples/talk-llama/{llama-sampling.h → llama-sampler.h} +0 -2
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +118 -48
- data/ext/sources/examples/talk-llama/llama-vocab.h +5 -0
- data/ext/sources/examples/talk-llama/llama.cpp +76 -22
- data/ext/sources/examples/talk-llama/llama.h +63 -30
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +2 -3
- data/ext/sources/examples/talk-llama/models/apertus.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arcee.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arctic.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +4 -3
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +3 -5
- data/ext/sources/examples/talk-llama/models/bert.cpp +13 -7
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +9 -24
- data/ext/sources/examples/talk-llama/models/bloom.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/command-r.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/deci.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +24 -21
- data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +445 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/dream.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/eurobert.cpp +97 -0
- data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +145 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +2 -4
- data/ext/sources/examples/talk-llama/models/falcon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +7 -7
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/glm4.cpp +14 -7
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/granite.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/grok.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +5 -7
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/jais.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/jais2.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +145 -124
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llada.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llama.cpp +18 -11
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/{graph-context-mamba.cpp → mamba-base.cpp} +9 -3
- data/ext/sources/examples/talk-llama/models/mamba.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +11 -5
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +14 -13
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/models.h +181 -46
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +2 -9
- data/ext/sources/examples/talk-llama/models/mpt.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +26 -14
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/olmo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/openelm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/orion.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/paddleocr.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/phi2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/phi3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +9 -5
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/plm.cpp +15 -14
- data/ext/sources/examples/talk-llama/models/qwen.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +12 -9
- data/ext/sources/examples/talk-llama/models/qwen35.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +422 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +15 -8
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +84 -432
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +9 -18
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +8 -17
- data/ext/sources/examples/talk-llama/models/refact.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +165 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/xverse.cpp +3 -3
- data/ext/sources/examples/talk-llama/unicode.cpp +21 -65
- data/ext/sources/ggml/CMakeLists.txt +9 -3
- data/ext/sources/ggml/include/ggml-backend.h +1 -1
- data/ext/sources/ggml/include/ggml-cann.h +1 -1
- data/ext/sources/ggml/include/ggml-cpu.h +5 -0
- data/ext/sources/ggml/include/ggml-openvino.h +37 -0
- data/ext/sources/ggml/include/ggml-opt.h +1 -1
- data/ext/sources/ggml/include/ggml-rpc.h +6 -1
- data/ext/sources/ggml/include/ggml-virtgpu.h +14 -0
- data/ext/sources/ggml/include/ggml.h +56 -9
- data/ext/sources/ggml/src/CMakeLists.txt +3 -0
- data/ext/sources/ggml/src/ggml-alloc.c +4 -9
- data/ext/sources/ggml/src/ggml-backend-dl.cpp +48 -0
- data/ext/sources/ggml/src/ggml-backend-dl.h +45 -0
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +28 -86
- data/ext/sources/ggml/src/ggml-backend.cpp +5 -2
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +6 -2
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +1 -1
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +348 -189
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +40 -85
- data/ext/sources/ggml/src/ggml-cann/common.h +3 -4
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +44 -62
- data/ext/sources/ggml/src/ggml-common.h +11 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +16 -11
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +42 -19
- data/ext/sources/ggml/src/ggml-cpu/amx/common.h +34 -10
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +85 -85
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +85 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2744 -548
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1653 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1391 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +8 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +9 -9
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +118 -18
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +107 -26
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +2 -6
- data/ext/sources/ggml/src/ggml-cpu/common.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +59 -12
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +15 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +21 -20
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +965 -252
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +584 -197
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +903 -188
- data/ext/sources/ggml/src/ggml-cpu/ops.h +1 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +40 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +2890 -679
- data/ext/sources/ggml/src/ggml-cpu/repack.h +119 -8
- data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +111 -3
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +17 -0
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +19 -10
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +32 -30
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +134 -18
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +41 -27
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +6 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +78 -64
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +384 -143
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +36 -22
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +3 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +26 -5
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +127 -12
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +263 -0
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +595 -200
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +9 -8
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +173 -6
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +30 -10
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +158 -85
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +34 -22
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +127 -67
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +157 -65
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +18 -76
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +13 -10
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +2 -16
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +233 -133
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +8 -83
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +56 -32
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +3 -3
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +0 -1
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +199 -135
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +20 -14
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +55 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +31 -17
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +10 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +82 -45
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +334 -160
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +7 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +328 -197
- data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +765 -234
- data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +252 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +412 -265
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +23 -23
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.c → hex-dma.c} +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.h → hex-dma.h} +28 -3
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +27 -37
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +6 -35
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +240 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +251 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +20 -1347
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +211 -13
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +1119 -952
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +254 -244
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +36 -36
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +155 -138
- data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +339 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +209 -114
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +1 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- data/ext/sources/ggml/src/ggml-hexagon/libdl.h +79 -0
- data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +6 -0
- data/ext/sources/ggml/src/ggml-impl.h +62 -0
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -10
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +13 -2
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +8 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +147 -17
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +274 -73
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +22 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +102 -36
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +174 -23
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +580 -280
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +5 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +320 -107
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1068 -825
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +19 -1
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +3108 -636
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +41 -99
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +45 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +204 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +87 -56
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mv_q6_k.cl → mul_mv_q6_k_f32.cl} +4 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +31 -32
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +14 -4
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -60
- data/ext/sources/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +94 -48
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +26 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- data/ext/sources/ggml/src/ggml-openvino/.clang-format +154 -0
- data/ext/sources/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +975 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +373 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +1110 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +884 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.h +153 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +39 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +293 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +226 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +85 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.cpp +823 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.h +123 -0
- data/ext/sources/ggml/src/ggml-quants.c +96 -5
- data/ext/sources/ggml/src/ggml-quants.h +3 -0
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +15 -88
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +5 -1
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +21 -20
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +315 -10
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +69 -1
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +22 -1
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +791 -47
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +78 -68
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +1179 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +55 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +1338 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +667 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +225 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.hpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +309 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +316 -51
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +65 -66
- data/ext/sources/ggml/src/ggml-sycl/outprod.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +450 -287
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +6 -6
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +13 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +1 -1
- data/ext/sources/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- data/ext/sources/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- data/ext/sources/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1250 -465
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +16 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +374 -170
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +66 -22
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +389 -201
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +106 -58
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +9 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +128 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +12 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +20 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +11 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +8 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +3 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +5 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +36 -63
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +10 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +16 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +55 -35
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1314 -109
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1660 -1371
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +141 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +65 -72
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +6 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +40 -5
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +105 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{get_rows.tmpl.wgsl → get_rows.wgsl} +53 -259
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat.tmpl.wgsl → mul_mat.wgsl} +68 -257
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +692 -23
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_reg_tile.tmpl.wgsl → mul_mat_reg_tile.wgsl} +28 -128
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_subgroup_matrix.tmpl.wgsl → mul_mat_subgroup_matrix.wgsl} +31 -137
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +480 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{scale.tmpl.wgsl → scale.wgsl} +9 -36
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +40 -12
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +6 -1
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +31 -32
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +9 -6
- data/ext/sources/ggml/src/ggml.c +167 -33
- data/ext/sources/ggml/src/gguf.cpp +229 -44
- data/ext/sources/src/whisper.cpp +6 -28
- data/sig/whisper.rbs +43 -2
- data/test/test_context_params.rb +82 -0
- data/test/test_token.rb +11 -0
- data/test/test_vad_context.rb +58 -8
- data/test/test_whisper.rb +20 -0
- data/whispercpp.gemspec +1 -1
- metadata +240 -28
- data/ext/sources/ggml/cmake/BuildTypes.cmake +0 -54
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +0 -333
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +0 -94
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +0 -72
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +0 -49
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +0 -1020
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +0 -149
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +0 -454
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +0 -221
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +0 -188
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +0 -45
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +0 -267
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +0 -112
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +0 -483
|
@@ -35,6 +35,7 @@
|
|
|
35
35
|
#endif
|
|
36
36
|
#include <sycl/half_type.hpp>
|
|
37
37
|
|
|
38
|
+
#include "ggml.h"
|
|
38
39
|
#include "ggml-sycl.h"
|
|
39
40
|
#include "ggml-impl.h"
|
|
40
41
|
#include "ggml-backend-impl.h"
|
|
@@ -43,17 +44,18 @@
|
|
|
43
44
|
#include "ggml-sycl/backend.hpp"
|
|
44
45
|
#include "ggml-sycl/common.hpp"
|
|
45
46
|
#include "ggml-sycl/element_wise.hpp"
|
|
47
|
+
#include "ggml-sycl/gated_delta_net.hpp"
|
|
48
|
+
#include "ggml-sycl/gemm.hpp"
|
|
49
|
+
#include "ggml-sycl/getrows.hpp"
|
|
46
50
|
#include "ggml-sycl/norm.hpp"
|
|
47
51
|
#include "ggml-sycl/presets.hpp"
|
|
48
|
-
#include "ggml-sycl/
|
|
52
|
+
#include "ggml-sycl/quantize.hpp"
|
|
53
|
+
#include "ggml-sycl/repeat_back.hpp"
|
|
49
54
|
#include "ggml-sycl/set_rows.hpp"
|
|
50
55
|
#include "ggml-sycl/set.hpp"
|
|
51
|
-
#include "ggml-sycl/sycl_hw.hpp"
|
|
52
|
-
#include "ggml-sycl/getrows.hpp"
|
|
53
|
-
#include "ggml-sycl/repeat_back.hpp"
|
|
54
|
-
#include "ggml-sycl/quantize.hpp"
|
|
55
56
|
#include "ggml-sycl/ssm_conv.hpp"
|
|
56
|
-
#include "ggml.h"
|
|
57
|
+
#include "ggml-sycl/sycl_hw.hpp"
|
|
58
|
+
|
|
57
59
|
|
|
58
60
|
static bool g_sycl_loaded = false;
|
|
59
61
|
int g_ggml_sycl_debug = 0;
|
|
@@ -62,6 +64,8 @@ int g_ggml_sycl_disable_graph = 0;
|
|
|
62
64
|
int g_ggml_sycl_disable_dnn = 0;
|
|
63
65
|
int g_ggml_sycl_prioritize_dmmv = 0;
|
|
64
66
|
int g_ggml_sycl_use_async_mem_op = 0;
|
|
67
|
+
int g_ggml_sycl_enable_flash_attention = 1;
|
|
68
|
+
|
|
65
69
|
|
|
66
70
|
static ggml_sycl_device_info ggml_sycl_init() {
|
|
67
71
|
ggml_sycl_device_info info = {};
|
|
@@ -94,11 +98,14 @@ static ggml_sycl_device_info ggml_sycl_init() {
|
|
|
94
98
|
|
|
95
99
|
info.devices[i].cc =
|
|
96
100
|
100 * prop.get_major_version() + 10 * prop.get_minor_version();
|
|
97
|
-
info.devices[i].nsm = prop.get_max_compute_units();
|
|
101
|
+
info.devices[i].nsm = prop.get_max_compute_units() / 16; //16: Number of Xe Cores
|
|
98
102
|
info.devices[i].opt_feature.reorder = device.ext_oneapi_architecture_is(syclex::arch_category::intel_gpu);
|
|
99
103
|
info.devices[i].smpbo = prop.get_local_mem_size();
|
|
104
|
+
info.devices[i].warp_size = WARP_SIZE;
|
|
100
105
|
|
|
101
106
|
info.max_work_group_sizes[i] = prop.get_max_work_group_size();
|
|
107
|
+
info.devices[i].max_wg_per_cu = info.max_work_group_sizes[i] / prop.get_max_compute_units();
|
|
108
|
+
|
|
102
109
|
}
|
|
103
110
|
|
|
104
111
|
for (int id = 0; id < info.device_count; ++id) {
|
|
@@ -211,7 +218,37 @@ static void ggml_check_sycl() try {
|
|
|
211
218
|
g_ggml_sycl_disable_graph = get_sycl_env("GGML_SYCL_DISABLE_GRAPH", 1);
|
|
212
219
|
g_ggml_sycl_disable_dnn = get_sycl_env("GGML_SYCL_DISABLE_DNN", 0);
|
|
213
220
|
g_ggml_sycl_prioritize_dmmv = get_sycl_env("GGML_SYCL_PRIORITIZE_DMMV", 0);
|
|
221
|
+
|
|
222
|
+
#ifdef SYCL_FLASH_ATTN
|
|
223
|
+
g_ggml_sycl_enable_flash_attention = get_sycl_env("GGML_SYCL_ENABLE_FLASH_ATTN", 1);
|
|
224
|
+
#else
|
|
225
|
+
g_ggml_sycl_enable_flash_attention = 0;
|
|
226
|
+
#endif
|
|
227
|
+
|
|
214
228
|
GGML_SYCL_DEBUG("[SYCL] call ggml_check_sycl\n");
|
|
229
|
+
|
|
230
|
+
GGML_LOG_INFO("Build with Macros:\n");
|
|
231
|
+
#if defined(GGML_SYCL_FORCE_MMQ)
|
|
232
|
+
GGML_LOG_INFO(" GGML_SYCL_FORCE_MMQ: yes\n");
|
|
233
|
+
#else
|
|
234
|
+
GGML_LOG_INFO(" GGML_SYCL_FORCE_MMQ: no\n");
|
|
235
|
+
#endif
|
|
236
|
+
#if defined(GGML_SYCL_F16)
|
|
237
|
+
GGML_LOG_INFO(" GGML_SYCL_F16: yes\n");
|
|
238
|
+
#else
|
|
239
|
+
GGML_LOG_INFO(" GGML_SYCL_F16: no\n");
|
|
240
|
+
#endif
|
|
241
|
+
#if defined(GGML_SYCL_GRAPH)
|
|
242
|
+
GGML_LOG_INFO(" GGML_SYCL_GRAPH: yes\n");
|
|
243
|
+
#else
|
|
244
|
+
GGML_LOG_INFO(" GGML_SYCL_GRAPH: no\n");
|
|
245
|
+
#endif
|
|
246
|
+
#if defined(GGML_SYCL_DNNL)
|
|
247
|
+
GGML_LOG_INFO(" GGML_SYCL_DNNL: yes\n");
|
|
248
|
+
#else
|
|
249
|
+
GGML_LOG_INFO(" GGML_SYCL_DNNL: no\n");
|
|
250
|
+
#endif
|
|
251
|
+
|
|
215
252
|
GGML_LOG_INFO("Running with Environment Variables:\n");
|
|
216
253
|
GGML_LOG_INFO(" GGML_SYCL_DEBUG: %d\n", g_ggml_sycl_debug);
|
|
217
254
|
GGML_LOG_INFO(" GGML_SYCL_DISABLE_OPT: %d\n", g_ggml_sycl_disable_optimize);
|
|
@@ -226,16 +263,12 @@ static void ggml_check_sycl() try {
|
|
|
226
263
|
GGML_LOG_INFO(" GGML_SYCL_DISABLE_DNN: DNN disabled by compile flag\n");
|
|
227
264
|
#endif
|
|
228
265
|
GGML_LOG_INFO(" GGML_SYCL_PRIORITIZE_DMMV: %d\n", g_ggml_sycl_prioritize_dmmv);
|
|
229
|
-
|
|
230
|
-
#if defined(GGML_SYCL_FORCE_MMQ)
|
|
231
|
-
GGML_LOG_INFO(" GGML_SYCL_FORCE_MMQ: yes\n");
|
|
232
|
-
#else
|
|
233
|
-
GGML_LOG_INFO(" GGML_SYCL_FORCE_MMQ: no\n");
|
|
234
|
-
#endif
|
|
235
|
-
#if defined(GGML_SYCL_F16)
|
|
236
|
-
GGML_LOG_INFO(" GGML_SYCL_F16: yes\n");
|
|
266
|
+
|
|
267
|
+
#ifdef SYCL_FLASH_ATTN
|
|
268
|
+
GGML_LOG_INFO(" GGML_SYCL_ENABLE_FLASH_ATTN: %d\n", g_ggml_sycl_enable_flash_attention);
|
|
237
269
|
#else
|
|
238
|
-
GGML_LOG_INFO(" GGML_SYCL_F16: no\n");
|
|
270
|
+
GGML_LOG_INFO(" GGML_SYCL_ENABLE_FLASH_ATTN: %d disabled by compile flag\n",
|
|
271
|
+
g_ggml_sycl_enable_flash_attention);
|
|
239
272
|
#endif
|
|
240
273
|
|
|
241
274
|
/* NOT REMOVE, keep it for next optimize for XMX.
|
|
@@ -1157,13 +1190,28 @@ static const char * ggml_backend_sycl_host_buffer_type_name(ggml_backend_buffer_
|
|
|
1157
1190
|
GGML_UNUSED(buft);
|
|
1158
1191
|
}
|
|
1159
1192
|
|
|
1193
|
+
inline void * aligned_malloc_host(size_t alignment, size_t size) {
|
|
1194
|
+
#ifdef _WIN32
|
|
1195
|
+
return _aligned_malloc(size, alignment);
|
|
1196
|
+
#else
|
|
1197
|
+
return aligned_alloc(alignment, size);
|
|
1198
|
+
#endif
|
|
1199
|
+
}
|
|
1200
|
+
|
|
1201
|
+
inline void free_aligned_mem_host(void * memblock) {
|
|
1202
|
+
#ifdef _WIN32
|
|
1203
|
+
_aligned_free(memblock);
|
|
1204
|
+
#else
|
|
1205
|
+
free(memblock);
|
|
1206
|
+
#endif
|
|
1207
|
+
}
|
|
1208
|
+
|
|
1160
1209
|
static void ggml_backend_sycl_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
1161
|
-
|
|
1210
|
+
free_aligned_mem_host((void *)buffer->context);
|
|
1162
1211
|
}
|
|
1163
1212
|
|
|
1164
1213
|
static ggml_backend_buffer_t ggml_backend_sycl_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
|
1165
|
-
void * ptr =
|
|
1166
|
-
|
|
1214
|
+
void * ptr = aligned_malloc_host(TENSOR_ALIGNMENT, size);
|
|
1167
1215
|
if (ptr == nullptr) {
|
|
1168
1216
|
// fallback to cpu buffer
|
|
1169
1217
|
return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
|
|
@@ -1825,6 +1873,110 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols,
|
|
|
1825
1873
|
}
|
|
1826
1874
|
}
|
|
1827
1875
|
|
|
1876
|
+
static void top_k_f32_sycl(
|
|
1877
|
+
const float * src,
|
|
1878
|
+
int32_t * dst_indices,
|
|
1879
|
+
const int64_t ncols,
|
|
1880
|
+
const int64_t nrows,
|
|
1881
|
+
const int k,
|
|
1882
|
+
dpct::queue_ptr main_stream
|
|
1883
|
+
) {
|
|
1884
|
+
const int block_size = 128;
|
|
1885
|
+
|
|
1886
|
+
const sycl::range<1> block_dims(block_size);
|
|
1887
|
+
const sycl::range<1> grid_dims(nrows);
|
|
1888
|
+
|
|
1889
|
+
main_stream->submit([&](sycl::handler &cgh) {
|
|
1890
|
+
sycl::local_accessor<float, 1> shared_vals(sycl::range<1>(block_size * k), cgh);
|
|
1891
|
+
sycl::local_accessor<int, 1> shared_idx(sycl::range<1>(block_size * k), cgh);
|
|
1892
|
+
|
|
1893
|
+
cgh.parallel_for(
|
|
1894
|
+
sycl::nd_range<1>(grid_dims * block_dims, block_dims),
|
|
1895
|
+
[=](sycl::nd_item<1> item_ct1) {
|
|
1896
|
+
const int row = item_ct1.get_group(0);
|
|
1897
|
+
const int tid = item_ct1.get_local_id(0);
|
|
1898
|
+
|
|
1899
|
+
if (row >= nrows) return;
|
|
1900
|
+
|
|
1901
|
+
const float * src_row = src + row * ncols;
|
|
1902
|
+
int32_t * dst_idx_row = dst_indices + row * k;
|
|
1903
|
+
|
|
1904
|
+
float local_vals[32];
|
|
1905
|
+
int local_idx[32];
|
|
1906
|
+
|
|
1907
|
+
for (int i = 0; i < k; i++) {
|
|
1908
|
+
local_vals[i] = -FLT_MAX;
|
|
1909
|
+
local_idx[i] = -1;
|
|
1910
|
+
}
|
|
1911
|
+
|
|
1912
|
+
for (int col = tid; col < ncols; col += block_size) {
|
|
1913
|
+
float val = src_row[col];
|
|
1914
|
+
|
|
1915
|
+
if (val > local_vals[k-1]) {
|
|
1916
|
+
int pos = k - 1;
|
|
1917
|
+
while (pos > 0 && val > local_vals[pos - 1]) {
|
|
1918
|
+
pos--;
|
|
1919
|
+
}
|
|
1920
|
+
|
|
1921
|
+
for (int i = k - 1; i > pos; i--) {
|
|
1922
|
+
local_vals[i] = local_vals[i - 1];
|
|
1923
|
+
local_idx[i] = local_idx[i - 1];
|
|
1924
|
+
}
|
|
1925
|
+
local_vals[pos] = val;
|
|
1926
|
+
local_idx[pos] = col;
|
|
1927
|
+
}
|
|
1928
|
+
}
|
|
1929
|
+
|
|
1930
|
+
for (int i = 0; i < k; i++) {
|
|
1931
|
+
shared_vals[tid * k + i] = local_vals[i];
|
|
1932
|
+
shared_idx[tid * k + i] = local_idx[i];
|
|
1933
|
+
}
|
|
1934
|
+
item_ct1.barrier(sycl::access::fence_space::local_space);
|
|
1935
|
+
|
|
1936
|
+
if (tid == 0) {
|
|
1937
|
+
float final_vals[32];
|
|
1938
|
+
int final_idx[32];
|
|
1939
|
+
|
|
1940
|
+
for (int i = 0; i < k; i++) {
|
|
1941
|
+
final_vals[i] = -FLT_MAX;
|
|
1942
|
+
final_idx[i] = -1;
|
|
1943
|
+
}
|
|
1944
|
+
|
|
1945
|
+
for (int t = 0; t < block_size; t++) {
|
|
1946
|
+
for (int i = 0; i < k; i++) {
|
|
1947
|
+
float val = shared_vals[t * k + i];
|
|
1948
|
+
int idx = shared_idx[t * k + i];
|
|
1949
|
+
|
|
1950
|
+
if (val > final_vals[k-1]) {
|
|
1951
|
+
int pos = k - 1;
|
|
1952
|
+
while (pos > 0 && val > final_vals[pos - 1]) {
|
|
1953
|
+
pos--;
|
|
1954
|
+
}
|
|
1955
|
+
|
|
1956
|
+
for (int j = k - 1; j > pos; j--) {
|
|
1957
|
+
final_vals[j] = final_vals[j - 1];
|
|
1958
|
+
final_idx[j] = final_idx[j - 1];
|
|
1959
|
+
}
|
|
1960
|
+
final_vals[pos] = val;
|
|
1961
|
+
final_idx[pos] = idx;
|
|
1962
|
+
}
|
|
1963
|
+
}
|
|
1964
|
+
}
|
|
1965
|
+
|
|
1966
|
+
for (int i = 0; i < k; i++) {
|
|
1967
|
+
dst_idx_row[i] = final_idx[i];
|
|
1968
|
+
}
|
|
1969
|
+
|
|
1970
|
+
if (k > 1) {
|
|
1971
|
+
int32_t temp = dst_idx_row[0];
|
|
1972
|
+
dst_idx_row[0] = dst_idx_row[1];
|
|
1973
|
+
dst_idx_row[1] = temp;
|
|
1974
|
+
}
|
|
1975
|
+
}
|
|
1976
|
+
});
|
|
1977
|
+
});
|
|
1978
|
+
}
|
|
1979
|
+
|
|
1828
1980
|
static void argmax_f32_i32_sycl(const float *x, int *dst, const int ncols,
|
|
1829
1981
|
const int nrows, queue_ptr stream) {
|
|
1830
1982
|
const sycl::range<3> block_dims(1, 1, SYCL_ARGMAX_BLOCK_SIZE);
|
|
@@ -2048,8 +2200,8 @@ inline void ggml_sycl_op_mul_mat_sycl(
|
|
|
2048
2200
|
const sycl::half alpha_f16 = 1.0f;
|
|
2049
2201
|
const sycl::half beta_f16 = 0.0f;
|
|
2050
2202
|
SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm(
|
|
2051
|
-
*stream, oneapi::
|
|
2052
|
-
oneapi::
|
|
2203
|
+
*stream, oneapi::mkl::transpose::trans,
|
|
2204
|
+
oneapi::mkl::transpose::nontrans, row_diff, src1_ncols, ne10,
|
|
2053
2205
|
&alpha_f16, src0_ptr, dpct::library_data_t::real_half, ne00,
|
|
2054
2206
|
src1_ptr, dpct::library_data_t::real_half, ne10, &beta_f16,
|
|
2055
2207
|
dst_f16.get(), dpct::library_data_t::real_half, ldc,
|
|
@@ -2092,8 +2244,8 @@ inline void ggml_sycl_op_mul_mat_sycl(
|
|
|
2092
2244
|
{
|
|
2093
2245
|
const float alpha = 1.0f;
|
|
2094
2246
|
const float beta = 0.0f;
|
|
2095
|
-
SYCL_CHECK(CHECK_TRY_ERROR(oneapi::
|
|
2096
|
-
|
|
2247
|
+
SYCL_CHECK(CHECK_TRY_ERROR(oneapi::mkl::blas::column_major::gemm(
|
|
2248
|
+
*stream, oneapi::mkl::transpose::trans, oneapi::mkl::transpose::nontrans, row_diff,
|
|
2097
2249
|
src1_ncols, ne10, dpct::get_value(&alpha, *stream), src0_ddf_i, ne00, src1_ddf1_i, ne10,
|
|
2098
2250
|
dpct::get_value(&beta, *stream), dst_dd_i, ldc)));
|
|
2099
2251
|
}
|
|
@@ -2216,6 +2368,30 @@ inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, ggml_tensor *
|
|
|
2216
2368
|
main_stream, ctx.device);
|
|
2217
2369
|
}
|
|
2218
2370
|
|
|
2371
|
+
static void ggml_sycl_op_top_k(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
2372
|
+
const ggml_tensor * src0 = dst->src[0];
|
|
2373
|
+
|
|
2374
|
+
GGML_ASSERT(src0);
|
|
2375
|
+
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
|
2376
|
+
GGML_ASSERT(dst->type == GGML_TYPE_I32);
|
|
2377
|
+
GGML_ASSERT(ggml_is_contiguous(src0));
|
|
2378
|
+
|
|
2379
|
+
dpct::queue_ptr main_stream = ctx.stream();
|
|
2380
|
+
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
|
|
2381
|
+
|
|
2382
|
+
const float * src0_dd = static_cast<const float *>(src0->data);
|
|
2383
|
+
int32_t * dst_dd = static_cast<int32_t *>(dst->data);
|
|
2384
|
+
|
|
2385
|
+
const int k = dst->ne[0];
|
|
2386
|
+
const int64_t ncols = src0->ne[0];
|
|
2387
|
+
const int64_t nrows = ggml_nrows(src0);
|
|
2388
|
+
|
|
2389
|
+
GGML_ASSERT(k > 0 && k <= 32);
|
|
2390
|
+
GGML_ASSERT(k <= ncols);
|
|
2391
|
+
|
|
2392
|
+
top_k_f32_sycl(src0_dd, dst_dd, ncols, nrows, k, main_stream);
|
|
2393
|
+
}
|
|
2394
|
+
|
|
2219
2395
|
inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
2220
2396
|
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
|
|
2221
2397
|
GGML_ASSERT( dst->type == GGML_TYPE_I32);
|
|
@@ -2248,6 +2424,65 @@ inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_ten
|
|
|
2248
2424
|
diag_mask_inf_f32_sycl(src0_dd, dst_dd, ne00, nrows0, ne01, n_past, main_stream);
|
|
2249
2425
|
}
|
|
2250
2426
|
|
|
2427
|
+
static void tri_f32_sycl(
|
|
2428
|
+
const float * src,
|
|
2429
|
+
float * dst,
|
|
2430
|
+
const int64_t ne0,
|
|
2431
|
+
const int64_t ne1,
|
|
2432
|
+
const int64_t ne2,
|
|
2433
|
+
const int64_t ne3,
|
|
2434
|
+
const ggml_tri_type ttype,
|
|
2435
|
+
dpct::queue_ptr main_stream
|
|
2436
|
+
) {
|
|
2437
|
+
const size_t total = (size_t) ne0 * (size_t) ne1 * (size_t) ne2 * (size_t) ne3;
|
|
2438
|
+
|
|
2439
|
+
main_stream->parallel_for(sycl::range<1>(total), [=](sycl::id<1> tid) {
|
|
2440
|
+
const int64_t idx = (int64_t) tid[0];
|
|
2441
|
+
|
|
2442
|
+
const int64_t i0 = idx % ne0;
|
|
2443
|
+
const int64_t t1 = idx / ne0;
|
|
2444
|
+
const int64_t i1 = t1 % ne1;
|
|
2445
|
+
|
|
2446
|
+
bool keep = false;
|
|
2447
|
+
switch (ttype) {
|
|
2448
|
+
case GGML_TRI_TYPE_LOWER: keep = (i0 < i1); break;
|
|
2449
|
+
case GGML_TRI_TYPE_LOWER_DIAG: keep = (i0 <= i1); break;
|
|
2450
|
+
case GGML_TRI_TYPE_UPPER: keep = (i0 > i1); break;
|
|
2451
|
+
case GGML_TRI_TYPE_UPPER_DIAG: keep = (i0 >= i1); break;
|
|
2452
|
+
default: keep = false; break;
|
|
2453
|
+
}
|
|
2454
|
+
|
|
2455
|
+
dst[idx] = keep ? src[idx] : 0.0f;
|
|
2456
|
+
});
|
|
2457
|
+
}
|
|
2458
|
+
|
|
2459
|
+
static void ggml_sycl_op_tri(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
2460
|
+
const ggml_tensor * src0 = dst->src[0];
|
|
2461
|
+
GGML_ASSERT(src0);
|
|
2462
|
+
|
|
2463
|
+
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
|
2464
|
+
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
|
2465
|
+
GGML_ASSERT(ggml_is_contiguous(src0));
|
|
2466
|
+
GGML_ASSERT(ggml_is_contiguous(dst));
|
|
2467
|
+
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
2468
|
+
|
|
2469
|
+
dpct::queue_ptr main_stream = ctx.stream();
|
|
2470
|
+
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
|
|
2471
|
+
|
|
2472
|
+
const float * src0_dd = static_cast<const float *>(src0->data);
|
|
2473
|
+
float * dst_dd = static_cast<float *>(dst->data);
|
|
2474
|
+
|
|
2475
|
+
const ggml_tri_type ttype = (ggml_tri_type) ggml_get_op_params_i32(dst, 0);
|
|
2476
|
+
|
|
2477
|
+
const int64_t ne0 = src0->ne[0];
|
|
2478
|
+
const int64_t ne1 = src0->ne[1];
|
|
2479
|
+
const int64_t ne2 = src0->ne[2];
|
|
2480
|
+
const int64_t ne3 = src0->ne[3];
|
|
2481
|
+
|
|
2482
|
+
tri_f32_sycl(src0_dd, dst_dd, ne0, ne1, ne2, ne3, ttype, main_stream);
|
|
2483
|
+
}
|
|
2484
|
+
|
|
2485
|
+
|
|
2251
2486
|
inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
2252
2487
|
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
|
|
2253
2488
|
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
|
@@ -2810,7 +3045,7 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, cons
|
|
|
2810
3045
|
|
|
2811
3046
|
}
|
|
2812
3047
|
#if GGML_SYCL_DNNL
|
|
2813
|
-
// oneDNN handles strided data and does not need overhead of
|
|
3048
|
+
// oneDNN handles strided data and does not need overhead of ggml_get_to_fp16_nc_sycl
|
|
2814
3049
|
const int64_t ne_src1 = src1->nb[last_str] * src1->ne[last_dim] / type_size_src1;
|
|
2815
3050
|
src1_f16_alloc.alloc(ne_src1);
|
|
2816
3051
|
const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src1->type, dst);
|
|
@@ -2819,7 +3054,7 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, cons
|
|
|
2819
3054
|
# else
|
|
2820
3055
|
const int64_t ne_src1 = ggml_nelements(src1);
|
|
2821
3056
|
src1_f16_alloc.alloc(ne_src1);
|
|
2822
|
-
const to_fp16_nc_sycl_t to_fp16_nc_sycl =
|
|
3057
|
+
const to_fp16_nc_sycl_t to_fp16_nc_sycl = ggml_get_to_fp16_nc_sycl(src1->type);
|
|
2823
3058
|
GGML_ASSERT(to_fp16_nc_sycl != nullptr);
|
|
2824
3059
|
to_fp16_nc_sycl(src1_f16, src1_f16_alloc.get(), ne10, ne11, ne12, ne13, s11, s12, s13, queue);
|
|
2825
3060
|
#endif
|
|
@@ -2963,8 +3198,8 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, cons
|
|
|
2963
3198
|
const int64_t smb = ne12 == 1 ? s13 : s12;
|
|
2964
3199
|
|
|
2965
3200
|
// there is no broadcast and src0, src1 are contiguous across dims 2, 3
|
|
2966
|
-
SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm_batch(*queue, oneapi::
|
|
2967
|
-
oneapi::
|
|
3201
|
+
SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm_batch(*queue, oneapi::mkl::transpose::trans,
|
|
3202
|
+
oneapi::mkl::transpose::nontrans, ne01, ne11, ne10, alpha,
|
|
2968
3203
|
src0_f16, dpct::library_data_t::real_half, nb01 / nb00, sma,
|
|
2969
3204
|
src1_f16, dpct::library_data_t::real_half, s11, smb, beta, dst_ddf,
|
|
2970
3205
|
mkl_data_type, ne0, ne1 * ne0, ne12 * ne13, mkl_compute_type)));
|
|
@@ -2988,7 +3223,7 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, cons
|
|
|
2988
3223
|
});
|
|
2989
3224
|
|
|
2990
3225
|
SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm_batch(
|
|
2991
|
-
*queue, oneapi::
|
|
3226
|
+
*queue, oneapi::mkl::transpose::trans, oneapi::mkl::transpose::nontrans, ne01, ne11, ne10, alpha,
|
|
2992
3227
|
(const void **) (ptrs_src.get() + 0 * ne23), dpct::library_data_t::real_half, nb01 / nb00,
|
|
2993
3228
|
(const void **) (ptrs_src.get() + 1 * ne23), dpct::library_data_t::real_half, s11, beta,
|
|
2994
3229
|
(void **) (ptrs_dst.get() + 0 * ne23), mkl_data_type, ne0, ne23, mkl_compute_type, matrix_info.get())));
|
|
@@ -3316,18 +3551,17 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
3316
3551
|
|
|
3317
3552
|
|
|
3318
3553
|
// mmvq and mmq need the __dp4a instruction which is available for gen12+
|
|
3319
|
-
// Workaround in https://github.com/
|
|
3554
|
+
// Workaround in https://github.com/ggml-org/llama.cpp/commit/95f84d5ce8b449a9b16009434aca800df504a02e
|
|
3320
3555
|
use_mul_mat_q = use_mul_mat_q && (src0->type != GGML_TYPE_IQ2_XXS);
|
|
3321
3556
|
#ifdef SYCL_USE_XMX
|
|
3322
3557
|
use_mul_mat_q = use_mul_mat_q && (src1->ne[1] <= MMQ_MAX_BATCH_SIZE);
|
|
3323
3558
|
#endif // SYCL_USE_XMX
|
|
3324
3559
|
|
|
3325
|
-
//
|
|
3326
|
-
|
|
3327
|
-
|
|
3328
|
-
|
|
3329
|
-
|
|
3330
|
-
|| (should_reorder_tensor(ctx, dst) && ggml_sycl_supports_reorder_mmvq(src0->type)))) {
|
|
3560
|
+
// Dispatch becomes obscure with the reorder, MMVQ when the reorder optimization
|
|
3561
|
+
// is enabled takes precedence over DMMV, the current if-else implementation
|
|
3562
|
+
// requires disabling DMMV if both conditions are met
|
|
3563
|
+
if (!g_ggml_sycl_prioritize_dmmv && ((should_reorder_tensor(ctx, dst) &&
|
|
3564
|
+
ggml_sycl_supports_reorder_mmvq(src0->type)))) {
|
|
3331
3565
|
use_dequantize_mul_mat_vec = use_dequantize_mul_mat_vec && !use_mul_mat_vec_q;
|
|
3332
3566
|
}
|
|
3333
3567
|
|
|
@@ -3771,6 +4005,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
|
|
|
3771
4005
|
case GGML_UNARY_OP_EXP:
|
|
3772
4006
|
ggml_sycl_exp(ctx, dst);
|
|
3773
4007
|
break;
|
|
4008
|
+
case GGML_UNARY_OP_SOFTPLUS:
|
|
4009
|
+
ggml_sycl_softplus(ctx, dst);
|
|
4010
|
+
break;
|
|
3774
4011
|
case GGML_UNARY_OP_SGN:
|
|
3775
4012
|
ggml_sycl_sgn(ctx, dst);
|
|
3776
4013
|
break;
|
|
@@ -3897,6 +4134,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
|
|
|
3897
4134
|
case GGML_OP_TRANSPOSE:
|
|
3898
4135
|
GGML_SYCL_DEBUG("%s: Tensor NO-OP\n", __func__);
|
|
3899
4136
|
break;
|
|
4137
|
+
case GGML_OP_TRI:
|
|
4138
|
+
ggml_sycl_op_tri(ctx, dst);
|
|
4139
|
+
break;
|
|
3900
4140
|
case GGML_OP_DIAG_MASK_INF:
|
|
3901
4141
|
ggml_sycl_diag_mask_inf(ctx, dst);
|
|
3902
4142
|
break;
|
|
@@ -3909,6 +4149,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
|
|
|
3909
4149
|
case GGML_OP_ROPE:
|
|
3910
4150
|
ggml_sycl_rope(ctx, dst);
|
|
3911
4151
|
break;
|
|
4152
|
+
case GGML_OP_ROPE_BACK:
|
|
4153
|
+
ggml_sycl_rope_back(ctx, dst);
|
|
4154
|
+
break;
|
|
3912
4155
|
case GGML_OP_IM2COL:
|
|
3913
4156
|
ggml_sycl_im2col(ctx, dst);
|
|
3914
4157
|
break;
|
|
@@ -3927,6 +4170,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
|
|
|
3927
4170
|
case GGML_OP_ARGSORT:
|
|
3928
4171
|
ggml_sycl_argsort(ctx, dst);
|
|
3929
4172
|
break;
|
|
4173
|
+
case GGML_OP_TOP_K:
|
|
4174
|
+
ggml_sycl_op_top_k(ctx, dst);
|
|
4175
|
+
break;
|
|
3930
4176
|
case GGML_OP_TIMESTEP_EMBEDDING:
|
|
3931
4177
|
ggml_sycl_op_timestep_embedding(ctx, dst);
|
|
3932
4178
|
break;
|
|
@@ -3939,6 +4185,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
|
|
|
3939
4185
|
case GGML_OP_GATED_LINEAR_ATTN:
|
|
3940
4186
|
ggml_sycl_op_gated_linear_attn(ctx, dst);
|
|
3941
4187
|
break;
|
|
4188
|
+
case GGML_OP_GATED_DELTA_NET:
|
|
4189
|
+
ggml_sycl_gated_delta_net(ctx, dst);
|
|
4190
|
+
break;
|
|
3942
4191
|
case GGML_OP_SSM_CONV:
|
|
3943
4192
|
ggml_sycl_ssm_conv(ctx, dst);
|
|
3944
4193
|
break;
|
|
@@ -3948,6 +4197,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
|
|
|
3948
4197
|
case GGML_OP_ARANGE:
|
|
3949
4198
|
ggml_sycl_arange(ctx, dst);
|
|
3950
4199
|
break;
|
|
4200
|
+
case GGML_OP_FLASH_ATTN_EXT:
|
|
4201
|
+
ggml_sycl_flash_attn_ext(ctx, dst);
|
|
4202
|
+
break;
|
|
3951
4203
|
default:
|
|
3952
4204
|
return false;
|
|
3953
4205
|
}
|
|
@@ -3978,16 +4230,6 @@ void ggml_backend_sycl_get_device_memory(int device, size_t *free,
|
|
|
3978
4230
|
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_memory\n");
|
|
3979
4231
|
ggml_sycl_set_device(device);
|
|
3980
4232
|
|
|
3981
|
-
/*
|
|
3982
|
-
DPCT1009:218: SYCL uses exceptions to report errors and does not use the
|
|
3983
|
-
error codes. The original code was commented out and a warning string was
|
|
3984
|
-
inserted. You need to rewrite this code.
|
|
3985
|
-
*/
|
|
3986
|
-
/*
|
|
3987
|
-
DPCT1106:217: 'cudaMemGetInfo' was migrated with the Intel extensions for
|
|
3988
|
-
device information which may not be supported by all compilers or runtimes.
|
|
3989
|
-
You may need to adjust the code.
|
|
3990
|
-
*/
|
|
3991
4233
|
SYCL_CHECK(CHECK_TRY_ERROR(
|
|
3992
4234
|
dpct::dev_mgr::instance().get_device(device).get_memory_info(*free, *total)));
|
|
3993
4235
|
}
|
|
@@ -4109,6 +4351,9 @@ static void ggml_backend_sycl_graph_compute_impl(ggml_backend_sycl_context * syc
|
|
|
4109
4351
|
if (ggml_is_empty(node) || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
|
|
4110
4352
|
continue;
|
|
4111
4353
|
}
|
|
4354
|
+
if ((node->flags & GGML_TENSOR_FLAG_COMPUTE) == 0) {
|
|
4355
|
+
continue;
|
|
4356
|
+
}
|
|
4112
4357
|
#ifndef NDEBUG
|
|
4113
4358
|
assert(node->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device));
|
|
4114
4359
|
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
|
@@ -4386,10 +4631,11 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
|
|
4386
4631
|
case GGML_UNARY_OP_GELU_QUICK:
|
|
4387
4632
|
case GGML_UNARY_OP_GELU_ERF:
|
|
4388
4633
|
case GGML_UNARY_OP_EXP:
|
|
4634
|
+
case GGML_UNARY_OP_SOFTPLUS:
|
|
4389
4635
|
case GGML_UNARY_OP_ELU:
|
|
4636
|
+
case GGML_UNARY_OP_CEIL:
|
|
4390
4637
|
return true;
|
|
4391
4638
|
case GGML_UNARY_OP_FLOOR:
|
|
4392
|
-
case GGML_UNARY_OP_CEIL:
|
|
4393
4639
|
case GGML_UNARY_OP_ROUND:
|
|
4394
4640
|
case GGML_UNARY_OP_TRUNC:
|
|
4395
4641
|
#if defined (GGML_SYCL_F16)
|
|
@@ -4588,18 +4834,23 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
|
|
4588
4834
|
return (op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32) && (op->type == op->src[0]->type);
|
|
4589
4835
|
#endif
|
|
4590
4836
|
case GGML_OP_NORM:
|
|
4591
|
-
return true;
|
|
4592
4837
|
case GGML_OP_L2_NORM:
|
|
4593
4838
|
case GGML_OP_GROUP_NORM:
|
|
4594
|
-
return ggml_is_contiguous(op->src[0]);
|
|
4595
4839
|
case GGML_OP_RMS_NORM:
|
|
4596
|
-
return
|
|
4840
|
+
return true;
|
|
4597
4841
|
case GGML_OP_RMS_NORM_BACK:
|
|
4598
|
-
return (
|
|
4842
|
+
return ggml_is_contiguous(op->src[0]);
|
|
4599
4843
|
case GGML_OP_SCALE:
|
|
4600
4844
|
return true;
|
|
4601
4845
|
case GGML_OP_CONT:
|
|
4602
4846
|
return op->src[0]->type != GGML_TYPE_BF16;
|
|
4847
|
+
case GGML_OP_TRI:
|
|
4848
|
+
{
|
|
4849
|
+
const ggml_tensor * src0 = op->src[0];
|
|
4850
|
+
return src0 &&
|
|
4851
|
+
op->type == GGML_TYPE_F32 &&
|
|
4852
|
+
ggml_is_contiguous(src0);
|
|
4853
|
+
}
|
|
4603
4854
|
case GGML_OP_DIAG_MASK_INF:
|
|
4604
4855
|
return true;
|
|
4605
4856
|
case GGML_OP_SOFT_MAX:
|
|
@@ -4610,6 +4861,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
|
|
4610
4861
|
return max_bias == 0.0f;
|
|
4611
4862
|
}
|
|
4612
4863
|
case GGML_OP_ROPE:
|
|
4864
|
+
case GGML_OP_ROPE_BACK:
|
|
4613
4865
|
case GGML_OP_IM2COL:
|
|
4614
4866
|
return true;
|
|
4615
4867
|
case GGML_OP_UPSCALE:
|
|
@@ -4621,9 +4873,19 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
|
|
4621
4873
|
case GGML_OP_ARGSORT:
|
|
4622
4874
|
return op->src[0]->ne[0] * sizeof(int) <=
|
|
4623
4875
|
ggml_sycl_info().devices[device].smpbo;
|
|
4876
|
+
case GGML_OP_TOP_K: {
|
|
4877
|
+
const ggml_tensor * src0 = op->src[0];
|
|
4878
|
+
const int k = op->ne[0];
|
|
4879
|
+
return src0 &&
|
|
4880
|
+
op->type == GGML_TYPE_I32 &&
|
|
4881
|
+
src0->type == GGML_TYPE_F32 &&
|
|
4882
|
+
ggml_is_contiguous(src0) &&
|
|
4883
|
+
k > 0 && k <= 32;
|
|
4884
|
+
}
|
|
4624
4885
|
case GGML_OP_POOL_2D:
|
|
4625
|
-
case GGML_OP_ACC:
|
|
4626
4886
|
return true;
|
|
4887
|
+
case GGML_OP_ACC:
|
|
4888
|
+
return ggml_is_contiguous(op->src[0]) && ggml_is_contiguous(op->src[1]);
|
|
4627
4889
|
case GGML_OP_PAD:
|
|
4628
4890
|
// TODO: add circular padding support for syscl, see https://github.com/ggml-org/llama.cpp/pull/16985
|
|
4629
4891
|
if (ggml_get_op_params_i32(op, 8) != 0) {
|
|
@@ -4635,6 +4897,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
|
|
4635
4897
|
case GGML_OP_RWKV_WKV6:
|
|
4636
4898
|
case GGML_OP_RWKV_WKV7:
|
|
4637
4899
|
case GGML_OP_GATED_LINEAR_ATTN:
|
|
4900
|
+
case GGML_OP_GATED_DELTA_NET:
|
|
4638
4901
|
return true;
|
|
4639
4902
|
case GGML_OP_SSM_CONV:
|
|
4640
4903
|
return op->type == GGML_TYPE_F32 &&
|
|
@@ -4644,6 +4907,8 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
|
|
4644
4907
|
return op->type == GGML_TYPE_F32;
|
|
4645
4908
|
case GGML_OP_ARANGE:
|
|
4646
4909
|
return op->type == GGML_TYPE_F32;
|
|
4910
|
+
case GGML_OP_FLASH_ATTN_EXT:
|
|
4911
|
+
return ggml_sycl_flash_attn_ext_supported(device, op);
|
|
4647
4912
|
default:
|
|
4648
4913
|
return false;
|
|
4649
4914
|
}
|