whispercpp 1.3.4 → 1.3.5
This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +60 -43
- data/ext/extconf.rb +2 -2
- data/ext/ruby_whisper.c +14 -2
- data/ext/ruby_whisper.h +39 -0
- data/ext/ruby_whisper_context.c +22 -22
- data/ext/ruby_whisper_model.c +12 -12
- data/ext/ruby_whisper_params.c +47 -23
- data/ext/ruby_whisper_segment.c +84 -19
- data/ext/ruby_whisper_token.c +351 -0
- data/ext/ruby_whisper_transcribe.cpp +1 -1
- data/ext/ruby_whisper_vad_context.c +75 -0
- data/ext/ruby_whisper_vad_context_detect.cpp +50 -0
- data/ext/ruby_whisper_vad_segment.c +139 -0
- data/ext/ruby_whisper_vad_segments.c +106 -0
- data/ext/sources/CMakeLists.txt +4 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/arm64-apple-clang.cmake +16 -0
- data/ext/sources/cmake/arm64-windows-llvm.cmake +16 -0
- data/ext/sources/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- data/ext/sources/cmake/x64-windows-llvm.cmake +5 -0
- data/ext/sources/examples/addon.node/vad-example.js +2 -2
- data/ext/sources/examples/cli/cli.cpp +121 -112
- data/ext/sources/examples/lsp/CMakeLists.txt +2 -1
- data/ext/sources/examples/quantize/CMakeLists.txt +2 -1
- data/ext/sources/examples/server/server.cpp +10 -11
- data/ext/sources/examples/talk-llama/CMakeLists.txt +5 -1
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +12 -3
- data/ext/sources/examples/talk-llama/llama-adapter.h +7 -1
- data/ext/sources/examples/talk-llama/llama-arch.cpp +2046 -1974
- data/ext/sources/examples/talk-llama/llama-arch.h +67 -2
- data/ext/sources/examples/talk-llama/llama-batch.cpp +75 -33
- data/ext/sources/examples/talk-llama/llama-batch.h +17 -4
- data/ext/sources/examples/talk-llama/llama-chat.cpp +79 -3
- data/ext/sources/examples/talk-llama/llama-chat.h +4 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +775 -78
- data/ext/sources/examples/talk-llama/llama-context.h +57 -9
- data/ext/sources/examples/talk-llama/llama-cparams.h +1 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +288 -53
- data/ext/sources/examples/talk-llama/llama-grammar.h +22 -1
- data/ext/sources/examples/talk-llama/llama-graph.cpp +381 -64
- data/ext/sources/examples/talk-llama/llama-graph.h +103 -13
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +26 -2
- data/ext/sources/examples/talk-llama/llama-hparams.h +41 -10
- data/ext/sources/examples/talk-llama/llama-impl.cpp +7 -3
- data/ext/sources/examples/talk-llama/llama-impl.h +1 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +5 -3
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +145 -65
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +22 -7
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +44 -2
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +12 -10
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +32 -19
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +2 -2
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +172 -37
- data/ext/sources/examples/talk-llama/llama-mmap.h +8 -3
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +91 -9
- data/ext/sources/examples/talk-llama/llama-model-loader.h +6 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +3 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +1529 -13134
- data/ext/sources/examples/talk-llama/llama-model.h +44 -3
- data/ext/sources/examples/talk-llama/llama-quant.cpp +8 -23
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +1294 -198
- data/ext/sources/examples/talk-llama/llama-sampling.h +19 -7
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +133 -37
- data/ext/sources/examples/talk-llama/llama-vocab.h +45 -40
- data/ext/sources/examples/talk-llama/llama.cpp +729 -2
- data/ext/sources/examples/talk-llama/llama.h +152 -14
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +191 -0
- data/ext/sources/examples/talk-llama/models/apertus.cpp +125 -0
- data/ext/sources/examples/talk-llama/models/arcee.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/arctic.cpp +138 -0
- data/ext/sources/examples/talk-llama/models/arwkv7.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/bert.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/bloom.cpp +101 -0
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +111 -0
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +102 -0
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/command-r.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/deci.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +259 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/dream.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +114 -0
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +113 -0
- data/ext/sources/examples/talk-llama/models/falcon.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/gemma.cpp +112 -0
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +155 -0
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +384 -0
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +170 -0
- data/ext/sources/examples/talk-llama/models/glm4.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +196 -0
- data/ext/sources/examples/talk-llama/models/granite.cpp +211 -0
- data/ext/sources/examples/talk-llama/models/graph-context-mamba.cpp +283 -0
- data/ext/sources/examples/talk-llama/models/grok.cpp +159 -0
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +154 -0
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/jais.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +106 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +175 -0
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/llada.cpp +99 -0
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/llama.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/mamba.cpp +55 -0
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +199 -0
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/models.h +569 -0
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/mpt.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +104 -0
- data/ext/sources/examples/talk-llama/models/olmo.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +127 -0
- data/ext/sources/examples/talk-llama/models/openelm.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/orion.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi3.cpp +152 -0
- data/ext/sources/examples/talk-llama/models/plamo.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +316 -0
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/plm.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/qwen.cpp +108 -0
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +151 -0
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +873 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/refact.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +162 -0
- data/ext/sources/examples/talk-llama/models/rwkv6.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/rwkv7.cpp +90 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +146 -0
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +100 -0
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +166 -0
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +96 -0
- data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/xverse.cpp +108 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +102 -16
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +1 -1
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +1 -1
- data/ext/sources/ggml/CMakeLists.txt +82 -54
- data/ext/sources/ggml/include/ggml-alloc.h +9 -0
- data/ext/sources/ggml/include/ggml-backend.h +4 -1
- data/ext/sources/ggml/include/ggml-cpu.h +1 -0
- data/ext/sources/ggml/include/ggml-hexagon.h +19 -0
- data/ext/sources/ggml/include/ggml-rpc.h +8 -11
- data/ext/sources/ggml/include/ggml-zendnn.h +22 -0
- data/ext/sources/ggml/include/ggml.h +190 -12
- data/ext/sources/ggml/src/CMakeLists.txt +82 -11
- data/ext/sources/ggml/src/ggml-alloc.c +124 -41
- data/ext/sources/ggml/src/ggml-backend-impl.h +1 -4
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +27 -3
- data/ext/sources/ggml/src/ggml-backend.cpp +71 -21
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +17 -3
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -9
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +57 -45
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +138 -47
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +2179 -1696
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +238 -317
- data/ext/sources/ggml/src/ggml-cann/common.h +283 -208
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +626 -776
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +156 -86
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +1 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1004 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +50 -2
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +195 -71
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +573 -106
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +33 -44
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +298 -112
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +819 -125
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +708 -431
- data/ext/sources/ggml/src/ggml-cpu/ops.h +5 -4
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +671 -31
- data/ext/sources/ggml/src/ggml-cpu/repack.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +41 -43
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +3 -2
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +7 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +124 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.h +261 -146
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +72 -1
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +2 -2
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +123 -6
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +16 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +353 -80
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +339 -246
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +1 -5
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cu +307 -0
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cu +77 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +31 -21
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +663 -596
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +35 -741
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +1241 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +30 -37
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +14 -13
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +83 -37
- data/ext/sources/ggml/src/ggml-cuda/fill.cu +37 -0
- data/ext/sources/ggml/src/ggml-cuda/fill.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1155 -164
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +5 -4
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +741 -48
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +60 -12
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +381 -42
- data/ext/sources/ggml/src/ggml-cuda/mmid.cu +164 -0
- data/ext/sources/ggml/src/ggml-cuda/mmid.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +69 -176
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +498 -171
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +375 -79
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +3 -2
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +241 -95
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +64 -33
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +151 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +192 -77
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +101 -47
- data/ext/sources/ggml/src/ggml-cuda/set.cu +39 -0
- data/ext/sources/ggml/src/ggml-cuda/set.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +203 -6
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +14 -20
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +49 -84
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +19 -1
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +96 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +168 -76
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +11 -4
- data/ext/sources/ggml/src/ggml-cuda/tri.cu +136 -0
- data/ext/sources/ggml/src/ggml-cuda/tri.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +105 -11
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +36 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +163 -7
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +12 -1
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +6 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +80 -0
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3151 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +44 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +682 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +566 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +112 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.c +63 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.h +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +165 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +92 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +94 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +72 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +1020 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +1353 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +1001 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2503 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +487 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +168 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +287 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +454 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +221 -0
- data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +153 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +8 -13
- data/ext/sources/ggml/src/ggml-impl.h +67 -6
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +2 -2
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +29 -20
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +652 -285
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +103 -56
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +496 -118
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +231 -9
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +1227 -224
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +12 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +14 -8
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1972 -704
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +3 -1
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +11 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +1430 -120
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +82 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +4 -3
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +35 -16
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +13 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +438 -156
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +48 -3
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +77 -0
- data/ext/sources/ggml/src/ggml-sycl/add-id.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +0 -9
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +0 -6
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +117 -15
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +55 -44
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +34 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +79 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +0 -3
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +18 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +76 -3
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +333 -300
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +10 -2
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +335 -110
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +156 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.cpp +97 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.hpp +24 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.cpp +122 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +30 -17
- data/ext/sources/ggml/src/ggml-sycl/set.cpp +73 -0
- data/ext/sources/ggml/src/ggml-sycl/set.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +327 -162
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +4 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +58 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +5013 -2859
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +47 -49
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +9 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +39 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +19 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +45 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +50 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +17 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +2 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +19 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mat_vec_base.comp → mul_mat_vec_base.glsl} +70 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +71 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +41 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +44 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +39 -36
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +78 -103
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +34 -23
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mm_funcs.comp → mul_mm_funcs.glsl} +69 -59
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +72 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +88 -228
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +21 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +10 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +50 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +234 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +6 -50
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +6 -33
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +6 -33
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +6 -39
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +2 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +345 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +90 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +335 -151
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +28 -2
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +169 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1964 -435
- data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +33 -10
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +591 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +1 -1
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +6 -6
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +83 -17
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +112 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +483 -0
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +466 -0
- data/ext/sources/ggml/src/ggml.c +425 -33
- data/ext/sources/include/whisper.h +1 -0
- data/ext/sources/src/CMakeLists.txt +3 -1
- data/ext/sources/src/whisper.cpp +101 -35
- data/ext/sources/tests/CMakeLists.txt +2 -2
- data/ext/sources/tests/test-vad-full.cpp +4 -2
- data/ext/sources/tests/test-vad.cpp +1 -1
- data/extsources.rb +1 -0
- data/lib/whisper/model/uri.rb +17 -18
- data/sig/whisper.rbs +119 -2
- data/test/test_params.rb +16 -8
- data/test/test_segment.rb +0 -1
- data/test/test_token.rb +70 -0
- data/test/test_vad.rb +1 -1
- data/test/test_vad_context.rb +50 -0
- data/test/test_vad_segment.rb +19 -0
- data/test/test_vad_segments.rb +16 -0
- data/test/test_whisper.rb +7 -0
- data/whispercpp.gemspec +1 -1
- metadata +287 -34
- data/ext/sources/build-xcframework.sh +0 -571
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +0 -2579
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +0 -48
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
|
@@ -36,7 +36,47 @@ if (WIN32)
|
|
|
36
36
|
endif()
|
|
37
37
|
endif()
|
|
38
38
|
|
|
39
|
-
|
|
39
|
+
macro(detect_and_find_package package_name)
|
|
40
|
+
set(test_source "
|
|
41
|
+
cmake_minimum_required(VERSION ${CMAKE_VERSION})
|
|
42
|
+
project(check_package LANGUAGES CXX)
|
|
43
|
+
find_package(${package_name} QUIET)
|
|
44
|
+
")
|
|
45
|
+
|
|
46
|
+
set(test_dir "${CMAKE_CURRENT_BINARY_DIR}/check_package_${package_name}")
|
|
47
|
+
file(WRITE "${test_dir}/CMakeLists.txt" "${test_source}")
|
|
48
|
+
|
|
49
|
+
set(cmake_args "")
|
|
50
|
+
if(CMAKE_GENERATOR)
|
|
51
|
+
list(APPEND cmake_args "-G" "${CMAKE_GENERATOR}")
|
|
52
|
+
endif()
|
|
53
|
+
if(CMAKE_GENERATOR_PLATFORM)
|
|
54
|
+
list(APPEND cmake_args "-A" "${CMAKE_GENERATOR_PLATFORM}")
|
|
55
|
+
endif()
|
|
56
|
+
if(CMAKE_GENERATOR_TOOLSET)
|
|
57
|
+
list(APPEND cmake_args "-T" "${CMAKE_GENERATOR_TOOLSET}")
|
|
58
|
+
endif()
|
|
59
|
+
if(CMAKE_CXX_COMPILER)
|
|
60
|
+
list(APPEND cmake_args "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}")
|
|
61
|
+
endif()
|
|
62
|
+
|
|
63
|
+
execute_process(
|
|
64
|
+
COMMAND ${CMAKE_COMMAND} ${cmake_args} .
|
|
65
|
+
WORKING_DIRECTORY "${test_dir}"
|
|
66
|
+
RESULT_VARIABLE result
|
|
67
|
+
OUTPUT_QUIET
|
|
68
|
+
ERROR_QUIET
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
if(result EQUAL 0)
|
|
72
|
+
find_package(${package_name} ${ARGN})
|
|
73
|
+
else()
|
|
74
|
+
message(WARNING "Detection of ${package_name} failed. The package might be broken or incompatible.")
|
|
75
|
+
set(${package_name}_FOUND FALSE)
|
|
76
|
+
endif()
|
|
77
|
+
endmacro()
|
|
78
|
+
|
|
79
|
+
detect_and_find_package(IntelSYCL)
|
|
40
80
|
if (IntelSYCL_FOUND)
|
|
41
81
|
# Use oneAPI CMake when possible
|
|
42
82
|
target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX)
|
|
@@ -91,7 +131,10 @@ if (GGML_SYCL_F16)
|
|
|
91
131
|
add_compile_definitions(GGML_SYCL_F16)
|
|
92
132
|
endif()
|
|
93
133
|
|
|
94
|
-
if (GGML_SYCL_TARGET STREQUAL "
|
|
134
|
+
if (GGML_SYCL_TARGET STREQUAL "INTEL")
|
|
135
|
+
add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
|
|
136
|
+
target_link_options(ggml-sycl PRIVATE -Xs -ze-intel-greater-than-4GB-buffer-required)
|
|
137
|
+
elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
|
|
95
138
|
add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
|
|
96
139
|
elseif (GGML_SYCL_TARGET STREQUAL "AMD")
|
|
97
140
|
# INFO: Allowed Sub_group_sizes are not consistent through all
|
|
@@ -100,7 +143,8 @@ elseif (GGML_SYCL_TARGET STREQUAL "AMD")
|
|
|
100
143
|
# Target archs tested working: gfx1030, gfx1031, (Only tested sub_group_size = 32)
|
|
101
144
|
add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
|
|
102
145
|
else()
|
|
103
|
-
|
|
146
|
+
# default for other target
|
|
147
|
+
add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
|
|
104
148
|
endif()
|
|
105
149
|
|
|
106
150
|
if (GGML_SYCL_GRAPH)
|
|
@@ -187,3 +231,4 @@ if (GGML_SYCL_DEVICE_ARCH)
|
|
|
187
231
|
target_compile_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH})
|
|
188
232
|
target_link_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH})
|
|
189
233
|
endif()
|
|
234
|
+
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
#include <sycl/sycl.hpp>
|
|
2
|
+
#include "common.hpp"
|
|
3
|
+
#include "add-id.hpp"
|
|
4
|
+
|
|
5
|
+
static void add_id_kernel(
|
|
6
|
+
const float* src0,
|
|
7
|
+
const float* src1,
|
|
8
|
+
const int32_t* src2,
|
|
9
|
+
float* dst,
|
|
10
|
+
int64_t ne0,
|
|
11
|
+
int64_t ne1,
|
|
12
|
+
size_t nb01,
|
|
13
|
+
size_t nb02,
|
|
14
|
+
size_t nb11,
|
|
15
|
+
size_t nb21,
|
|
16
|
+
sycl::nd_item<3> item_ct1) {
|
|
17
|
+
const int64_t i1 = item_ct1.get_group(2);
|
|
18
|
+
const int64_t i2 = item_ct1.get_group(1);
|
|
19
|
+
|
|
20
|
+
const int i11 =
|
|
21
|
+
*(const int32_t*)((const char*)src2 + i1 * sizeof(int32_t) + i2 * nb21);
|
|
22
|
+
|
|
23
|
+
const size_t nb1 = ne0 * sizeof(float);
|
|
24
|
+
const size_t nb2 = ne1 * nb1;
|
|
25
|
+
|
|
26
|
+
float* dst_row = (float*)((char*)dst + i1 * nb1 + i2 * nb2);
|
|
27
|
+
const float* src0_row =
|
|
28
|
+
(const float*)((const char*)src0 + i1 * nb01 + i2 * nb02);
|
|
29
|
+
const float* src1_row = (const float*)((const char*)src1 + i11 * nb11);
|
|
30
|
+
|
|
31
|
+
for (int64_t i0 = item_ct1.get_local_id(2); i0 < ne0;
|
|
32
|
+
i0 += item_ct1.get_local_range(2)) {
|
|
33
|
+
dst_row[i0] = src0_row[i0] + src1_row[i0];
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
void ggml_sycl_add_id(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
|
|
38
|
+
const ggml_tensor* src0 = dst->src[0];
|
|
39
|
+
const ggml_tensor* src1 = dst->src[1];
|
|
40
|
+
const ggml_tensor* src2 = dst->src[2];
|
|
41
|
+
|
|
42
|
+
GGML_TENSOR_TERNARY_OP_LOCALS
|
|
43
|
+
|
|
44
|
+
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
|
45
|
+
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
|
46
|
+
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
|
47
|
+
GGML_ASSERT(src2->type == GGML_TYPE_I32);
|
|
48
|
+
|
|
49
|
+
GGML_ASSERT(nb00 == sizeof(float));
|
|
50
|
+
GGML_ASSERT(nb10 == sizeof(float));
|
|
51
|
+
GGML_ASSERT(nb20 == sizeof(int32_t));
|
|
52
|
+
|
|
53
|
+
const float* src0_d = (const float*)src0->data;
|
|
54
|
+
const float* src1_d = (const float*)src1->data;
|
|
55
|
+
const int32_t* src2_d = (const int32_t*)src2->data;
|
|
56
|
+
float* dst_d = (float*)dst->data;
|
|
57
|
+
|
|
58
|
+
int threads = std::min((int)ne00, 768); // cols
|
|
59
|
+
ctx.stream()->parallel_for(
|
|
60
|
+
sycl::nd_range<3>(
|
|
61
|
+
sycl::range<3>(1, ne02, ne01) * sycl::range<3>(1, 1, threads),
|
|
62
|
+
sycl::range<3>(1, 1, threads)),
|
|
63
|
+
[=](sycl::nd_item<3> item_ct1) {
|
|
64
|
+
add_id_kernel(
|
|
65
|
+
src0_d,
|
|
66
|
+
src1_d,
|
|
67
|
+
src2_d,
|
|
68
|
+
dst_d,
|
|
69
|
+
ne0,
|
|
70
|
+
ne1,
|
|
71
|
+
nb01,
|
|
72
|
+
nb02,
|
|
73
|
+
nb11,
|
|
74
|
+
nb21,
|
|
75
|
+
item_ct1);
|
|
76
|
+
});
|
|
77
|
+
}
|
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
#include "concat.hpp"
|
|
19
19
|
#include "conv.hpp"
|
|
20
20
|
#include "convert.hpp"
|
|
21
|
+
#include "count-equal.hpp"
|
|
21
22
|
#include "cpy.hpp"
|
|
22
23
|
#include "dequantize.hpp"
|
|
23
24
|
#include "dmmv.hpp"
|
|
@@ -28,12 +29,17 @@
|
|
|
28
29
|
#include "mmvq.hpp"
|
|
29
30
|
#include "norm.hpp"
|
|
30
31
|
#include "outprod.hpp"
|
|
32
|
+
#include "pad.hpp"
|
|
31
33
|
#include "quantize.hpp"
|
|
32
34
|
#include "quants.hpp"
|
|
35
|
+
#include "roll.hpp"
|
|
33
36
|
#include "rope.hpp"
|
|
34
37
|
#include "set_rows.hpp"
|
|
38
|
+
#include "ssm_conv.hpp"
|
|
35
39
|
#include "softmax.hpp"
|
|
36
40
|
#include "tsembd.hpp"
|
|
37
41
|
#include "wkv.hpp"
|
|
42
|
+
#include "pad_reflect_1d.hpp"
|
|
43
|
+
|
|
38
44
|
|
|
39
45
|
#endif // GGML_SYCL_BACKEND_HPP
|
|
@@ -303,10 +303,6 @@ inline void ggml_sycl_op_sub(ggml_backend_sycl_context & ctx, ggml_tensor *dst)
|
|
|
303
303
|
ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_sub>>(ctx, dst->src[0], dst->src[1], dst);
|
|
304
304
|
}
|
|
305
305
|
|
|
306
|
-
inline void ggml_sycl_op_count_equal(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
307
|
-
ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_count_equal>>(ctx, dst->src[0], dst->src[1], dst);
|
|
308
|
-
}
|
|
309
|
-
|
|
310
306
|
inline void ggml_sycl_op_mul(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
|
|
311
307
|
|
|
312
308
|
ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_mul>>(ctx, dst->src[0], dst->src[1], dst);
|
|
@@ -332,11 +328,6 @@ void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
|
332
328
|
ggml_sycl_op_sub(ctx, dst);
|
|
333
329
|
}
|
|
334
330
|
|
|
335
|
-
void ggml_sycl_count_equal(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
336
|
-
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
|
|
337
|
-
ggml_sycl_op_count_equal(ctx, dst);
|
|
338
|
-
}
|
|
339
|
-
|
|
340
331
|
void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
341
332
|
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
|
|
342
333
|
ggml_sycl_op_mul(ctx, dst);
|
|
@@ -16,12 +16,6 @@ static __dpct_inline__ float op_sub(const float a, const float b) {
|
|
|
16
16
|
return a - b;
|
|
17
17
|
}
|
|
18
18
|
|
|
19
|
-
static __dpct_inline__ float op_count_equal(const float a, const float b) {
|
|
20
|
-
return (a == b) ? 1.0f : 0.0f;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
void ggml_sycl_count_equal(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
24
|
-
|
|
25
19
|
static __dpct_inline__ float op_mul(const float a, const float b) {
|
|
26
20
|
return a * b;
|
|
27
21
|
}
|
|
@@ -195,8 +195,10 @@ struct optimize_feature {
|
|
|
195
195
|
|
|
196
196
|
struct sycl_device_info {
|
|
197
197
|
int cc; // compute capability
|
|
198
|
-
|
|
198
|
+
int nsm; // number of streaming multiprocessors (CUDA) maps to the maximum
|
|
199
|
+
// number of compute units on a SYCL device.
|
|
199
200
|
// size_t smpb; // max. shared memory per block
|
|
201
|
+
size_t smpbo; // max. shared memory per block (with opt-in)
|
|
200
202
|
bool vmm; // virtual memory support
|
|
201
203
|
size_t total_vram;
|
|
202
204
|
//sycl_hw_info hw_info; \\ device id and aarch, currently not used
|
|
@@ -416,13 +418,6 @@ static __dpct_inline__ float warp_reduce_sum(float x,
|
|
|
416
418
|
const sycl::nd_item<3>& item_ct1) {
|
|
417
419
|
#pragma unroll
|
|
418
420
|
for (int mask = WARP_SIZE / 2; mask > 0; mask >>= 1) {
|
|
419
|
-
/*
|
|
420
|
-
DPCT1096:98: The right-most dimension of the work-group used in the SYCL
|
|
421
|
-
kernel that calls this function may be less than "32". The function
|
|
422
|
-
"dpct::permute_sub_group_by_xor" may return an unexpected result on the
|
|
423
|
-
CPU device. Modify the size of the work-group to ensure that the value
|
|
424
|
-
of the right-most dimension is a multiple of "32".
|
|
425
|
-
*/
|
|
426
421
|
x += dpct::permute_sub_group_by_xor(item_ct1.get_sub_group(), x, mask);
|
|
427
422
|
}
|
|
428
423
|
return x;
|
|
@@ -440,17 +435,67 @@ warp_reduce_sum(sycl::float2 a, const sycl::nd_item<3>& item_ct1) {
|
|
|
440
435
|
return a;
|
|
441
436
|
}
|
|
442
437
|
|
|
438
|
+
template <int width = WARP_SIZE>
|
|
439
|
+
static __dpct_inline__ int warp_reduce_sum(int x) {
|
|
440
|
+
return sycl::reduce_over_group(
|
|
441
|
+
sycl::ext::oneapi::this_work_item::get_sub_group(), x, sycl::plus<>());
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
template <int width = WARP_SIZE>
|
|
445
|
+
static __dpct_inline__ float warp_reduce_sum(float x) {
|
|
446
|
+
#pragma unroll
|
|
447
|
+
for (int offset = width / 2; offset > 0; offset >>= 1) {
|
|
448
|
+
x += dpct::permute_sub_group_by_xor(
|
|
449
|
+
sycl::ext::oneapi::this_work_item::get_sub_group(), x, offset, width);
|
|
450
|
+
}
|
|
451
|
+
return x;
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
template <int width = WARP_SIZE>
|
|
455
|
+
static __dpct_inline__ sycl::float2 warp_reduce_sum(sycl::float2 a) {
|
|
456
|
+
#pragma unroll
|
|
457
|
+
for (int offset = width / 2; offset > 0; offset >>= 1) {
|
|
458
|
+
a.x() += dpct::permute_sub_group_by_xor(
|
|
459
|
+
sycl::ext::oneapi::this_work_item::get_sub_group(), a.x(), offset,
|
|
460
|
+
width);
|
|
461
|
+
a.y() += dpct::permute_sub_group_by_xor(
|
|
462
|
+
sycl::ext::oneapi::this_work_item::get_sub_group(), a.y(), offset,
|
|
463
|
+
width);
|
|
464
|
+
}
|
|
465
|
+
return a;
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
template <int width = WARP_SIZE>
|
|
469
|
+
static __dpct_inline__ sycl::half2 warp_reduce_sum(sycl::half2 a) {
|
|
470
|
+
#pragma unroll
|
|
471
|
+
for (int offset = width / 2; offset > 0; offset >>= 1) {
|
|
472
|
+
a = a + dpct::permute_sub_group_by_xor(
|
|
473
|
+
sycl::ext::oneapi::this_work_item::get_sub_group(), a, offset,
|
|
474
|
+
width);
|
|
475
|
+
}
|
|
476
|
+
return a;
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
static constexpr int ggml_sycl_get_physical_warp_size() {
|
|
480
|
+
// todo: for old iGPU + dGPU case, need to be changed.
|
|
481
|
+
return WARP_SIZE;
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
template <int width = WARP_SIZE>
|
|
485
|
+
static __dpct_inline__ float warp_reduce_max(float x) {
|
|
486
|
+
#pragma unroll
|
|
487
|
+
for (int offset = width / 2; offset > 0; offset >>= 1) {
|
|
488
|
+
x = sycl::fmax(x, dpct::permute_sub_group_by_xor(
|
|
489
|
+
sycl::ext::oneapi::this_work_item::get_sub_group(), x,
|
|
490
|
+
offset, width));
|
|
491
|
+
}
|
|
492
|
+
return x;
|
|
493
|
+
}
|
|
494
|
+
|
|
443
495
|
static __dpct_inline__ float warp_reduce_max(float x,
|
|
444
496
|
const sycl::nd_item<3>& item_ct1) {
|
|
445
497
|
#pragma unroll
|
|
446
498
|
for (int mask = WARP_SIZE / 2; mask > 0; mask >>= 1) {
|
|
447
|
-
/*
|
|
448
|
-
DPCT1096:97: The right-most dimension of the work-group used in the SYCL
|
|
449
|
-
kernel that calls this function may be less than "32". The function
|
|
450
|
-
"dpct::permute_sub_group_by_xor" may return an unexpected result on the
|
|
451
|
-
CPU device. Modify the size of the work-group to ensure that the value
|
|
452
|
-
of the right-most dimension is a multiple of "32".
|
|
453
|
-
*/
|
|
454
499
|
x = sycl::fmax(x, dpct::permute_sub_group_by_xor(
|
|
455
500
|
item_ct1.get_sub_group(), x, mask));
|
|
456
501
|
}
|
|
@@ -558,4 +603,61 @@ struct scope_op_debug_print {
|
|
|
558
603
|
std::string_view func_suffix;
|
|
559
604
|
};
|
|
560
605
|
|
|
606
|
+
static __dpct_inline__ float get_alibi_slope(const float max_bias,
|
|
607
|
+
const uint32_t h,
|
|
608
|
+
const uint32_t n_head_log2,
|
|
609
|
+
const float m0,
|
|
610
|
+
const float m1) {
|
|
611
|
+
if (max_bias <= 0.0f) {
|
|
612
|
+
return 1.0f;
|
|
613
|
+
}
|
|
614
|
+
const float base = h < n_head_log2 ? m0 : m1;
|
|
615
|
+
const int exph = h < n_head_log2 ? h + 1 : 2*(h - n_head_log2) + 1;
|
|
616
|
+
|
|
617
|
+
return dpct::pow(base, exph);
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
static const sycl::uint3 init_fastdiv_values(uint32_t d) {
|
|
621
|
+
GGML_ASSERT(d != 0);
|
|
622
|
+
|
|
623
|
+
uint32_t L = 0;
|
|
624
|
+
while (L < 32 && (uint32_t{ 1 } << L) < d) {
|
|
625
|
+
L++;
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
uint32_t mp = (uint32_t) ((uint64_t{ 1 } << 32) * ((uint64_t{ 1 } << L) - d) / d + 1);
|
|
629
|
+
return sycl::uint3(mp, L, d);
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
static __dpct_inline__ uint32_t fastdiv(uint32_t n, const sycl::uint3 fastdiv_values) {
|
|
634
|
+
const uint32_t hi = sycl::mul_hi<unsigned>(n, fastdiv_values.x());
|
|
635
|
+
return (hi + n) >> fastdiv_values.y();
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
static __dpct_inline__ sycl::uint2 fast_div_modulo(uint32_t n, const sycl::uint3 fastdiv_values) {
|
|
640
|
+
const uint32_t div_val = fastdiv(n, fastdiv_values);
|
|
641
|
+
const uint32_t mod_val = n - div_val * fastdiv_values.z();
|
|
642
|
+
return sycl::uint2(div_val, mod_val);
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
static __dpct_inline__ int ggml_sycl_dp4a(const int a, const int b, int c) {
|
|
646
|
+
return dpct::dp4a(a, b, c);
|
|
647
|
+
}
|
|
648
|
+
|
|
649
|
+
static __dpct_inline__ float ggml_sycl_e8m0_to_fp32(uint8_t x) {
|
|
650
|
+
uint32_t bits;
|
|
651
|
+
if (x == 0) {
|
|
652
|
+
bits = 0x00400000;
|
|
653
|
+
} else {
|
|
654
|
+
bits = (uint32_t) x << 23;
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
float result;
|
|
658
|
+
memcpy(&result, &bits, sizeof(float));
|
|
659
|
+
return result;
|
|
660
|
+
}
|
|
661
|
+
|
|
662
|
+
|
|
561
663
|
#endif // GGML_SYCL_COMMON_HPP
|
|
@@ -11,9 +11,13 @@
|
|
|
11
11
|
//
|
|
12
12
|
|
|
13
13
|
#include "concat.hpp"
|
|
14
|
-
#include "common.hpp"
|
|
15
14
|
|
|
16
|
-
static
|
|
15
|
+
static inline size_t elem_size(ggml_type t) {
|
|
16
|
+
return ggml_type_size(t) / ggml_blck_size(t);
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
template <typename T>
|
|
20
|
+
static void concat_T_dim0(const T *x, const T *y, T *dst,
|
|
17
21
|
const int ne0, const int ne00,
|
|
18
22
|
const sycl::nd_item<3> &item_ct1) {
|
|
19
23
|
int nidx = item_ct1.get_local_id(2) +
|
|
@@ -36,7 +40,8 @@ static void concat_f32_dim0(const float *x, const float *y, float *dst,
|
|
|
36
40
|
}
|
|
37
41
|
}
|
|
38
42
|
|
|
39
|
-
|
|
43
|
+
template <typename T>
|
|
44
|
+
static void concat_T_dim1(const T *x, const T *y, T *dst,
|
|
40
45
|
const int ne0, const int ne01,
|
|
41
46
|
const sycl::nd_item<3> &item_ct1) {
|
|
42
47
|
int nidx = item_ct1.get_local_id(2) +
|
|
@@ -59,7 +64,8 @@ static void concat_f32_dim1(const float *x, const float *y, float *dst,
|
|
|
59
64
|
}
|
|
60
65
|
}
|
|
61
66
|
|
|
62
|
-
|
|
67
|
+
template <typename T>
|
|
68
|
+
static void concat_T_dim2(const T *x, const T *y, T *dst,
|
|
63
69
|
const int ne0, const int ne02,
|
|
64
70
|
const sycl::nd_item<3> &item_ct1) {
|
|
65
71
|
int nidx = item_ct1.get_local_id(2) +
|
|
@@ -82,45 +88,35 @@ static void concat_f32_dim2(const float *x, const float *y, float *dst,
|
|
|
82
88
|
}
|
|
83
89
|
}
|
|
84
90
|
|
|
85
|
-
|
|
91
|
+
template <typename T>
|
|
92
|
+
static void concat_T_sycl(const T *x, const T *y, T *dst,
|
|
86
93
|
int ne00, int ne01, int ne02, int ne0, int ne1,
|
|
87
94
|
int ne2, int dim, queue_ptr stream) {
|
|
88
95
|
int num_blocks = (ne0 + SYCL_CONCAT_BLOCK_SIZE - 1) / SYCL_CONCAT_BLOCK_SIZE;
|
|
89
96
|
sycl::range<3> gridDim(ne2, ne1, num_blocks);
|
|
90
97
|
switch (dim) {
|
|
91
98
|
case 0:
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
97
|
-
concat_f32_dim0(x, y, dst, ne0, ne00, item_ct1);
|
|
98
|
-
});
|
|
99
|
-
break;
|
|
99
|
+
stream->parallel_for(sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
|
|
100
|
+
sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
|
|
101
|
+
[=](sycl::nd_item<3> item_ct1) { concat_T_dim0<T>(x, y, dst, ne0, ne00, item_ct1); });
|
|
102
|
+
break;
|
|
100
103
|
case 1:
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
106
|
-
concat_f32_dim1(x, y, dst, ne0, ne01, item_ct1);
|
|
107
|
-
});
|
|
108
|
-
break;
|
|
104
|
+
stream->parallel_for(sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
|
|
105
|
+
sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
|
|
106
|
+
[=](sycl::nd_item<3> item_ct1) { concat_T_dim1<T>(x, y, dst, ne0, ne01, item_ct1); });
|
|
107
|
+
break;
|
|
109
108
|
// dim >=2 will be dispatched to the default path
|
|
110
109
|
default:
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
116
|
-
concat_f32_dim2(x, y, dst, ne0, ne02, item_ct1);
|
|
117
|
-
});
|
|
118
|
-
break;
|
|
110
|
+
stream->parallel_for(sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
|
|
111
|
+
sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
|
|
112
|
+
[=](sycl::nd_item<3> item_ct1) { concat_T_dim2<T>(x, y, dst, ne0, ne02, item_ct1); });
|
|
113
|
+
break;
|
|
119
114
|
}
|
|
120
115
|
}
|
|
121
116
|
|
|
122
117
|
// non-contiguous kernel (slow)
|
|
123
|
-
|
|
118
|
+
template<typename T>
|
|
119
|
+
static void concat_T_sycl_non_cont(
|
|
124
120
|
queue_ptr stream, const char *src0, const char *src1, char *dst,
|
|
125
121
|
int64_t ne00, int64_t ne01, int64_t ne02, int64_t ne03, uint64_t nb00,
|
|
126
122
|
uint64_t nb01, uint64_t nb02, uint64_t nb03, int64_t /*ne10*/,
|
|
@@ -137,24 +133,25 @@ static void concat_f32_sycl_non_cont(
|
|
|
137
133
|
int64_t o[4] = { 0, 0, 0, 0 };
|
|
138
134
|
o[dim] = dim == 0 ? ne00 : (dim == 1 ? ne01 : (dim == 2 ? ne02 : ne03));
|
|
139
135
|
|
|
140
|
-
const
|
|
136
|
+
const T * x;
|
|
141
137
|
|
|
142
138
|
for (int i0 = item_ct1.get_local_id(2); i0 < ne0; i0 += item_ct1.get_local_range(2)) {
|
|
143
139
|
if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
|
|
144
|
-
x = (const
|
|
140
|
+
x = (const T *) (src0 + (i3) *nb03 + (i2) *nb02 + (i1) *nb01 + (i0) *nb00);
|
|
145
141
|
} else {
|
|
146
|
-
x = (const
|
|
142
|
+
x = (const T *) (src1 + (i3 - o[3]) * nb13 + (i2 - o[2]) * nb12 + (i1 - o[1]) * nb11 +
|
|
147
143
|
(i0 - o[0]) * nb10);
|
|
148
144
|
}
|
|
149
145
|
|
|
150
|
-
|
|
146
|
+
T *y = (T *)(dst + i3 * nb3 + i2 * nb2 + i1 * nb1 + i0 * nb0);
|
|
151
147
|
|
|
152
148
|
*y = *x;
|
|
153
149
|
}
|
|
154
150
|
});
|
|
155
151
|
}
|
|
156
152
|
|
|
157
|
-
|
|
153
|
+
template <typename T>
|
|
154
|
+
void concat_impl_sycl(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
|
|
158
155
|
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
|
|
159
156
|
const ggml_tensor * src0 = dst->src[0];
|
|
160
157
|
const ggml_tensor * src1 = dst->src[1];
|
|
@@ -163,15 +160,14 @@ void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
|
|
|
163
160
|
const int32_t dim = ((int32_t *) dst->op_params)[0];
|
|
164
161
|
|
|
165
162
|
if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
|
|
166
|
-
const
|
|
167
|
-
const
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
163
|
+
const T * src0_d = (const T *) src0->data;
|
|
164
|
+
const T * src1_d = (const T *) src1->data;
|
|
165
|
+
T * dst_d = (T *) dst->data;
|
|
166
|
+
size_t type_size = elem_size(dst->type);
|
|
171
167
|
if (dim != 3) {
|
|
172
168
|
for (int i3 = 0; i3 < dst->ne[3]; i3++) {
|
|
173
|
-
|
|
174
|
-
dst_d + i3 * (dst->nb[3] /
|
|
169
|
+
concat_T_sycl<T>(src0_d + i3 * (src0->nb[3] / type_size), src1_d + i3 * (src1->nb[3] / type_size),
|
|
170
|
+
dst_d + i3 * (dst->nb[3] / type_size), src0->ne[0], src0->ne[1], src0->ne[2], dst->ne[0],
|
|
175
171
|
dst->ne[1], dst->ne[2], dim, stream);
|
|
176
172
|
}
|
|
177
173
|
} else {
|
|
@@ -179,13 +175,28 @@ void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
|
|
|
179
175
|
const size_t size1 = ggml_nbytes(src1);
|
|
180
176
|
|
|
181
177
|
SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d, src0_d, size0).wait()));
|
|
182
|
-
SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d + size0 /
|
|
178
|
+
SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d + size0 / type_size, src1_d, size1).wait()));
|
|
183
179
|
}
|
|
184
180
|
} else {
|
|
185
|
-
|
|
181
|
+
concat_T_sycl_non_cont<T>(stream, (const char *) src0->data, (const char *) src1->data, (char *) dst->data,
|
|
186
182
|
src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], src0->nb[0], src0->nb[1],
|
|
187
183
|
src0->nb[2], src0->nb[3], src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
|
|
188
184
|
src1->nb[0], src1->nb[1], src1->nb[2], src1->nb[3], dst->ne[0], dst->ne[1], dst->ne[2],
|
|
189
185
|
dst->ne[3], dst->nb[0], dst->nb[1], dst->nb[2], dst->nb[3], dim);
|
|
190
186
|
}
|
|
191
187
|
}
|
|
188
|
+
|
|
189
|
+
void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
|
|
190
|
+
|
|
191
|
+
switch (dst->type) {
|
|
192
|
+
case GGML_TYPE_F32:
|
|
193
|
+
concat_impl_sycl<float>(ctx, dst);
|
|
194
|
+
break;
|
|
195
|
+
case GGML_TYPE_I32:
|
|
196
|
+
concat_impl_sycl<int32_t>(ctx, dst);
|
|
197
|
+
break;
|
|
198
|
+
default:
|
|
199
|
+
GGML_ASSERT(false && "ggml_sycl_op_concat: unsupported type");
|
|
200
|
+
break;
|
|
201
|
+
}
|
|
202
|
+
}
|
|
@@ -2,6 +2,13 @@
|
|
|
2
2
|
#include "dequantize.hpp"
|
|
3
3
|
#include "presets.hpp"
|
|
4
4
|
|
|
5
|
+
#if defined(__INTEL_LLVM_COMPILER)
|
|
6
|
+
#if __has_include(<sycl/ext/oneapi/bfloat16.hpp>)
|
|
7
|
+
#include <sycl/ext/oneapi/bfloat16.hpp>
|
|
8
|
+
#define GGML_SYCL_HAS_BF16
|
|
9
|
+
#endif
|
|
10
|
+
#endif
|
|
11
|
+
|
|
5
12
|
template <int qk, int qr, dequantize_kernel_t dequantize_kernel, typename dst_t>
|
|
6
13
|
static void dequantize_block(const void * __restrict__ vx, dst_t * __restrict__ y, const int64_t k,
|
|
7
14
|
const sycl::nd_item<3> &item_ct1) {
|
|
@@ -465,6 +472,16 @@ static void dequantize_row_iq4_nl_sycl(const void *vx, dst_t *y, const int64_t k
|
|
|
465
472
|
}
|
|
466
473
|
}
|
|
467
474
|
|
|
475
|
+
template <typename dst_t>
|
|
476
|
+
static void dequantize_row_mxfp4_sycl(const void * vx, dst_t * y, const int64_t k, dpct::queue_ptr stream) {
|
|
477
|
+
const int nb = (k + QK_K - 1) / QK_K;
|
|
478
|
+
stream->parallel_for(
|
|
479
|
+
sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
|
480
|
+
[=](sycl::nd_item<3> item_ct1) {
|
|
481
|
+
dequantize_block_mxfp4(vx, y, item_ct1);
|
|
482
|
+
});
|
|
483
|
+
}
|
|
484
|
+
|
|
468
485
|
template <typename src_t, typename dst_t>
|
|
469
486
|
static void convert_unary_nc(const void * __restrict__ vx, dst_t * __restrict__ y, const int64_t ne00, const int64_t ne01,
|
|
470
487
|
const int64_t ne02, const int64_t s01, const int64_t s02, const int64_t s03,
|
|
@@ -511,6 +528,7 @@ static void convert_unary_sycl(const void * vx, dst_t * y, const int64_t k, dpct
|
|
|
511
528
|
convert_unary_nc_sycl<src_t>(vx, y, k, 1, 1, 1, k, k, k, queue);
|
|
512
529
|
}
|
|
513
530
|
|
|
531
|
+
|
|
514
532
|
to_fp16_sycl_t ggml_get_to_fp16_sycl(ggml_type type, ggml_tensor * dst) {
|
|
515
533
|
switch (type) {
|
|
516
534
|
case GGML_TYPE_Q4_0:
|
|
@@ -564,8 +582,14 @@ to_fp16_sycl_t ggml_get_to_fp16_sycl(ggml_type type, ggml_tensor * dst) {
|
|
|
564
582
|
return dequantize_row_iq4_xs_sycl;
|
|
565
583
|
case GGML_TYPE_IQ4_NL:
|
|
566
584
|
return dequantize_row_iq4_nl_sycl;
|
|
585
|
+
case GGML_TYPE_MXFP4:
|
|
586
|
+
return dequantize_row_mxfp4_sycl;
|
|
567
587
|
case GGML_TYPE_F32:
|
|
568
588
|
return convert_unary_sycl<float>;
|
|
589
|
+
#ifdef GGML_SYCL_HAS_BF16
|
|
590
|
+
case GGML_TYPE_BF16:
|
|
591
|
+
return convert_unary_sycl<sycl::ext::oneapi::bfloat16>;
|
|
592
|
+
#endif
|
|
569
593
|
default:
|
|
570
594
|
return nullptr;
|
|
571
595
|
}
|
|
@@ -625,8 +649,14 @@ to_fp32_sycl_t ggml_get_to_fp32_sycl(ggml_type type, ggml_tensor *dst) {
|
|
|
625
649
|
return dequantize_row_iq4_xs_sycl;
|
|
626
650
|
case GGML_TYPE_IQ4_NL:
|
|
627
651
|
return dequantize_row_iq4_nl_sycl;
|
|
652
|
+
case GGML_TYPE_MXFP4:
|
|
653
|
+
return dequantize_row_mxfp4_sycl;
|
|
628
654
|
case GGML_TYPE_F16:
|
|
629
655
|
return convert_unary_sycl<sycl::half>;
|
|
656
|
+
#ifdef GGML_SYCL_HAS_BF16
|
|
657
|
+
case GGML_TYPE_BF16:
|
|
658
|
+
return convert_unary_sycl<sycl::ext::oneapi::bfloat16>;
|
|
659
|
+
#endif
|
|
630
660
|
default:
|
|
631
661
|
return nullptr;
|
|
632
662
|
}
|
|
@@ -636,6 +666,10 @@ to_fp16_nc_sycl_t get_to_fp16_nc_sycl(ggml_type type) {
|
|
|
636
666
|
switch (type) {
|
|
637
667
|
case GGML_TYPE_F32:
|
|
638
668
|
return convert_unary_nc_sycl<float>;
|
|
669
|
+
#ifdef GGML_SYCL_HAS_BF16
|
|
670
|
+
case GGML_TYPE_BF16:
|
|
671
|
+
return convert_unary_nc_sycl<sycl::ext::oneapi::bfloat16>;
|
|
672
|
+
#endif
|
|
639
673
|
default:
|
|
640
674
|
return nullptr;
|
|
641
675
|
}
|