whispercpp 1.3.3 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +60 -43
- data/ext/extconf.rb +2 -2
- data/ext/ruby_whisper.c +14 -2
- data/ext/ruby_whisper.h +39 -0
- data/ext/ruby_whisper_context.c +22 -22
- data/ext/ruby_whisper_model.c +12 -12
- data/ext/ruby_whisper_params.c +79 -25
- data/ext/ruby_whisper_segment.c +84 -19
- data/ext/ruby_whisper_token.c +351 -0
- data/ext/ruby_whisper_transcribe.cpp +1 -1
- data/ext/ruby_whisper_vad_context.c +75 -0
- data/ext/ruby_whisper_vad_context_detect.cpp +50 -0
- data/ext/ruby_whisper_vad_segment.c +139 -0
- data/ext/ruby_whisper_vad_segments.c +106 -0
- data/ext/sources/CMakeLists.txt +4 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/arm64-apple-clang.cmake +16 -0
- data/ext/sources/cmake/arm64-windows-llvm.cmake +16 -0
- data/ext/sources/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- data/ext/sources/cmake/x64-windows-llvm.cmake +5 -0
- data/ext/sources/examples/CMakeLists.txt +1 -0
- data/ext/sources/examples/addon.node/addon.cpp +19 -19
- data/ext/sources/examples/addon.node/index.js +7 -5
- data/ext/sources/examples/addon.node/vad-example.js +2 -2
- data/ext/sources/examples/bench/bench.cpp +26 -16
- data/ext/sources/examples/bench.wasm/index-tmpl.html +10 -9
- data/ext/sources/examples/cli/cli.cpp +122 -111
- data/ext/sources/examples/command/command.cpp +26 -24
- data/ext/sources/examples/command.wasm/index-tmpl.html +5 -4
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/lsp/CMakeLists.txt +2 -1
- data/ext/sources/examples/lsp/lsp.cpp +19 -17
- data/ext/sources/examples/quantize/CMakeLists.txt +2 -1
- data/ext/sources/examples/server/server.cpp +34 -24
- data/ext/sources/examples/server.py +6 -1
- data/ext/sources/examples/stream/stream.cpp +4 -2
- data/ext/sources/examples/stream.wasm/emscripten.cpp +6 -6
- data/ext/sources/examples/stream.wasm/index-tmpl.html +82 -5
- data/ext/sources/examples/talk-llama/CMakeLists.txt +7 -3
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +113 -7
- data/ext/sources/examples/talk-llama/llama-adapter.h +13 -1
- data/ext/sources/examples/talk-llama/llama-arch.cpp +2136 -1491
- data/ext/sources/examples/talk-llama/llama-arch.h +125 -3
- data/ext/sources/examples/talk-llama/llama-batch.cpp +174 -100
- data/ext/sources/examples/talk-llama/llama-batch.h +46 -20
- data/ext/sources/examples/talk-llama/llama-chat.cpp +199 -8
- data/ext/sources/examples/talk-llama/llama-chat.h +11 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +1213 -413
- data/ext/sources/examples/talk-llama/llama-context.h +99 -36
- data/ext/sources/examples/talk-llama/llama-cparams.h +5 -4
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +288 -53
- data/ext/sources/examples/talk-llama/llama-grammar.h +22 -1
- data/ext/sources/examples/talk-llama/llama-graph.cpp +883 -294
- data/ext/sources/examples/talk-llama/llama-graph.h +361 -161
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +144 -6
- data/ext/sources/examples/talk-llama/llama-hparams.h +100 -23
- data/ext/sources/examples/talk-llama/llama-impl.cpp +7 -3
- data/ext/sources/examples/talk-llama/llama-impl.h +3 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +328 -0
- data/ext/sources/examples/talk-llama/{llama-kv-cache-unified-iswa.h → llama-kv-cache-iswa.h} +38 -29
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +2100 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +373 -27
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +124 -30
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +63 -41
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +30 -29
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +77 -35
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +15 -16
- data/ext/sources/examples/talk-llama/llama-memory.h +16 -10
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +172 -37
- data/ext/sources/examples/talk-llama/llama-mmap.h +8 -3
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +93 -9
- data/ext/sources/examples/talk-llama/llama-model-loader.h +9 -2
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +3 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +3369 -10145
- data/ext/sources/examples/talk-llama/llama-model.h +104 -12
- data/ext/sources/examples/talk-llama/llama-quant.cpp +53 -30
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +1520 -324
- data/ext/sources/examples/talk-llama/llama-sampling.h +19 -7
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +562 -39
- data/ext/sources/examples/talk-llama/llama-vocab.h +50 -0
- data/ext/sources/examples/talk-llama/llama.cpp +794 -12
- data/ext/sources/examples/talk-llama/llama.h +246 -190
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +191 -0
- data/ext/sources/examples/talk-llama/models/apertus.cpp +125 -0
- data/ext/sources/examples/talk-llama/models/arcee.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/arctic.cpp +138 -0
- data/ext/sources/examples/talk-llama/models/arwkv7.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/bert.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/bloom.cpp +101 -0
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +111 -0
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +102 -0
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/command-r.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/deci.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +259 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/dream.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +114 -0
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +113 -0
- data/ext/sources/examples/talk-llama/models/falcon.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/gemma.cpp +112 -0
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +155 -0
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +384 -0
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +170 -0
- data/ext/sources/examples/talk-llama/models/glm4.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +196 -0
- data/ext/sources/examples/talk-llama/models/granite.cpp +211 -0
- data/ext/sources/examples/talk-llama/models/graph-context-mamba.cpp +283 -0
- data/ext/sources/examples/talk-llama/models/grok.cpp +159 -0
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +154 -0
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/jais.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +106 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +175 -0
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/llada.cpp +99 -0
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/llama.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/mamba.cpp +55 -0
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +199 -0
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/models.h +569 -0
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/mpt.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +104 -0
- data/ext/sources/examples/talk-llama/models/olmo.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +127 -0
- data/ext/sources/examples/talk-llama/models/openelm.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/orion.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi3.cpp +152 -0
- data/ext/sources/examples/talk-llama/models/plamo.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +316 -0
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/plm.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/qwen.cpp +108 -0
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +151 -0
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +873 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/refact.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +162 -0
- data/ext/sources/examples/talk-llama/models/rwkv6.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/rwkv7.cpp +90 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +146 -0
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +100 -0
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +166 -0
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +96 -0
- data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/xverse.cpp +108 -0
- data/ext/sources/examples/talk-llama/talk-llama.cpp +9 -6
- data/ext/sources/examples/talk-llama/unicode.cpp +309 -16
- data/ext/sources/examples/talk-llama/unicode.h +45 -0
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +1 -1
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +4 -2
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +18 -17
- data/ext/sources/ggml/CMakeLists.txt +135 -79
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +132 -93
- data/ext/sources/ggml/include/ggml-alloc.h +9 -0
- data/ext/sources/ggml/include/ggml-backend.h +21 -2
- data/ext/sources/ggml/include/ggml-cpu.h +2 -1
- data/ext/sources/ggml/include/ggml-hexagon.h +19 -0
- data/ext/sources/ggml/include/ggml-metal.h +1 -6
- data/ext/sources/ggml/include/ggml-opt.h +25 -6
- data/ext/sources/ggml/include/ggml-rpc.h +8 -11
- data/ext/sources/ggml/include/ggml-webgpu.h +19 -0
- data/ext/sources/ggml/include/ggml-zdnn.h +17 -0
- data/ext/sources/ggml/include/ggml-zendnn.h +22 -0
- data/ext/sources/ggml/include/ggml.h +406 -23
- data/ext/sources/ggml/src/CMakeLists.txt +99 -13
- data/ext/sources/ggml/src/ggml-alloc.c +368 -161
- data/ext/sources/ggml/src/ggml-backend-impl.h +5 -5
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +55 -14
- data/ext/sources/ggml/src/ggml-backend.cpp +290 -57
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +17 -3
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +10 -13
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +14 -0
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +59 -45
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +138 -47
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +2586 -1917
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +348 -309
- data/ext/sources/ggml/src/ggml-cann/common.h +350 -133
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +894 -625
- data/ext/sources/ggml/src/ggml-common.h +17 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +167 -75
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +5 -2
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +560 -622
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1002 -270
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +107 -587
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +162 -589
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +373 -486
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +3 -58
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +521 -353
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +54 -314
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +184 -675
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +4682 -1660
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +82 -4
- data/ext/sources/ggml/src/ggml-cpu/common.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +18 -9
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +263 -111
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +39 -28
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +683 -82
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +38 -43
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +435 -119
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1234 -1182
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +2167 -1480
- data/ext/sources/ggml/src/ggml-cpu/ops.h +10 -12
- data/ext/sources/ggml/src/ggml-cpu/quants.c +35 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1132 -81
- data/ext/sources/ggml/src/ggml-cpu/repack.h +36 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +120 -93
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- data/ext/sources/ggml/src/ggml-cpu/traits.cpp +2 -2
- data/ext/sources/ggml/src/ggml-cpu/traits.h +1 -1
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +7 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +294 -27
- data/ext/sources/ggml/src/ggml-cpu/vec.h +606 -48
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +92 -17
- data/ext/sources/ggml/src/ggml-cuda/add-id.cu +58 -0
- data/ext/sources/ggml/src/ggml-cuda/add-id.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +2 -2
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +123 -6
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +16 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +330 -191
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +588 -128
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +1 -4
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cu +166 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +95 -22
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +25 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +335 -485
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +1 -5
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cu +307 -0
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +14 -40
- data/ext/sources/ggml/src/ggml-cuda/diag.cu +77 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +519 -378
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +750 -637
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +1244 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +586 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +98 -61
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +230 -197
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/fill.cu +37 -0
- data/ext/sources/ggml/src/ggml-cuda/fill.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +50 -39
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1557 -294
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +196 -35
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +57 -2
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +915 -69
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +171 -0
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +835 -0
- data/ext/sources/ggml/src/ggml-cuda/mmid.cu +164 -0
- data/ext/sources/ggml/src/ggml-cuda/mmid.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +109 -67
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +1601 -733
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +802 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +12 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +286 -149
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +284 -12
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +86 -32
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +163 -10
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +53 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cu +67 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +207 -98
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +14 -11
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +330 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/set.cu +39 -0
- data/ext/sources/ggml/src/ggml-cuda/set.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +325 -61
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +14 -12
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +291 -104
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +6 -10
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +21 -4
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +40 -19
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +96 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +351 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +21 -0
- data/ext/sources/ggml/src/ggml-cuda/tri.cu +136 -0
- data/ext/sources/ggml/src/ggml-cuda/tri.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +3 -3
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +189 -5
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +44 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +248 -6
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +110 -22
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +8 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +70 -37
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +10 -3
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +80 -0
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3151 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +44 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +682 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +566 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +112 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.c +63 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.h +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +165 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +92 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +94 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +72 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +1020 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +1353 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +1001 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2503 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +487 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +168 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +287 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +454 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +221 -0
- data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +153 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +16 -13
- data/ext/sources/ggml/src/ggml-impl.h +186 -15
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -7
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +446 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +33 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +609 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +1743 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +273 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +1686 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +356 -61
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +4161 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +724 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +4495 -1876
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +21 -9
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +29 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +4005 -427
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +107 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +147 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +66 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +82 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
- data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +177 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +49 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +73 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +80 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +94 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +66 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +33 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +2 -2
- data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- data/ext/sources/ggml/src/ggml-opt.cpp +97 -41
- data/ext/sources/ggml/src/ggml-quants.c +111 -16
- data/ext/sources/ggml/src/ggml-quants.h +6 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +497 -195
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +48 -3
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +77 -0
- data/ext/sources/ggml/src/ggml-sycl/add-id.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +6 -5
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +117 -15
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +50 -30
- data/ext/sources/ggml/src/ggml-sycl/conv.cpp +10 -4
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +200 -99
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +79 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +72 -309
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +213 -1
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +18 -0
- data/ext/sources/ggml/src/ggml-sycl/dmmv.cpp +67 -49
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +77 -34
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +397 -314
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +12 -2
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +14 -26
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +9 -6
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +643 -413
- data/ext/sources/ggml/src/ggml-sycl/gla.cpp +2 -2
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +2 -2
- data/ext/sources/ggml/src/ggml-sycl/mmq.cpp +80 -60
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +223 -132
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +230 -55
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.cpp +97 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.hpp +24 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/quantize.hpp +133 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +8 -9
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.cpp +122 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +65 -59
- data/ext/sources/ggml/src/ggml-sycl/set.cpp +73 -0
- data/ext/sources/ggml/src/ggml-sycl/set.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/set_rows.cpp +234 -0
- data/ext/sources/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +330 -165
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +4 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +12 -6
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +60 -6
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +16 -12
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +7398 -2635
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +43 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +15 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +56 -39
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +347 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +5 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +67 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +158 -16
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +38 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +3 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +7 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +5 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +103 -36
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +220 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +139 -45
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +113 -38
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +75 -14
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +19 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +2 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +21 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +28 -18
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +4 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +33 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +125 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +227 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +71 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +41 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +44 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +20 -14
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +143 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +144 -556
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +230 -51
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +566 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +72 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +90 -223
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +195 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +41 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +59 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +104 -14
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +234 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +6 -52
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +6 -35
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +6 -35
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +6 -39
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl +5 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +30 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +6 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +16 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +5 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +435 -24
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +148 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/utils.glsl +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +619 -177
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +169 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3087 -0
- data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +45 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +930 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +147 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +591 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +874 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +907 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +112 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +81 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +483 -0
- data/ext/sources/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- data/ext/sources/ggml/src/ggml-zdnn/common.hpp +59 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +628 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.cpp +79 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.hpp +19 -0
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +466 -0
- data/ext/sources/ggml/src/ggml.c +901 -129
- data/ext/sources/ggml/src/gguf.cpp +8 -1
- data/ext/sources/include/whisper.h +1 -0
- data/ext/sources/src/CMakeLists.txt +3 -1
- data/ext/sources/src/whisper.cpp +124 -81
- data/ext/sources/tests/CMakeLists.txt +8 -1
- data/ext/sources/tests/test-vad-full.cpp +7 -5
- data/ext/sources/tests/test-vad.cpp +3 -3
- data/extsources.rb +1 -0
- data/lib/whisper/model/uri.rb +17 -18
- data/sig/whisper.rbs +126 -2
- data/test/test_params.rb +24 -8
- data/test/test_segment.rb +0 -1
- data/test/test_token.rb +70 -0
- data/test/test_vad.rb +1 -1
- data/test/test_vad_context.rb +50 -0
- data/test/test_vad_segment.rb +19 -0
- data/test/test_vad_segments.rb +16 -0
- data/test/test_whisper.rb +8 -1
- data/whispercpp.gemspec +1 -1
- metadata +439 -179
- data/ext/sources/build-xcframework.sh +0 -547
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.cpp +0 -279
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.cpp +0 -1841
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.h +0 -303
- data/ext/sources/ggml/include/ggml-kompute.h +0 -50
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- data/ext/sources/ggml/src/ggml-amx/common.h +0 -94
- data/ext/sources/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
- data/ext/sources/ggml/src/ggml-amx/mmq.cpp +0 -2510
- data/ext/sources/ggml/src/ggml-amx/mmq.h +0 -17
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +0 -2579
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- data/ext/sources/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- data/ext/sources/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +0 -357
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +0 -365
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +0 -482
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +0 -472
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +0 -506
- data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +0 -11
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- data/ext/sources/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +0 -6280
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +0 -162
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +0 -118
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -99
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -58
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
|
@@ -204,6 +204,10 @@
|
|
|
204
204
|
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
|
205
205
|
#endif
|
|
206
206
|
|
|
207
|
+
#if defined(_WIN32) && !defined(_WIN32_WINNT)
|
|
208
|
+
# define _WIN32_WINNT 0x0A00
|
|
209
|
+
#endif
|
|
210
|
+
|
|
207
211
|
#include <stdbool.h>
|
|
208
212
|
#include <stddef.h>
|
|
209
213
|
#include <stdint.h>
|
|
@@ -230,6 +234,11 @@
|
|
|
230
234
|
|
|
231
235
|
#if UINTPTR_MAX == 0xFFFFFFFF
|
|
232
236
|
#define GGML_MEM_ALIGN 4
|
|
237
|
+
#elif defined(__EMSCRIPTEN__)
|
|
238
|
+
// emscripten uses max_align_t == 8, so we need GGML_MEM_ALIGN == 8 for 64-bit wasm.
|
|
239
|
+
// (for 32-bit wasm, the first conditional is true and GGML_MEM_ALIGN stays 4.)
|
|
240
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/18628
|
|
241
|
+
#define GGML_MEM_ALIGN 8
|
|
233
242
|
#else
|
|
234
243
|
#define GGML_MEM_ALIGN 16
|
|
235
244
|
#endif
|
|
@@ -237,11 +246,23 @@
|
|
|
237
246
|
#define GGML_EXIT_SUCCESS 0
|
|
238
247
|
#define GGML_EXIT_ABORTED 1
|
|
239
248
|
|
|
249
|
+
// TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
|
|
250
|
+
#define GGML_ROPE_TYPE_NORMAL 0
|
|
240
251
|
#define GGML_ROPE_TYPE_NEOX 2
|
|
241
252
|
#define GGML_ROPE_TYPE_MROPE 8
|
|
242
253
|
#define GGML_ROPE_TYPE_VISION 24
|
|
254
|
+
#define GGML_ROPE_TYPE_IMROPE 40 // binary: 101000
|
|
255
|
+
|
|
256
|
+
#define GGML_MROPE_SECTIONS 4
|
|
243
257
|
|
|
244
258
|
#define GGML_UNUSED(x) (void)(x)
|
|
259
|
+
#ifdef __CUDACC__
|
|
260
|
+
template<typename... Args>
|
|
261
|
+
__host__ __device__ constexpr inline void ggml_unused_vars_impl(Args&&...) noexcept {}
|
|
262
|
+
#define GGML_UNUSED_VARS(...) ggml_unused_vars_impl(__VA_ARGS__)
|
|
263
|
+
#else
|
|
264
|
+
#define GGML_UNUSED_VARS(...) do { (void)sizeof((__VA_ARGS__, 0)); } while(0)
|
|
265
|
+
#endif // __CUDACC__
|
|
245
266
|
|
|
246
267
|
#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
|
|
247
268
|
|
|
@@ -275,19 +296,19 @@
|
|
|
275
296
|
// GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
|
|
276
297
|
//
|
|
277
298
|
#define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
|
|
278
|
-
const type prefix##0 = (pointer)->array[0]; \
|
|
299
|
+
const type prefix##0 = (pointer) ? (pointer)->array[0] : 0; \
|
|
279
300
|
GGML_UNUSED(prefix##0);
|
|
280
301
|
#define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
|
|
281
302
|
GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \
|
|
282
|
-
const type prefix##1 = (pointer)->array[1]; \
|
|
303
|
+
const type prefix##1 = (pointer) ? (pointer)->array[1] : 0; \
|
|
283
304
|
GGML_UNUSED(prefix##1);
|
|
284
305
|
#define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
|
|
285
306
|
GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \
|
|
286
|
-
const type prefix##2 = (pointer)->array[2]; \
|
|
307
|
+
const type prefix##2 = (pointer) ? (pointer)->array[2] : 0; \
|
|
287
308
|
GGML_UNUSED(prefix##2);
|
|
288
309
|
#define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
|
|
289
310
|
GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \
|
|
290
|
-
const type prefix##3 = (pointer)->array[3]; \
|
|
311
|
+
const type prefix##3 = (pointer) ? (pointer)->array[3] : 0; \
|
|
291
312
|
GGML_UNUSED(prefix##3);
|
|
292
313
|
|
|
293
314
|
#define GGML_TENSOR_UNARY_OP_LOCALS \
|
|
@@ -304,6 +325,16 @@
|
|
|
304
325
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
|
305
326
|
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
|
306
327
|
|
|
328
|
+
#define GGML_TENSOR_TERNARY_OP_LOCALS \
|
|
329
|
+
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
|
330
|
+
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
|
331
|
+
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
|
|
332
|
+
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
|
|
333
|
+
GGML_TENSOR_LOCALS(int64_t, ne2, src2, ne) \
|
|
334
|
+
GGML_TENSOR_LOCALS(size_t, nb2, src2, nb) \
|
|
335
|
+
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
|
336
|
+
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
|
337
|
+
|
|
307
338
|
#define GGML_TENSOR_BINARY_OP_LOCALS01 \
|
|
308
339
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
|
309
340
|
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
|
@@ -314,6 +345,13 @@
|
|
|
314
345
|
extern "C" {
|
|
315
346
|
#endif
|
|
316
347
|
|
|
348
|
+
// Function type used in fatal error callbacks
|
|
349
|
+
typedef void (*ggml_abort_callback_t)(const char * error_message);
|
|
350
|
+
|
|
351
|
+
// Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
|
|
352
|
+
// Returns the old callback for chaining
|
|
353
|
+
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback);
|
|
354
|
+
|
|
317
355
|
GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
|
|
318
356
|
GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
|
|
319
357
|
|
|
@@ -388,7 +426,8 @@ extern "C" {
|
|
|
388
426
|
// GGML_TYPE_IQ4_NL_4_4 = 36,
|
|
389
427
|
// GGML_TYPE_IQ4_NL_4_8 = 37,
|
|
390
428
|
// GGML_TYPE_IQ4_NL_8_8 = 38,
|
|
391
|
-
GGML_TYPE_COUNT = 39,
|
429
|
+
GGML_TYPE_MXFP4 = 39, // MXFP4 (1 block)
|
|
430
|
+
GGML_TYPE_COUNT = 40,
|
|
392
431
|
};
|
|
393
432
|
|
|
394
433
|
// precision
|
|
@@ -423,6 +462,7 @@ extern "C" {
|
|
|
423
462
|
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
|
|
424
463
|
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
|
|
425
464
|
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
|
|
465
|
+
GGML_FTYPE_MOSTLY_MXFP4 = 25, // except 1d tensors
|
|
426
466
|
};
|
|
427
467
|
|
|
428
468
|
// available tensor operations:
|
|
@@ -431,6 +471,7 @@ extern "C" {
|
|
|
431
471
|
|
|
432
472
|
GGML_OP_DUP,
|
|
433
473
|
GGML_OP_ADD,
|
|
474
|
+
GGML_OP_ADD_ID,
|
|
434
475
|
GGML_OP_ADD1,
|
|
435
476
|
GGML_OP_ACC,
|
|
436
477
|
GGML_OP_SUB,
|
|
@@ -443,6 +484,7 @@ extern "C" {
|
|
|
443
484
|
GGML_OP_COS,
|
|
444
485
|
GGML_OP_SUM,
|
|
445
486
|
GGML_OP_SUM_ROWS,
|
|
487
|
+
GGML_OP_CUMSUM,
|
|
446
488
|
GGML_OP_MEAN,
|
|
447
489
|
GGML_OP_ARGMAX,
|
|
448
490
|
GGML_OP_COUNT_EQUAL,
|
|
@@ -482,20 +524,25 @@ extern "C" {
|
|
|
482
524
|
GGML_OP_CONV_TRANSPOSE_1D,
|
|
483
525
|
GGML_OP_IM2COL,
|
|
484
526
|
GGML_OP_IM2COL_BACK,
|
|
527
|
+
GGML_OP_IM2COL_3D,
|
|
485
528
|
GGML_OP_CONV_2D,
|
|
529
|
+
GGML_OP_CONV_3D,
|
|
486
530
|
GGML_OP_CONV_2D_DW,
|
|
487
531
|
GGML_OP_CONV_TRANSPOSE_2D,
|
|
488
532
|
GGML_OP_POOL_1D,
|
|
489
533
|
GGML_OP_POOL_2D,
|
|
490
534
|
GGML_OP_POOL_2D_BACK,
|
|
491
|
-
GGML_OP_UPSCALE,
|
|
535
|
+
GGML_OP_UPSCALE,
|
|
492
536
|
GGML_OP_PAD,
|
|
493
537
|
GGML_OP_PAD_REFLECT_1D,
|
|
494
538
|
GGML_OP_ROLL,
|
|
495
539
|
GGML_OP_ARANGE,
|
|
496
540
|
GGML_OP_TIMESTEP_EMBEDDING,
|
|
497
541
|
GGML_OP_ARGSORT,
|
|
542
|
+
GGML_OP_TOP_K,
|
|
498
543
|
GGML_OP_LEAKY_RELU,
|
|
544
|
+
GGML_OP_TRI,
|
|
545
|
+
GGML_OP_FILL,
|
|
499
546
|
|
|
500
547
|
GGML_OP_FLASH_ATTN_EXT,
|
|
501
548
|
GGML_OP_FLASH_ATTN_BACK,
|
|
@@ -508,6 +555,7 @@ extern "C" {
|
|
|
508
555
|
GGML_OP_RWKV_WKV6,
|
|
509
556
|
GGML_OP_GATED_LINEAR_ATTN,
|
|
510
557
|
GGML_OP_RWKV_WKV7,
|
|
558
|
+
GGML_OP_SOLVE_TRI,
|
|
511
559
|
|
|
512
560
|
GGML_OP_UNARY,
|
|
513
561
|
|
|
@@ -520,6 +568,7 @@ extern "C" {
|
|
|
520
568
|
GGML_OP_CROSS_ENTROPY_LOSS,
|
|
521
569
|
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
|
522
570
|
GGML_OP_OPT_STEP_ADAMW,
|
|
571
|
+
GGML_OP_OPT_STEP_SGD,
|
|
523
572
|
|
|
524
573
|
GGML_OP_GLU,
|
|
525
574
|
|
|
@@ -541,7 +590,14 @@ extern "C" {
|
|
|
541
590
|
GGML_UNARY_OP_HARDSWISH,
|
|
542
591
|
GGML_UNARY_OP_HARDSIGMOID,
|
|
543
592
|
GGML_UNARY_OP_EXP,
|
|
593
|
+
GGML_UNARY_OP_EXPM1,
|
|
594
|
+
GGML_UNARY_OP_SOFTPLUS,
|
|
544
595
|
GGML_UNARY_OP_GELU_ERF,
|
|
596
|
+
GGML_UNARY_OP_XIELU,
|
|
597
|
+
GGML_UNARY_OP_FLOOR,
|
|
598
|
+
GGML_UNARY_OP_CEIL,
|
|
599
|
+
GGML_UNARY_OP_ROUND,
|
|
600
|
+
GGML_UNARY_OP_TRUNC,
|
|
545
601
|
|
|
546
602
|
GGML_UNARY_OP_COUNT,
|
|
547
603
|
};
|
|
@@ -550,6 +606,9 @@ extern "C" {
|
|
|
550
606
|
GGML_GLU_OP_REGLU,
|
|
551
607
|
GGML_GLU_OP_GEGLU,
|
|
552
608
|
GGML_GLU_OP_SWIGLU,
|
|
609
|
+
GGML_GLU_OP_SWIGLU_OAI,
|
|
610
|
+
GGML_GLU_OP_GEGLU_ERF,
|
|
611
|
+
GGML_GLU_OP_GEGLU_QUICK,
|
|
553
612
|
|
|
554
613
|
GGML_GLU_OP_COUNT,
|
|
555
614
|
};
|
|
@@ -577,6 +636,13 @@ extern "C" {
|
|
|
577
636
|
GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
|
|
578
637
|
};
|
|
579
638
|
|
|
639
|
+
enum ggml_tri_type {
|
|
640
|
+
GGML_TRI_TYPE_UPPER_DIAG = 0,
|
|
641
|
+
GGML_TRI_TYPE_UPPER = 1,
|
|
642
|
+
GGML_TRI_TYPE_LOWER_DIAG = 2,
|
|
643
|
+
GGML_TRI_TYPE_LOWER = 3
|
|
644
|
+
};
|
|
645
|
+
|
|
580
646
|
struct ggml_init_params {
|
|
581
647
|
// memory pool
|
|
582
648
|
size_t mem_size; // bytes
|
|
@@ -639,6 +705,9 @@ extern "C" {
|
|
|
639
705
|
|
|
640
706
|
// misc
|
|
641
707
|
|
|
708
|
+
GGML_API const char * ggml_version(void);
|
|
709
|
+
GGML_API const char * ggml_commit(void);
|
|
710
|
+
|
|
642
711
|
GGML_API void ggml_time_init(void); // call this once at the beginning of the program
|
|
643
712
|
GGML_API int64_t ggml_time_ms(void);
|
|
644
713
|
GGML_API int64_t ggml_time_us(void);
|
|
@@ -819,6 +888,13 @@ extern "C" {
|
|
|
819
888
|
struct ggml_tensor * b,
|
|
820
889
|
enum ggml_type type);
|
|
821
890
|
|
|
891
|
+
// dst[i0, i1, i2] = a[i0, i1, i2] + b[i0, ids[i1, i2]]
|
|
892
|
+
GGML_API struct ggml_tensor * ggml_add_id(
|
|
893
|
+
struct ggml_context * ctx,
|
|
894
|
+
struct ggml_tensor * a,
|
|
895
|
+
struct ggml_tensor * b,
|
|
896
|
+
struct ggml_tensor * ids);
|
|
897
|
+
|
|
822
898
|
GGML_API struct ggml_tensor * ggml_add1(
|
|
823
899
|
struct ggml_context * ctx,
|
|
824
900
|
struct ggml_tensor * a,
|
|
@@ -904,6 +980,22 @@ extern "C" {
|
|
|
904
980
|
struct ggml_context * ctx,
|
|
905
981
|
struct ggml_tensor * a);
|
|
906
982
|
|
|
983
|
+
GGML_API struct ggml_tensor * ggml_expm1(
|
|
984
|
+
struct ggml_context * ctx,
|
|
985
|
+
struct ggml_tensor * a);
|
|
986
|
+
|
|
987
|
+
GGML_API struct ggml_tensor * ggml_expm1_inplace(
|
|
988
|
+
struct ggml_context * ctx,
|
|
989
|
+
struct ggml_tensor * a);
|
|
990
|
+
|
|
991
|
+
GGML_API struct ggml_tensor * ggml_softplus(
|
|
992
|
+
struct ggml_context * ctx,
|
|
993
|
+
struct ggml_tensor * a);
|
|
994
|
+
|
|
995
|
+
GGML_API struct ggml_tensor * ggml_softplus_inplace(
|
|
996
|
+
struct ggml_context * ctx,
|
|
997
|
+
struct ggml_tensor * a);
|
|
998
|
+
|
|
907
999
|
GGML_API struct ggml_tensor * ggml_sin(
|
|
908
1000
|
struct ggml_context * ctx,
|
|
909
1001
|
struct ggml_tensor * a);
|
|
@@ -930,6 +1022,10 @@ extern "C" {
|
|
|
930
1022
|
struct ggml_context * ctx,
|
|
931
1023
|
struct ggml_tensor * a);
|
|
932
1024
|
|
|
1025
|
+
GGML_API struct ggml_tensor * ggml_cumsum(
|
|
1026
|
+
struct ggml_context * ctx,
|
|
1027
|
+
struct ggml_tensor * a);
|
|
1028
|
+
|
|
933
1029
|
// mean along rows
|
|
934
1030
|
GGML_API struct ggml_tensor * ggml_mean(
|
|
935
1031
|
struct ggml_context * ctx,
|
|
@@ -1103,6 +1199,58 @@ extern "C" {
|
|
|
1103
1199
|
struct ggml_context * ctx,
|
|
1104
1200
|
struct ggml_tensor * a);
|
|
1105
1201
|
|
|
1202
|
+
GGML_API struct ggml_tensor * ggml_floor(
|
|
1203
|
+
struct ggml_context * ctx,
|
|
1204
|
+
struct ggml_tensor * a);
|
|
1205
|
+
|
|
1206
|
+
GGML_API struct ggml_tensor * ggml_floor_inplace(
|
|
1207
|
+
struct ggml_context * ctx,
|
|
1208
|
+
struct ggml_tensor * a);
|
|
1209
|
+
|
|
1210
|
+
GGML_API struct ggml_tensor * ggml_ceil(
|
|
1211
|
+
struct ggml_context * ctx,
|
|
1212
|
+
struct ggml_tensor * a);
|
|
1213
|
+
|
|
1214
|
+
GGML_API struct ggml_tensor * ggml_ceil_inplace(
|
|
1215
|
+
struct ggml_context * ctx,
|
|
1216
|
+
struct ggml_tensor * a);
|
|
1217
|
+
|
|
1218
|
+
GGML_API struct ggml_tensor * ggml_round(
|
|
1219
|
+
struct ggml_context * ctx,
|
|
1220
|
+
struct ggml_tensor * a);
|
|
1221
|
+
|
|
1222
|
+
GGML_API struct ggml_tensor * ggml_round_inplace(
|
|
1223
|
+
struct ggml_context * ctx,
|
|
1224
|
+
struct ggml_tensor * a);
|
|
1225
|
+
|
|
1226
|
+
/**
|
|
1227
|
+
* Truncates the fractional part of each element in the tensor (towards zero).
|
|
1228
|
+
* For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
|
|
1229
|
+
* Similar to std::trunc in C/C++.
|
|
1230
|
+
*/
|
|
1231
|
+
|
|
1232
|
+
GGML_API struct ggml_tensor * ggml_trunc(
|
|
1233
|
+
struct ggml_context * ctx,
|
|
1234
|
+
struct ggml_tensor * a);
|
|
1235
|
+
|
|
1236
|
+
GGML_API struct ggml_tensor * ggml_trunc_inplace(
|
|
1237
|
+
struct ggml_context * ctx,
|
|
1238
|
+
struct ggml_tensor * a);
|
|
1239
|
+
|
|
1240
|
+
|
|
1241
|
+
|
|
1242
|
+
// xIELU activation function
|
|
1243
|
+
// x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
|
|
1244
|
+
// where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
|
|
1245
|
+
// that constrain the positive and negative source alpha values respectively
|
|
1246
|
+
GGML_API struct ggml_tensor * ggml_xielu(
|
|
1247
|
+
struct ggml_context * ctx,
|
|
1248
|
+
struct ggml_tensor * a,
|
|
1249
|
+
float alpha_n,
|
|
1250
|
+
float alpha_p,
|
|
1251
|
+
float beta,
|
|
1252
|
+
float eps);
|
|
1253
|
+
|
|
1106
1254
|
// gated linear unit ops
|
|
1107
1255
|
// A: n columns, r rows,
|
|
1108
1256
|
// result is n / 2 columns, r rows,
|
|
@@ -1137,6 +1285,22 @@ extern "C" {
|
|
|
1137
1285
|
struct ggml_context * ctx,
|
|
1138
1286
|
struct ggml_tensor * a);
|
|
1139
1287
|
|
|
1288
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf(
|
|
1289
|
+
struct ggml_context * ctx,
|
|
1290
|
+
struct ggml_tensor * a);
|
|
1291
|
+
|
|
1292
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
|
|
1293
|
+
struct ggml_context * ctx,
|
|
1294
|
+
struct ggml_tensor * a);
|
|
1295
|
+
|
|
1296
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick(
|
|
1297
|
+
struct ggml_context * ctx,
|
|
1298
|
+
struct ggml_tensor * a);
|
|
1299
|
+
|
|
1300
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
|
|
1301
|
+
struct ggml_context * ctx,
|
|
1302
|
+
struct ggml_tensor * a);
|
|
1303
|
+
|
|
1140
1304
|
// A: n columns, r rows,
|
|
1141
1305
|
// B: n columns, r rows,
|
|
1142
1306
|
GGML_API struct ggml_tensor * ggml_glu_split(
|
|
@@ -1160,6 +1324,23 @@ extern "C" {
|
|
|
1160
1324
|
struct ggml_tensor * a,
|
|
1161
1325
|
struct ggml_tensor * b);
|
|
1162
1326
|
|
|
1327
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf_split(
|
|
1328
|
+
struct ggml_context * ctx,
|
|
1329
|
+
struct ggml_tensor * a,
|
|
1330
|
+
struct ggml_tensor * b);
|
|
1331
|
+
|
|
1332
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick_split(
|
|
1333
|
+
struct ggml_context * ctx,
|
|
1334
|
+
struct ggml_tensor * a,
|
|
1335
|
+
struct ggml_tensor * b);
|
|
1336
|
+
|
|
1337
|
+
GGML_API struct ggml_tensor * ggml_swiglu_oai(
|
|
1338
|
+
struct ggml_context * ctx,
|
|
1339
|
+
struct ggml_tensor * a,
|
|
1340
|
+
struct ggml_tensor * b,
|
|
1341
|
+
float alpha,
|
|
1342
|
+
float limit);
|
|
1343
|
+
|
|
1163
1344
|
// normalize along rows
|
|
1164
1345
|
GGML_API struct ggml_tensor * ggml_norm(
|
|
1165
1346
|
struct ggml_context * ctx,
|
|
@@ -1259,6 +1440,19 @@ extern "C" {
|
|
|
1259
1440
|
struct ggml_tensor * a,
|
|
1260
1441
|
float s);
|
|
1261
1442
|
|
|
1443
|
+
// x = s * a + b
|
|
1444
|
+
GGML_API struct ggml_tensor * ggml_scale_bias(
|
|
1445
|
+
struct ggml_context * ctx,
|
|
1446
|
+
struct ggml_tensor * a,
|
|
1447
|
+
float s,
|
|
1448
|
+
float b);
|
|
1449
|
+
|
|
1450
|
+
GGML_API struct ggml_tensor * ggml_scale_bias_inplace(
|
|
1451
|
+
struct ggml_context * ctx,
|
|
1452
|
+
struct ggml_tensor * a,
|
|
1453
|
+
float s,
|
|
1454
|
+
float b);
|
|
1455
|
+
|
|
1262
1456
|
// b -> view(a,offset,nb1,nb2,3), return modified a
|
|
1263
1457
|
GGML_API struct ggml_tensor * ggml_set(
|
|
1264
1458
|
struct ggml_context * ctx,
|
|
@@ -1313,6 +1507,7 @@ extern "C" {
|
|
|
1313
1507
|
struct ggml_tensor * a,
|
|
1314
1508
|
struct ggml_tensor * b);
|
|
1315
1509
|
|
|
1510
|
+
// note: casting from f32 to i32 will discard the fractional part
|
|
1316
1511
|
GGML_API struct ggml_tensor * ggml_cast(
|
|
1317
1512
|
struct ggml_context * ctx,
|
|
1318
1513
|
struct ggml_tensor * a,
|
|
@@ -1437,7 +1632,11 @@ extern "C" {
|
|
|
1437
1632
|
struct ggml_context * ctx,
|
|
1438
1633
|
struct ggml_tensor * a);
|
|
1439
1634
|
|
|
1440
|
-
// supports 3D: a->ne[2] == b->ne[1]
|
|
1635
|
+
// supports 4D a:
|
|
1636
|
+
// a [n_embd, ne1, ne2, ne3]
|
|
1637
|
+
// b I32 [n_rows, ne2, ne3, 1]
|
|
1638
|
+
//
|
|
1639
|
+
// return [n_embd, n_rows, ne2, ne3]
|
|
1441
1640
|
GGML_API struct ggml_tensor * ggml_get_rows(
|
|
1442
1641
|
struct ggml_context * ctx,
|
|
1443
1642
|
struct ggml_tensor * a, // data
|
|
@@ -1503,8 +1702,14 @@ extern "C" {
|
|
|
1503
1702
|
struct ggml_context * ctx,
|
|
1504
1703
|
struct ggml_tensor * a);
|
|
1505
1704
|
|
|
1705
|
+
// a [ne0, ne01, ne02, ne03]
|
|
1706
|
+
// mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
|
|
1707
|
+
//
|
|
1708
|
+
// broadcast:
|
|
1709
|
+
// ne02 % ne12 == 0
|
|
1710
|
+
// ne03 % ne13 == 0
|
|
1711
|
+
//
|
|
1506
1712
|
// fused soft_max(a*scale + mask*(ALiBi slope))
|
|
1507
|
-
// mask is optional
|
|
1508
1713
|
// max_bias = 0.0f for no ALiBi
|
|
1509
1714
|
GGML_API struct ggml_tensor * ggml_soft_max_ext(
|
|
1510
1715
|
struct ggml_context * ctx,
|
|
@@ -1513,6 +1718,17 @@ extern "C" {
|
|
|
1513
1718
|
float scale,
|
|
1514
1719
|
float max_bias);
|
|
1515
1720
|
|
|
1721
|
+
GGML_API struct ggml_tensor * ggml_soft_max_ext_inplace(
|
|
1722
|
+
struct ggml_context * ctx,
|
|
1723
|
+
struct ggml_tensor * a,
|
|
1724
|
+
struct ggml_tensor * mask,
|
|
1725
|
+
float scale,
|
|
1726
|
+
float max_bias);
|
|
1727
|
+
|
|
1728
|
+
GGML_API void ggml_soft_max_add_sinks(
|
|
1729
|
+
struct ggml_tensor * a,
|
|
1730
|
+
struct ggml_tensor * sinks);
|
|
1731
|
+
|
|
1516
1732
|
GGML_API struct ggml_tensor * ggml_soft_max_ext_back(
|
|
1517
1733
|
struct ggml_context * ctx,
|
|
1518
1734
|
struct ggml_tensor * a,
|
|
@@ -1571,7 +1787,7 @@ extern "C" {
|
|
|
1571
1787
|
struct ggml_tensor * b,
|
|
1572
1788
|
struct ggml_tensor * c,
|
|
1573
1789
|
int n_dims,
|
|
1574
|
-
int sections[4],
|
|
1790
|
+
int sections[GGML_MROPE_SECTIONS],
|
|
1575
1791
|
int mode,
|
|
1576
1792
|
int n_ctx_orig,
|
|
1577
1793
|
float freq_base,
|
|
@@ -1597,6 +1813,22 @@ extern "C" {
|
|
|
1597
1813
|
float beta_fast,
|
|
1598
1814
|
float beta_slow);
|
|
1599
1815
|
|
|
1816
|
+
GGML_API struct ggml_tensor * ggml_rope_multi_inplace(
|
|
1817
|
+
struct ggml_context * ctx,
|
|
1818
|
+
struct ggml_tensor * a,
|
|
1819
|
+
struct ggml_tensor * b,
|
|
1820
|
+
struct ggml_tensor * c,
|
|
1821
|
+
int n_dims,
|
|
1822
|
+
int sections[GGML_MROPE_SECTIONS],
|
|
1823
|
+
int mode,
|
|
1824
|
+
int n_ctx_orig,
|
|
1825
|
+
float freq_base,
|
|
1826
|
+
float freq_scale,
|
|
1827
|
+
float ext_factor,
|
|
1828
|
+
float attn_factor,
|
|
1829
|
+
float beta_fast,
|
|
1830
|
+
float beta_slow);
|
|
1831
|
+
|
|
1600
1832
|
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom(
|
|
1601
1833
|
struct ggml_context * ctx,
|
|
1602
1834
|
struct ggml_tensor * a,
|
|
@@ -1754,6 +1986,41 @@ extern "C" {
|
|
|
1754
1986
|
int d0, // dilation dimension 0
|
|
1755
1987
|
int d1); // dilation dimension 1
|
|
1756
1988
|
|
|
1989
|
+
GGML_API struct ggml_tensor * ggml_im2col_3d(
|
|
1990
|
+
struct ggml_context * ctx,
|
|
1991
|
+
struct ggml_tensor * a,
|
|
1992
|
+
struct ggml_tensor * b,
|
|
1993
|
+
int64_t IC,
|
|
1994
|
+
int s0, // stride width
|
|
1995
|
+
int s1, // stride height
|
|
1996
|
+
int s2, // stride depth
|
|
1997
|
+
int p0, // padding width
|
|
1998
|
+
int p1, // padding height
|
|
1999
|
+
int p2, // padding depth
|
|
2000
|
+
int d0, // dilation width
|
|
2001
|
+
int d1, // dilation height
|
|
2002
|
+
int d2, // dilation depth
|
|
2003
|
+
enum ggml_type dst_type);
|
|
2004
|
+
|
|
2005
|
+
// a: [OC*IC, KD, KH, KW]
|
|
2006
|
+
// b: [N*IC, ID, IH, IW]
|
|
2007
|
+
// result: [N*OC, OD, OH, OW]
|
|
2008
|
+
GGML_API struct ggml_tensor * ggml_conv_3d(
|
|
2009
|
+
struct ggml_context * ctx,
|
|
2010
|
+
struct ggml_tensor * a,
|
|
2011
|
+
struct ggml_tensor * b,
|
|
2012
|
+
int64_t IC,
|
|
2013
|
+
int s0, // stride width
|
|
2014
|
+
int s1, // stride height
|
|
2015
|
+
int s2, // stride depth
|
|
2016
|
+
int p0, // padding width
|
|
2017
|
+
int p1, // padding height
|
|
2018
|
+
int p2, // padding depth
|
|
2019
|
+
int d0, // dilation width
|
|
2020
|
+
int d1, // dilation height
|
|
2021
|
+
int d2 // dilation depth
|
|
2022
|
+
);
|
|
2023
|
+
|
|
1757
2024
|
// kernel size is a->ne[0] x a->ne[1]
|
|
1758
2025
|
// stride is equal to kernel size
|
|
1759
2026
|
// padding is zero
|
|
@@ -1825,6 +2092,23 @@ extern "C" {
|
|
|
1825
2092
|
int d0, // dilation dimension 0
|
|
1826
2093
|
int d1); // dilation dimension 1
|
|
1827
2094
|
|
|
2095
|
+
GGML_API struct ggml_tensor * ggml_conv_3d_direct(
|
|
2096
|
+
struct ggml_context * ctx,
|
|
2097
|
+
struct ggml_tensor * a, // kernel [KW, KH, KD, IC * OC]
|
|
2098
|
+
struct ggml_tensor * b, // input [W, H, D, C * N]
|
|
2099
|
+
int s0, // stride
|
|
2100
|
+
int s1,
|
|
2101
|
+
int s2,
|
|
2102
|
+
int p0, // padding
|
|
2103
|
+
int p1,
|
|
2104
|
+
int p2,
|
|
2105
|
+
int d0, // dilation
|
|
2106
|
+
int d1,
|
|
2107
|
+
int d2,
|
|
2108
|
+
int n_channels,
|
|
2109
|
+
int n_batch,
|
|
2110
|
+
int n_channels_out);
|
|
2111
|
+
|
|
1828
2112
|
enum ggml_op_pool {
|
|
1829
2113
|
GGML_OP_POOL_MAX,
|
|
1830
2114
|
GGML_OP_POOL_AVG,
|
|
@@ -1867,12 +2151,14 @@ extern "C" {
|
|
|
1867
2151
|
enum ggml_scale_mode {
|
|
1868
2152
|
GGML_SCALE_MODE_NEAREST = 0,
|
|
1869
2153
|
GGML_SCALE_MODE_BILINEAR = 1,
|
|
2154
|
+
GGML_SCALE_MODE_BICUBIC = 2,
|
|
1870
2155
|
|
|
1871
2156
|
GGML_SCALE_MODE_COUNT
|
|
1872
2157
|
};
|
|
1873
2158
|
|
|
1874
2159
|
enum ggml_scale_flag {
|
|
1875
|
-
GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
|
|
2160
|
+
GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8),
|
|
2161
|
+
GGML_SCALE_FLAG_ANTIALIAS = (1 << 9),
|
|
1876
2162
|
};
|
|
1877
2163
|
|
|
1878
2164
|
// interpolate
|
|
@@ -1915,6 +2201,41 @@ extern "C" {
|
|
|
1915
2201
|
int p2,
|
|
1916
2202
|
int p3);
|
|
1917
2203
|
|
|
2204
|
+
// pad each dimension with values on the other side of the torus (looping around)
|
|
2205
|
+
GGML_API struct ggml_tensor * ggml_pad_circular(
|
|
2206
|
+
struct ggml_context * ctx,
|
|
2207
|
+
struct ggml_tensor * a,
|
|
2208
|
+
int p0,
|
|
2209
|
+
int p1,
|
|
2210
|
+
int p2,
|
|
2211
|
+
int p3);
|
|
2212
|
+
|
|
2213
|
+
GGML_API struct ggml_tensor * ggml_pad_ext(
|
|
2214
|
+
struct ggml_context * ctx,
|
|
2215
|
+
struct ggml_tensor * a,
|
|
2216
|
+
int lp0,
|
|
2217
|
+
int rp0,
|
|
2218
|
+
int lp1,
|
|
2219
|
+
int rp1,
|
|
2220
|
+
int lp2,
|
|
2221
|
+
int rp2,
|
|
2222
|
+
int lp3,
|
|
2223
|
+
int rp3
|
|
2224
|
+
);
|
|
2225
|
+
|
|
2226
|
+
// pad each dimension with values on the other side of the torus (looping around)
|
|
2227
|
+
GGML_API struct ggml_tensor * ggml_pad_ext_circular(
|
|
2228
|
+
struct ggml_context * ctx,
|
|
2229
|
+
struct ggml_tensor * a,
|
|
2230
|
+
int lp0,
|
|
2231
|
+
int rp0,
|
|
2232
|
+
int lp1,
|
|
2233
|
+
int rp1,
|
|
2234
|
+
int lp2,
|
|
2235
|
+
int rp2,
|
|
2236
|
+
int lp3,
|
|
2237
|
+
int rp3);
|
|
2238
|
+
|
|
1918
2239
|
// pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
|
|
1919
2240
|
GGML_API struct ggml_tensor * ggml_pad_reflect_1d(
|
|
1920
2241
|
struct ggml_context * ctx,
|
|
@@ -1932,6 +2253,23 @@ extern "C" {
|
|
|
1932
2253
|
int shift2,
|
|
1933
2254
|
int shift3);
|
|
1934
2255
|
|
|
2256
|
+
// Convert matrix into a triangular one (upper, strict upper, lower or strict lower) by writing
|
|
2257
|
+
// zeroes everywhere outside the masked area
|
|
2258
|
+
GGML_API struct ggml_tensor * ggml_tri(
|
|
2259
|
+
struct ggml_context * ctx,
|
|
2260
|
+
struct ggml_tensor * a,
|
|
2261
|
+
enum ggml_tri_type type);
|
|
2262
|
+
|
|
2263
|
+
// Fill tensor a with constant c
|
|
2264
|
+
GGML_API struct ggml_tensor * ggml_fill(
|
|
2265
|
+
struct ggml_context * ctx,
|
|
2266
|
+
struct ggml_tensor * a,
|
|
2267
|
+
float c);
|
|
2268
|
+
|
|
2269
|
+
GGML_API struct ggml_tensor * ggml_fill_inplace(
|
|
2270
|
+
struct ggml_context * ctx,
|
|
2271
|
+
struct ggml_tensor * a,
|
|
2272
|
+
float c);
|
|
1935
2273
|
|
|
1936
2274
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
|
1937
2275
|
// timesteps: [N,]
|
|
@@ -1953,25 +2291,36 @@ extern "C" {
|
|
|
1953
2291
|
struct ggml_tensor * a,
|
|
1954
2292
|
enum ggml_sort_order order);
|
|
1955
2293
|
|
|
1956
|
-
GGML_API struct ggml_tensor * ggml_arange(
|
2294
|
+
// similar to ggml_top_k but implemented as `argsort` + `view`
|
|
2295
|
+
GGML_API struct ggml_tensor * ggml_argsort_top_k(
|
|
1957
2296
|
struct ggml_context * ctx,
|
|
1958
|
-
float start,
|
|
1959
|
-
float stop,
1960
|
-
float step);
|
|
2297
|
+
struct ggml_tensor * a,
|
|
2298
|
+
int k);
|
|
1961
2299
|
|
|
1962
2300
|
// top k elements per row
|
|
2301
|
+
// note: the resulting top k indices are in no particular order
|
|
1963
2302
|
GGML_API struct ggml_tensor * ggml_top_k(
|
|
1964
2303
|
struct ggml_context * ctx,
|
|
1965
2304
|
struct ggml_tensor * a,
|
|
1966
2305
|
int k);
|
|
1967
2306
|
|
|
1968
|
-
|
|
2307
|
+
GGML_API struct ggml_tensor * ggml_arange(
|
|
2308
|
+
struct ggml_context * ctx,
|
|
2309
|
+
float start,
|
|
2310
|
+
float stop,
|
|
2311
|
+
float step);
|
|
1969
2312
|
|
|
1970
|
-
// q: [n_embd_k, n_batch,
|
|
1971
|
-
// k: [n_embd_k, n_kv,
|
|
1972
|
-
// v: [n_embd_v, n_kv,
|
|
1973
|
-
// mask: [n_kv,
|
|
1974
|
-
// res: [n_embd_v, n_head,
|
|
2313
|
+
// q: [n_embd_k, n_batch, n_head, ne3 ]
|
|
2314
|
+
// k: [n_embd_k, n_kv, n_head_kv, ne3 ]
|
|
2315
|
+
// v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
|
|
2316
|
+
// mask: [n_kv, n_batch, ne32, ne33]
|
|
2317
|
+
// res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
|
|
2318
|
+
//
|
|
2319
|
+
// broadcast:
|
|
2320
|
+
// n_head % n_head_kv == 0
|
|
2321
|
+
// n_head % ne32 == 0
|
|
2322
|
+
// ne3 % ne33 == 0
|
|
2323
|
+
//
|
|
1975
2324
|
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
|
|
1976
2325
|
struct ggml_context * ctx,
|
|
1977
2326
|
struct ggml_tensor * q,
|
|
@@ -1989,6 +2338,10 @@ extern "C" {
|
|
|
1989
2338
|
GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec(
|
|
1990
2339
|
const struct ggml_tensor * a);
|
|
1991
2340
|
|
|
2341
|
+
GGML_API void ggml_flash_attn_ext_add_sinks(
|
|
2342
|
+
struct ggml_tensor * a,
|
|
2343
|
+
struct ggml_tensor * sinks);
|
|
2344
|
+
|
|
1992
2345
|
// TODO: needs to be adapted to ggml_flash_attn_ext
|
|
1993
2346
|
GGML_API struct ggml_tensor * ggml_flash_attn_back(
|
|
1994
2347
|
struct ggml_context * ctx,
|
|
@@ -2010,7 +2363,8 @@ extern "C" {
|
|
|
2010
2363
|
struct ggml_tensor * dt,
|
|
2011
2364
|
struct ggml_tensor * A,
|
|
2012
2365
|
struct ggml_tensor * B,
|
|
2013
|
-
struct ggml_tensor * C);
|
|
2366
|
+
struct ggml_tensor * C,
|
|
2367
|
+
struct ggml_tensor * ids);
|
|
2014
2368
|
|
|
2015
2369
|
// partition into non-overlapping windows with padding if needed
|
|
2016
2370
|
// example:
|
|
@@ -2090,6 +2444,27 @@ extern "C" {
|
|
|
2090
2444
|
struct ggml_tensor * b,
|
|
2091
2445
|
struct ggml_tensor * state);
|
|
2092
2446
|
|
|
2447
|
+
/* Solves a specific equation of the form Ax=B, where A is a triangular matrix
|
|
2448
|
+
* without zeroes on the diagonal (i.e. invertible).
|
|
2449
|
+
* B can have any number of columns, but must have the same number of rows as A
|
|
2450
|
+
* If A is [n, n] and B is [n, m], then the result will be [n, m] as well
|
|
2451
|
+
* Has O(n^3) complexity (unlike most matrix ops out there), so use on cases
|
|
2452
|
+
* where n > 100 sparingly, pre-chunk if necessary.
|
|
2453
|
+
*
|
|
2454
|
+
* If left = false, solves xA=B instead
|
|
2455
|
+
* If lower = false, assumes upper triangular instead
|
|
2456
|
+
* If uni = true, assumes diagonal of A to be all ones (will override actual values)
|
|
2457
|
+
*
|
|
2458
|
+
* TODO: currently only lower, right, non-unitriangular variant is implemented
|
|
2459
|
+
*/
|
|
2460
|
+
GGML_API struct ggml_tensor * ggml_solve_tri(
|
|
2461
|
+
struct ggml_context * ctx,
|
|
2462
|
+
struct ggml_tensor * a,
|
|
2463
|
+
struct ggml_tensor * b,
|
|
2464
|
+
bool left,
|
|
2465
|
+
bool lower,
|
|
2466
|
+
bool uni);
|
|
2467
|
+
|
|
2093
2468
|
// custom operators
|
|
2094
2469
|
|
|
2095
2470
|
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
|
|
@@ -2193,7 +2568,14 @@ extern "C" {
|
|
|
2193
2568
|
struct ggml_tensor * grad,
|
|
2194
2569
|
struct ggml_tensor * m,
|
|
2195
2570
|
struct ggml_tensor * v,
|
|
2196
|
-
struct ggml_tensor * adamw_params); // parameters such
|
|
2571
|
+
struct ggml_tensor * adamw_params); // parameters such as the learning rate
|
|
2572
|
+
|
|
2573
|
+
// stochastic gradient descent step (with weight decay)
|
|
2574
|
+
GGML_API struct ggml_tensor * ggml_opt_step_sgd(
|
|
2575
|
+
struct ggml_context * ctx,
|
|
2576
|
+
struct ggml_tensor * a,
|
|
2577
|
+
struct ggml_tensor * grad,
|
|
2578
|
+
struct ggml_tensor * sgd_params); // alpha, weight decay
|
|
2197
2579
|
|
|
2198
2580
|
//
|
|
2199
2581
|
// automatic differentiation
|
|
@@ -2238,7 +2620,8 @@ extern "C" {
|
|
|
2238
2620
|
|
|
2239
2621
|
// Set callback for all future logging events.
|
|
2240
2622
|
// If this is not called, or NULL is supplied, everything is output on stderr.
|
|
2241
|
-
GGML_API void ggml_log_set(ggml_log_callback log_callback, void * user_data);
|
|
2623
|
+
GGML_API void ggml_log_get(ggml_log_callback * log_callback, void ** user_data);
|
|
2624
|
+
GGML_API void ggml_log_set(ggml_log_callback log_callback, void * user_data);
|
|
2242
2625
|
|
|
2243
2626
|
GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
|
|
2244
2627
|
|