whispercpp 1.3.4 → 1.3.6
This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.md +158 -44
- data/ext/extconf.rb +3 -2
- data/ext/ruby_whisper.c +34 -6
- data/ext/ruby_whisper.h +67 -0
- data/ext/ruby_whisper_context.c +236 -144
- data/ext/ruby_whisper_context_params.c +163 -0
- data/ext/ruby_whisper_model.c +12 -13
- data/ext/ruby_whisper_params.c +47 -24
- data/ext/ruby_whisper_segment.c +84 -20
- data/ext/ruby_whisper_token.c +371 -0
- data/ext/ruby_whisper_transcribe.cpp +5 -2
- data/ext/ruby_whisper_vad_context.c +122 -0
- data/ext/ruby_whisper_vad_context_detect.cpp +51 -0
- data/ext/ruby_whisper_vad_params.c +0 -1
- data/ext/ruby_whisper_vad_segment.c +138 -0
- data/ext/ruby_whisper_vad_segments.c +105 -0
- data/ext/sources/CMakeLists.txt +4 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/arm64-apple-clang.cmake +16 -0
- data/ext/sources/cmake/arm64-windows-llvm.cmake +16 -0
- data/ext/sources/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- data/ext/sources/cmake/whisper-config.cmake.in +5 -40
- data/ext/sources/cmake/x64-windows-llvm.cmake +5 -0
- data/ext/sources/examples/addon.node/vad-example.js +2 -2
- data/ext/sources/examples/bench/bench.cpp +23 -18
- data/ext/sources/examples/cli/cli.cpp +129 -112
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/lsp/CMakeLists.txt +2 -1
- data/ext/sources/examples/miniaudio.h +4507 -2131
- data/ext/sources/examples/quantize/CMakeLists.txt +2 -1
- data/ext/sources/examples/server/server.cpp +28 -15
- data/ext/sources/examples/talk-llama/CMakeLists.txt +8 -3
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +5 -2
- data/ext/sources/examples/talk-llama/llama-adapter.h +7 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +2378 -1988
- data/ext/sources/examples/talk-llama/llama-arch.h +109 -2
- data/ext/sources/examples/talk-llama/llama-batch.cpp +78 -34
- data/ext/sources/examples/talk-llama/llama-batch.h +17 -4
- data/ext/sources/examples/talk-llama/llama-chat.cpp +100 -4
- data/ext/sources/examples/talk-llama/llama-chat.h +5 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +1088 -403
- data/ext/sources/examples/talk-llama/llama-context.h +70 -23
- data/ext/sources/examples/talk-llama/llama-cparams.h +6 -0
- data/ext/sources/examples/talk-llama/llama-ext.h +12 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +295 -60
- data/ext/sources/examples/talk-llama/llama-grammar.h +22 -1
- data/ext/sources/examples/talk-llama/llama-graph.cpp +925 -155
- data/ext/sources/examples/talk-llama/llama-graph.h +234 -23
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +79 -38
- data/ext/sources/examples/talk-llama/llama-hparams.h +118 -18
- data/ext/sources/examples/talk-llama/llama-impl.cpp +11 -7
- data/ext/sources/examples/talk-llama/llama-impl.h +14 -2
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +8 -4
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +405 -140
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +24 -10
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +44 -2
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +275 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +140 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +12 -10
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +42 -31
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +2 -2
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +197 -45
- data/ext/sources/examples/talk-llama/llama-mmap.h +8 -3
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +606 -116
- data/ext/sources/examples/talk-llama/llama-model-loader.h +41 -5
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +61 -44
- data/ext/sources/examples/talk-llama/llama-model-saver.h +5 -2
- data/ext/sources/examples/talk-llama/llama-model.cpp +2756 -13643
- data/ext/sources/examples/talk-llama/llama-model.h +112 -18
- data/ext/sources/examples/talk-llama/llama-quant.cpp +582 -365
- data/ext/sources/examples/talk-llama/{llama-sampling.cpp → llama-sampler.cpp} +1409 -199
- data/ext/sources/examples/talk-llama/llama-sampler.h +42 -0
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +248 -82
- data/ext/sources/examples/talk-llama/llama-vocab.h +50 -40
- data/ext/sources/examples/talk-llama/llama.cpp +802 -21
- data/ext/sources/examples/talk-llama/llama.h +210 -39
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +190 -0
- data/ext/sources/examples/talk-llama/models/apertus.cpp +125 -0
- data/ext/sources/examples/talk-llama/models/arcee.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/arctic.cpp +137 -0
- data/ext/sources/examples/talk-llama/models/arwkv7.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +143 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +133 -0
- data/ext/sources/examples/talk-llama/models/bert.cpp +184 -0
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +145 -0
- data/ext/sources/examples/talk-llama/models/bloom.cpp +101 -0
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +111 -0
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +102 -0
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/command-r.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/deci.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +142 -0
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +262 -0
- data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +445 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/dream.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +148 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/eurobert.cpp +97 -0
- data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +145 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +114 -0
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +111 -0
- data/ext/sources/examples/talk-llama/models/falcon.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/gemma.cpp +112 -0
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +155 -0
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +384 -0
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +170 -0
- data/ext/sources/examples/talk-llama/models/glm4.cpp +157 -0
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +195 -0
- data/ext/sources/examples/talk-llama/models/granite.cpp +210 -0
- data/ext/sources/examples/talk-llama/models/grok.cpp +159 -0
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +139 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +153 -0
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/jais.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/jais2.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +106 -0
- data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +196 -0
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/llada.cpp +99 -0
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/llama.cpp +175 -0
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/mamba-base.cpp +289 -0
- data/ext/sources/examples/talk-llama/models/mamba.cpp +54 -0
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +129 -0
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +200 -0
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/models.h +704 -0
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +109 -0
- data/ext/sources/examples/talk-llama/models/mpt.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +162 -0
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +104 -0
- data/ext/sources/examples/talk-llama/models/olmo.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +127 -0
- data/ext/sources/examples/talk-llama/models/openelm.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/orion.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/paddleocr.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi3.cpp +152 -0
- data/ext/sources/examples/talk-llama/models/plamo.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +320 -0
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/plm.cpp +169 -0
- data/ext/sources/examples/talk-llama/models/qwen.cpp +108 -0
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +151 -0
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/qwen35.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +422 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +131 -0
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +525 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +140 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/refact.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +164 -0
- data/ext/sources/examples/talk-llama/models/rwkv6.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +137 -0
- data/ext/sources/examples/talk-llama/models/rwkv7.cpp +90 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +146 -0
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +100 -0
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +165 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +166 -0
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +96 -0
- data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/xverse.cpp +108 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +121 -79
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +1 -1
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +1 -1
- data/ext/sources/ggml/CMakeLists.txt +90 -56
- data/ext/sources/ggml/include/ggml-alloc.h +9 -0
- data/ext/sources/ggml/include/ggml-backend.h +5 -2
- data/ext/sources/ggml/include/ggml-cann.h +1 -1
- data/ext/sources/ggml/include/ggml-cpu.h +6 -0
- data/ext/sources/ggml/include/ggml-hexagon.h +19 -0
- data/ext/sources/ggml/include/ggml-openvino.h +37 -0
- data/ext/sources/ggml/include/ggml-opt.h +1 -1
- data/ext/sources/ggml/include/ggml-rpc.h +14 -12
- data/ext/sources/ggml/include/ggml-virtgpu.h +14 -0
- data/ext/sources/ggml/include/ggml-zendnn.h +22 -0
- data/ext/sources/ggml/include/ggml.h +246 -21
- data/ext/sources/ggml/src/CMakeLists.txt +85 -11
- data/ext/sources/ggml/src/ggml-alloc.c +128 -50
- data/ext/sources/ggml/src/ggml-backend-dl.cpp +48 -0
- data/ext/sources/ggml/src/ggml-backend-dl.h +45 -0
- data/ext/sources/ggml/src/ggml-backend-impl.h +1 -4
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +54 -88
- data/ext/sources/ggml/src/ggml-backend.cpp +76 -23
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +18 -4
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +11 -11
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +58 -46
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +139 -48
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +2427 -1785
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +238 -362
- data/ext/sources/ggml/src/ggml-cann/common.h +285 -211
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +663 -831
- data/ext/sources/ggml/src/ggml-common.h +11 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +170 -95
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +42 -18
- data/ext/sources/ggml/src/ggml-cpu/amx/common.h +34 -10
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +85 -85
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +513 -27
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +4192 -992
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1761 -49
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1391 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +8 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +9 -9
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +124 -24
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +157 -28
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +2 -6
- data/ext/sources/ggml/src/ggml-cpu/common.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +8 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +251 -80
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +19 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +587 -119
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +33 -44
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1093 -194
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1284 -203
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +1519 -527
- data/ext/sources/ggml/src/ggml-cpu/ops.h +6 -4
- data/ext/sources/ggml/src/ggml-cpu/quants.c +40 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +3632 -781
- data/ext/sources/ggml/src/ggml-cpu/repack.h +129 -4
- data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +152 -46
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +3 -2
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +152 -1
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +7 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +140 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.h +261 -146
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +72 -1
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +2 -2
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +132 -6
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +16 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +33 -31
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +474 -85
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +41 -27
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +342 -246
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +1 -5
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cu +307 -0
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cu +77 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +98 -74
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +973 -665
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +35 -741
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +1255 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +33 -40
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +40 -18
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +206 -45
- data/ext/sources/ggml/src/ggml-cuda/fill.cu +37 -0
- data/ext/sources/ggml/src/ggml-cuda/fill.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +263 -0
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1688 -302
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +12 -10
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +908 -48
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +88 -20
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +502 -90
- data/ext/sources/ggml/src/ggml-cuda/mmid.cu +164 -0
- data/ext/sources/ggml/src/ggml-cuda/mmid.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +69 -176
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +532 -193
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +460 -104
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +5 -2
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +360 -122
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +2 -1
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +18 -76
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +73 -39
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +152 -1
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +2 -16
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +364 -149
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +101 -47
- data/ext/sources/ggml/src/ggml-cuda/set.cu +39 -0
- data/ext/sources/ggml/src/ggml-cuda/set.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +163 -41
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +68 -50
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +49 -84
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +22 -4
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +95 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +275 -119
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +20 -7
- data/ext/sources/ggml/src/ggml-cuda/tri.cu +136 -0
- data/ext/sources/ggml/src/ggml-cuda/tri.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +160 -11
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +38 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +163 -7
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +31 -17
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +22 -1
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +6 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3325 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +46 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +813 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +891 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +252 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +713 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +112 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +155 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +63 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +240 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +251 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +26 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +1199 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2670 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +497 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +168 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +419 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +339 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +382 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- data/ext/sources/ggml/src/ggml-hexagon/libdl.h +79 -0
- data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +153 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +14 -13
- data/ext/sources/ggml/src/ggml-impl.h +129 -6
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -10
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +15 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +8 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +173 -34
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +912 -344
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +124 -59
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +588 -144
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +396 -23
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +1724 -421
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +16 -3
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +333 -114
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +3050 -1539
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +3 -1
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +30 -1
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +4279 -497
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +41 -99
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +45 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +267 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +4 -3
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
- data/ext/sources/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mv_q6_k.cl → mul_mv_q6_k_f32.cl} +4 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +31 -32
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +14 -4
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +35 -16
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +94 -48
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- data/ext/sources/ggml/src/ggml-openvino/.clang-format +154 -0
- data/ext/sources/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +975 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +373 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +1110 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +884 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.h +153 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +39 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +293 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +226 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +85 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.cpp +823 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.h +123 -0
- data/ext/sources/ggml/src/ggml-quants.c +96 -5
- data/ext/sources/ggml/src/ggml-quants.h +3 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +438 -156
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +59 -87
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +81 -0
- data/ext/sources/ggml/src/ggml-sycl/add-id.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +21 -29
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +0 -6
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +427 -20
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +55 -44
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +103 -1
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +22 -1
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +79 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +0 -3
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +18 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +867 -50
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +401 -358
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +12 -2
- data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +1179 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +55 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +1338 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +667 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +225 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.hpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +309 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +645 -155
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +221 -66
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/outprod.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/pad.cpp +97 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.hpp +24 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.cpp +122 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +457 -281
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/set.cpp +73 -0
- data/ext/sources/ggml/src/ggml-sycl/set.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +327 -162
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +4 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +71 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +1 -1
- data/ext/sources/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- data/ext/sources/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- data/ext/sources/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +39 -19
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +5994 -3055
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +18 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +47 -49
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +9 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +386 -160
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +82 -20
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +400 -174
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +123 -37
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +10 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +128 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +17 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +2 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +19 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +13 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mat_vec_base.comp → mul_mat_vec_base.glsl} +77 -29
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +71 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +41 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +44 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +39 -36
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +88 -105
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +41 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mm_funcs.comp → mul_mm_funcs.glsl} +69 -59
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +74 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +92 -230
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +21 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +10 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +49 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +207 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +8 -49
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +8 -32
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +8 -32
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +8 -38
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +50 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +2 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +345 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +90 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +384 -180
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +28 -2
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1374 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +2544 -726
- data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +141 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +65 -72
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +73 -15
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{get_rows.tmpl.wgsl → get_rows.wgsl} +53 -259
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat.tmpl.wgsl → mul_mat.wgsl} +72 -261
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +766 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +147 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +196 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +480 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +83 -17
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +40 -12
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +6 -1
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +91 -0
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
- data/ext/sources/ggml/src/ggml.c +590 -64
- data/ext/sources/ggml/src/gguf.cpp +229 -44
- data/ext/sources/include/whisper.h +1 -0
- data/ext/sources/src/CMakeLists.txt +3 -1
- data/ext/sources/src/whisper.cpp +106 -62
- data/ext/sources/tests/CMakeLists.txt +2 -2
- data/ext/sources/tests/test-vad-full.cpp +4 -2
- data/ext/sources/tests/test-vad.cpp +1 -1
- data/extsources.rb +1 -0
- data/lib/whisper/model/uri.rb +17 -18
- data/sig/whisper.rbs +162 -4
- data/test/test_context_params.rb +82 -0
- data/test/test_params.rb +16 -8
- data/test/test_segment.rb +0 -1
- data/test/test_token.rb +81 -0
- data/test/test_vad.rb +1 -1
- data/test/test_vad_context.rb +100 -0
- data/test/test_vad_segment.rb +19 -0
- data/test/test_vad_segments.rb +16 -0
- data/test/test_whisper.rb +27 -0
- data/whispercpp.gemspec +1 -1
- metadata +502 -37
- data/ext/sources/build-xcframework.sh +0 -571
- data/ext/sources/examples/talk-llama/llama-sampling.h +0 -32
- data/ext/sources/ggml/cmake/BuildTypes.cmake +0 -54
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +0 -2579
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +0 -45
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +0 -48
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
data/ext/sources/ggml/src/ggml.c
CHANGED
|
@@ -53,13 +53,15 @@
|
|
|
53
53
|
|
|
54
54
|
#define UNUSED GGML_UNUSED
|
|
55
55
|
|
|
56
|
+
// Needed for ggml_fp32_to_bf16_row()
|
|
57
|
+
#if defined(__AVX512BF16__)
|
|
56
58
|
#if defined(_MSC_VER)
|
|
57
|
-
#define m512bh(p) p
|
|
58
59
|
#define m512i(p) p
|
|
59
60
|
#else
|
|
60
|
-
#
|
|
61
|
+
#include <immintrin.h>
|
|
61
62
|
#define m512i(p) (__m512i)(p)
|
|
62
|
-
#endif
|
|
63
|
+
#endif // defined(_MSC_VER)
|
|
64
|
+
#endif // defined(__AVX512BF16__)
|
|
63
65
|
|
|
64
66
|
#if defined(__linux__) || \
|
|
65
67
|
defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
|
|
@@ -124,6 +126,13 @@ static void ggml_print_backtrace_symbols(void) {
|
|
|
124
126
|
int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
|
|
125
127
|
backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
|
|
126
128
|
}
|
|
129
|
+
#elif defined(__APPLE__)
|
|
130
|
+
#include <execinfo.h>
|
|
131
|
+
static void ggml_print_backtrace_symbols(void) {
|
|
132
|
+
void * trace[100];
|
|
133
|
+
int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
|
|
134
|
+
backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
|
|
135
|
+
}
|
|
127
136
|
#else
|
|
128
137
|
static void ggml_print_backtrace_symbols(void) {
|
|
129
138
|
// platform not supported
|
|
@@ -135,6 +144,20 @@ void ggml_print_backtrace(void) {
|
|
|
135
144
|
if (GGML_NO_BACKTRACE) {
|
|
136
145
|
return;
|
|
137
146
|
}
|
|
147
|
+
#if defined(__APPLE__)
|
|
148
|
+
// On macOS, fork+debugger attachment is problematic due to:
|
|
149
|
+
// 1. libdispatch "poisons" forked child processes
|
|
150
|
+
// 2. lldb has issues attaching to parent from forked child
|
|
151
|
+
// Use simple backtrace() instead to avoid Terminal.app crashes
|
|
152
|
+
const char * GGML_BACKTRACE_LLDB = getenv("GGML_BACKTRACE_LLDB");
|
|
153
|
+
if (!GGML_BACKTRACE_LLDB) {
|
|
154
|
+
fprintf(stderr, "WARNING: Using native backtrace. Set GGML_BACKTRACE_LLDB for more info.\n");
|
|
155
|
+
fprintf(stderr, "WARNING: GGML_BACKTRACE_LLDB may cause native MacOS Terminal.app to crash.\n");
|
|
156
|
+
fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n");
|
|
157
|
+
ggml_print_backtrace_symbols();
|
|
158
|
+
return;
|
|
159
|
+
}
|
|
160
|
+
#endif
|
|
138
161
|
#if defined(__linux__)
|
|
139
162
|
FILE * f = fopen("/proc/self/status", "r");
|
|
140
163
|
size_t size = 0;
|
|
@@ -695,6 +718,14 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
|
|
695
718
|
.to_float = (ggml_to_float_t) dequantize_row_mxfp4,
|
|
696
719
|
.from_float_ref = (ggml_from_float_t)quantize_row_mxfp4_ref,
|
|
697
720
|
},
|
|
721
|
+
[GGML_TYPE_NVFP4] = {
|
|
722
|
+
.type_name = "nvfp4",
|
|
723
|
+
.blck_size = QK_NVFP4,
|
|
724
|
+
.type_size = sizeof(block_nvfp4),
|
|
725
|
+
.is_quantized = true,
|
|
726
|
+
.to_float = (ggml_to_float_t) dequantize_row_nvfp4,
|
|
727
|
+
.from_float_ref = (ggml_from_float_t)quantize_row_nvfp4_ref,
|
|
728
|
+
},
|
|
698
729
|
[GGML_TYPE_Q2_K] = {
|
|
699
730
|
.type_name = "q2_K",
|
|
700
731
|
.blck_size = QK_K,
|
|
@@ -876,7 +907,8 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
|
|
876
907
|
};
|
|
877
908
|
|
|
878
909
|
const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
|
|
879
|
-
|
|
910
|
+
assert(type >= 0);
|
|
911
|
+
assert(type < GGML_TYPE_COUNT);
|
|
880
912
|
return &type_traits[type];
|
|
881
913
|
}
|
|
882
914
|
|
|
@@ -935,6 +967,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
|
935
967
|
"COS",
|
|
936
968
|
"SUM",
|
|
937
969
|
"SUM_ROWS",
|
|
970
|
+
"CUMSUM",
|
|
938
971
|
"MEAN",
|
|
939
972
|
"ARGMAX",
|
|
940
973
|
"COUNT_EQUAL",
|
|
@@ -989,7 +1022,10 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
|
989
1022
|
"ARANGE",
|
|
990
1023
|
"TIMESTEP_EMBEDDING",
|
|
991
1024
|
"ARGSORT",
|
|
1025
|
+
"TOP_K",
|
|
992
1026
|
"LEAKY_RELU",
|
|
1027
|
+
"TRI",
|
|
1028
|
+
"FILL",
|
|
993
1029
|
|
|
994
1030
|
"FLASH_ATTN_EXT",
|
|
995
1031
|
"FLASH_ATTN_BACK",
|
|
@@ -1002,6 +1038,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
|
1002
1038
|
"RWKV_WKV6",
|
|
1003
1039
|
"GATED_LINEAR_ATTN",
|
|
1004
1040
|
"RWKV_WKV7",
|
|
1041
|
+
"SOLVE_TRI",
|
|
1042
|
+
"GATED_DELTA_NET",
|
|
1005
1043
|
|
|
1006
1044
|
"UNARY",
|
|
1007
1045
|
|
|
@@ -1019,7 +1057,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
|
1019
1057
|
"GLU",
|
|
1020
1058
|
};
|
|
1021
1059
|
|
|
1022
|
-
static_assert(GGML_OP_COUNT ==
|
|
1060
|
+
static_assert(GGML_OP_COUNT == 96, "GGML_OP_COUNT != 96");
|
|
1023
1061
|
|
|
1024
1062
|
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
1025
1063
|
"none",
|
|
@@ -1039,6 +1077,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
|
1039
1077
|
"cos(x)",
|
|
1040
1078
|
"Σx",
|
|
1041
1079
|
"Σx_k",
|
|
1080
|
+
"cumsum(x)",
|
|
1042
1081
|
"Σx/n",
|
|
1043
1082
|
"argmax(x)",
|
|
1044
1083
|
"count_equal(x)",
|
|
@@ -1093,7 +1132,10 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
|
1093
1132
|
"arange(start, stop, step)",
|
|
1094
1133
|
"timestep_embedding(timesteps, dim, max_period)",
|
|
1095
1134
|
"argsort(x)",
|
|
1135
|
+
"top_k(x)",
|
|
1096
1136
|
"leaky_relu(x)",
|
|
1137
|
+
"tri(x)",
|
|
1138
|
+
"fill(x, c)",
|
|
1097
1139
|
|
|
1098
1140
|
"flash_attn_ext(x)",
|
|
1099
1141
|
"flash_attn_back(x)",
|
|
@@ -1106,6 +1148,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
|
1106
1148
|
"rwkv_wkv6(k, v, r, tf, td, s)",
|
|
1107
1149
|
"gated_linear_attn(k, v, q, gate, s)",
|
|
1108
1150
|
"rwkv_wkv7(r, w, k, v, a, b, s)",
|
|
1151
|
+
"A X = B, A triangular, solve X",
|
|
1152
|
+
"gated_delta_net(q, k, v, g, beta, s)",
|
|
1109
1153
|
|
|
1110
1154
|
"unary(x)",
|
|
1111
1155
|
|
|
@@ -1123,7 +1167,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
|
1123
1167
|
"glu(x)",
|
|
1124
1168
|
};
|
|
1125
1169
|
|
|
1126
|
-
static_assert(GGML_OP_COUNT ==
|
|
1170
|
+
static_assert(GGML_OP_COUNT == 96, "GGML_OP_COUNT != 96");
|
|
1127
1171
|
|
|
1128
1172
|
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
|
1129
1173
|
|
|
@@ -1142,11 +1186,17 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
|
|
|
1142
1186
|
"HARDSWISH",
|
|
1143
1187
|
"HARDSIGMOID",
|
|
1144
1188
|
"EXP",
|
|
1189
|
+
"EXPM1",
|
|
1190
|
+
"SOFTPLUS",
|
|
1145
1191
|
"GELU_ERF",
|
|
1192
|
+
"XIELU",
|
|
1193
|
+
"FLOOR",
|
|
1194
|
+
"CEIL",
|
|
1195
|
+
"ROUND",
|
|
1196
|
+
"TRUNC",
|
|
1146
1197
|
};
|
|
1147
1198
|
|
|
1148
|
-
static_assert(GGML_UNARY_OP_COUNT ==
|
|
1149
|
-
|
|
1199
|
+
static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
|
|
1150
1200
|
|
|
1151
1201
|
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
|
|
1152
1202
|
"REGLU",
|
|
@@ -1226,27 +1276,39 @@ size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
|
|
|
1226
1276
|
}
|
|
1227
1277
|
|
|
1228
1278
|
int64_t ggml_blck_size(enum ggml_type type) {
|
|
1279
|
+
assert(type >= 0);
|
|
1280
|
+
assert(type < GGML_TYPE_COUNT);
|
|
1229
1281
|
return type_traits[type].blck_size;
|
|
1230
1282
|
}
|
|
1231
1283
|
|
|
1232
1284
|
size_t ggml_type_size(enum ggml_type type) {
|
|
1285
|
+
assert(type >= 0);
|
|
1286
|
+
assert(type < GGML_TYPE_COUNT);
|
|
1233
1287
|
return type_traits[type].type_size;
|
|
1234
1288
|
}
|
|
1235
1289
|
|
|
1236
1290
|
size_t ggml_row_size(enum ggml_type type, int64_t ne) {
|
|
1291
|
+
assert(type >= 0);
|
|
1292
|
+
assert(type < GGML_TYPE_COUNT);
|
|
1237
1293
|
assert(ne % ggml_blck_size(type) == 0);
|
|
1238
1294
|
return ggml_type_size(type)*ne/ggml_blck_size(type);
|
|
1239
1295
|
}
|
|
1240
1296
|
|
|
1241
1297
|
double ggml_type_sizef(enum ggml_type type) {
|
|
1298
|
+
assert(type >= 0);
|
|
1299
|
+
assert(type < GGML_TYPE_COUNT);
|
|
1242
1300
|
return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
|
|
1243
1301
|
}
|
|
1244
1302
|
|
|
1245
1303
|
const char * ggml_type_name(enum ggml_type type) {
|
|
1246
|
-
|
|
1304
|
+
assert(type >= 0);
|
|
1305
|
+
assert(type < GGML_TYPE_COUNT);
|
|
1306
|
+
return type_traits[type].type_name;
|
|
1247
1307
|
}
|
|
1248
1308
|
|
|
1249
1309
|
bool ggml_is_quantized(enum ggml_type type) {
|
|
1310
|
+
assert(type >= 0);
|
|
1311
|
+
assert(type < GGML_TYPE_COUNT);
|
|
1250
1312
|
return type_traits[type].is_quantized;
|
|
1251
1313
|
}
|
|
1252
1314
|
|
|
@@ -1326,6 +1388,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
|
|
|
1326
1388
|
case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break;
|
|
1327
1389
|
case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break;
|
|
1328
1390
|
case GGML_FTYPE_MOSTLY_MXFP4: wtype = GGML_TYPE_MXFP4; break;
|
|
1391
|
+
case GGML_FTYPE_MOSTLY_NVFP4: wtype = GGML_TYPE_NVFP4; break;
|
|
1329
1392
|
case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break;
|
|
1330
1393
|
case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break;
|
|
1331
1394
|
case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break;
|
|
@@ -1364,16 +1427,14 @@ static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
|
|
|
1364
1427
|
}
|
|
1365
1428
|
next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
|
|
1366
1429
|
for (int i = 1; i < GGML_MAX_DIMS; i++) {
|
|
1367
|
-
if (
|
|
1368
|
-
if (i
|
|
1369
|
-
|
|
1370
|
-
return false;
|
|
1371
|
-
}
|
|
1372
|
-
next_nb *= tensor->ne[i];
|
|
1373
|
-
} else {
|
|
1374
|
-
// this dimension does not need to be contiguous
|
|
1375
|
-
next_nb = tensor->ne[i]*tensor->nb[i];
|
|
1430
|
+
if (i > n) {
|
|
1431
|
+
if (tensor->ne[i] != 1 && tensor->nb[i] != next_nb) {
|
|
1432
|
+
return false;
|
|
1376
1433
|
}
|
|
1434
|
+
next_nb *= tensor->ne[i];
|
|
1435
|
+
} else {
|
|
1436
|
+
// this dimension does not need to be contiguous
|
|
1437
|
+
next_nb = tensor->ne[i]*tensor->nb[i];
|
|
1377
1438
|
}
|
|
1378
1439
|
}
|
|
1379
1440
|
return true;
|
|
@@ -1457,6 +1518,10 @@ bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tenso
|
|
|
1457
1518
|
(t0->nb[3] == t1->nb[3]);
|
|
1458
1519
|
}
|
|
1459
1520
|
|
|
1521
|
+
bool ggml_is_view(const struct ggml_tensor * t) {
|
|
1522
|
+
return ggml_impl_is_view(t);
|
|
1523
|
+
}
|
|
1524
|
+
|
|
1460
1525
|
// check if t1 can be represented as a repetition of t0
|
|
1461
1526
|
bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
|
|
1462
1527
|
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
|
@@ -1586,11 +1651,23 @@ static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml
|
|
|
1586
1651
|
const size_t cur_end = cur_offs + cur_size;
|
|
1587
1652
|
|
|
1588
1653
|
// align to GGML_MEM_ALIGN
|
|
1654
|
+
GGML_ASSERT(size <= SIZE_MAX - (GGML_MEM_ALIGN - 1));
|
|
1589
1655
|
size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
|
|
1590
1656
|
|
|
1591
1657
|
char * const mem_buffer = ctx->mem_buffer;
|
|
1592
1658
|
struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
|
|
1593
1659
|
|
|
1660
|
+
// integer overflow checks
|
|
1661
|
+
if (cur_end > SIZE_MAX - size_needed) {
|
|
1662
|
+
GGML_LOG_WARN("%s: overflow detected in cur_end (%zu) + size_needed (%zu)\n", __func__, cur_end, size_needed);
|
|
1663
|
+
return NULL;
|
|
1664
|
+
}
|
|
1665
|
+
if (cur_end + size_needed > SIZE_MAX - GGML_OBJECT_SIZE) {
|
|
1666
|
+
GGML_LOG_WARN("%s: overflow detected in cur_end (%zu) + size_needed (%zu) + GGML_OBJECT_SIZE (%zu)\n", __func__,
|
|
1667
|
+
cur_end, size_needed, (size_t) GGML_OBJECT_SIZE);
|
|
1668
|
+
return NULL;
|
|
1669
|
+
}
|
|
1670
|
+
|
|
1594
1671
|
if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
|
|
1595
1672
|
GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
|
|
1596
1673
|
__func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
|
|
@@ -1659,6 +1736,8 @@ static struct ggml_tensor * ggml_new_tensor_impl(
|
|
|
1659
1736
|
obj_alloc_size = data_size;
|
|
1660
1737
|
}
|
|
1661
1738
|
|
|
1739
|
+
GGML_ASSERT(GGML_TENSOR_SIZE <= SIZE_MAX - obj_alloc_size);
|
|
1740
|
+
|
|
1662
1741
|
struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
|
|
1663
1742
|
GGML_ASSERT(obj_new);
|
|
1664
1743
|
|
|
@@ -2254,6 +2333,30 @@ struct ggml_tensor * ggml_log_inplace(
|
|
|
2254
2333
|
return ggml_log_impl(ctx, a, true);
|
|
2255
2334
|
}
|
|
2256
2335
|
|
|
2336
|
+
struct ggml_tensor * ggml_expm1(
|
|
2337
|
+
struct ggml_context * ctx,
|
|
2338
|
+
struct ggml_tensor * a) {
|
|
2339
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
|
|
2340
|
+
}
|
|
2341
|
+
|
|
2342
|
+
struct ggml_tensor * ggml_expm1_inplace(
|
|
2343
|
+
struct ggml_context * ctx,
|
|
2344
|
+
struct ggml_tensor * a) {
|
|
2345
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
|
|
2346
|
+
}
|
|
2347
|
+
|
|
2348
|
+
struct ggml_tensor * ggml_softplus(
|
|
2349
|
+
struct ggml_context * ctx,
|
|
2350
|
+
struct ggml_tensor * a) {
|
|
2351
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
|
|
2352
|
+
}
|
|
2353
|
+
|
|
2354
|
+
struct ggml_tensor * ggml_softplus_inplace(
|
|
2355
|
+
struct ggml_context * ctx,
|
|
2356
|
+
struct ggml_tensor * a) {
|
|
2357
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
|
|
2358
|
+
}
|
|
2359
|
+
|
|
2257
2360
|
// ggml_sin
|
|
2258
2361
|
|
|
2259
2362
|
static struct ggml_tensor * ggml_sin_impl(
|
|
@@ -2337,6 +2440,21 @@ struct ggml_tensor * ggml_sum_rows(
|
|
|
2337
2440
|
return result;
|
|
2338
2441
|
}
|
|
2339
2442
|
|
|
2443
|
+
// ggml_cumsum
|
|
2444
|
+
|
|
2445
|
+
struct ggml_tensor * ggml_cumsum(
|
|
2446
|
+
struct ggml_context * ctx,
|
|
2447
|
+
struct ggml_tensor * a) {
|
|
2448
|
+
GGML_ASSERT(a->type == GGML_TYPE_F32);
|
|
2449
|
+
|
|
2450
|
+
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
|
2451
|
+
|
|
2452
|
+
result->op = GGML_OP_CUMSUM;
|
|
2453
|
+
result->src[0] = a;
|
|
2454
|
+
|
|
2455
|
+
return result;
|
|
2456
|
+
}
|
|
2457
|
+
|
|
2340
2458
|
// ggml_mean
|
|
2341
2459
|
|
|
2342
2460
|
struct ggml_tensor * ggml_mean(
|
|
@@ -2652,6 +2770,29 @@ struct ggml_tensor * ggml_silu_inplace(
|
|
|
2652
2770
|
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
|
|
2653
2771
|
}
|
|
2654
2772
|
|
|
2773
|
+
// ggml_xielu
|
|
2774
|
+
|
|
2775
|
+
struct ggml_tensor * ggml_xielu(
|
|
2776
|
+
struct ggml_context * ctx,
|
|
2777
|
+
struct ggml_tensor * a,
|
|
2778
|
+
float alpha_n,
|
|
2779
|
+
float alpha_p,
|
|
2780
|
+
float beta,
|
|
2781
|
+
float eps) {
|
|
2782
|
+
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
|
2783
|
+
|
|
2784
|
+
ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
|
|
2785
|
+
ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
|
|
2786
|
+
ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
|
|
2787
|
+
ggml_set_op_params_f32(result, 3, beta);
|
|
2788
|
+
ggml_set_op_params_f32(result, 4, eps);
|
|
2789
|
+
|
|
2790
|
+
result->op = GGML_OP_UNARY;
|
|
2791
|
+
result->src[0] = a;
|
|
2792
|
+
|
|
2793
|
+
return result;
|
|
2794
|
+
}
|
|
2795
|
+
|
|
2655
2796
|
// ggml_silu_back
|
|
2656
2797
|
|
|
2657
2798
|
struct ggml_tensor * ggml_silu_back(
|
|
@@ -2726,6 +2867,62 @@ static struct ggml_tensor * ggml_glu_impl(
|
|
|
2726
2867
|
return result;
|
|
2727
2868
|
}
|
|
2728
2869
|
|
|
2870
|
+
// ggml_floor
|
|
2871
|
+
|
|
2872
|
+
struct ggml_tensor * ggml_floor(
|
|
2873
|
+
struct ggml_context * ctx,
|
|
2874
|
+
struct ggml_tensor * a) {
|
|
2875
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
|
|
2876
|
+
}
|
|
2877
|
+
|
|
2878
|
+
struct ggml_tensor * ggml_floor_inplace(
|
|
2879
|
+
struct ggml_context * ctx,
|
|
2880
|
+
struct ggml_tensor * a) {
|
|
2881
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
|
|
2882
|
+
}
|
|
2883
|
+
|
|
2884
|
+
// ggml_ceil
|
|
2885
|
+
|
|
2886
|
+
struct ggml_tensor * ggml_ceil(
|
|
2887
|
+
struct ggml_context * ctx,
|
|
2888
|
+
struct ggml_tensor * a) {
|
|
2889
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
|
|
2890
|
+
}
|
|
2891
|
+
|
|
2892
|
+
struct ggml_tensor * ggml_ceil_inplace(
|
|
2893
|
+
struct ggml_context * ctx,
|
|
2894
|
+
struct ggml_tensor * a) {
|
|
2895
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
|
|
2896
|
+
}
|
|
2897
|
+
|
|
2898
|
+
//ggml_round
|
|
2899
|
+
|
|
2900
|
+
struct ggml_tensor * ggml_round(
|
|
2901
|
+
struct ggml_context * ctx,
|
|
2902
|
+
struct ggml_tensor * a) {
|
|
2903
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND);
|
|
2904
|
+
}
|
|
2905
|
+
|
|
2906
|
+
struct ggml_tensor * ggml_round_inplace(
|
|
2907
|
+
struct ggml_context * ctx,
|
|
2908
|
+
struct ggml_tensor * a) {
|
|
2909
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND);
|
|
2910
|
+
}
|
|
2911
|
+
|
|
2912
|
+
//ggml_trunc
|
|
2913
|
+
|
|
2914
|
+
struct ggml_tensor * ggml_trunc(
|
|
2915
|
+
struct ggml_context * ctx,
|
|
2916
|
+
struct ggml_tensor * a) {
|
|
2917
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
|
|
2918
|
+
}
|
|
2919
|
+
|
|
2920
|
+
struct ggml_tensor * ggml_trunc_inplace(
|
|
2921
|
+
struct ggml_context * ctx,
|
|
2922
|
+
struct ggml_tensor * a) {
|
|
2923
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
|
|
2924
|
+
}
|
|
2925
|
+
|
|
2729
2926
|
struct ggml_tensor * ggml_glu(
|
|
2730
2927
|
struct ggml_context * ctx,
|
|
2731
2928
|
struct ggml_tensor * a,
|
|
@@ -3284,7 +3481,8 @@ struct ggml_tensor * ggml_cast(
|
|
|
3284
3481
|
|
|
3285
3482
|
result->op = GGML_OP_CPY;
|
|
3286
3483
|
result->src[0] = a;
|
|
3287
|
-
result->src[1] = result;
|
|
3484
|
+
result->src[1] = result; // note: this self-reference might seem redundant, but it's actually needed by some
|
|
3485
|
+
// backends for consistency with ggml_cpy_impl() above
|
|
3288
3486
|
|
|
3289
3487
|
return result;
|
|
3290
3488
|
}
|
|
@@ -3829,6 +4027,15 @@ struct ggml_tensor * ggml_soft_max_ext(
|
|
|
3829
4027
|
return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
|
|
3830
4028
|
}
|
|
3831
4029
|
|
|
4030
|
+
struct ggml_tensor * ggml_soft_max_ext_inplace(
|
|
4031
|
+
struct ggml_context * ctx,
|
|
4032
|
+
struct ggml_tensor * a,
|
|
4033
|
+
struct ggml_tensor * mask,
|
|
4034
|
+
float scale,
|
|
4035
|
+
float max_bias) {
|
|
4036
|
+
return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
|
|
4037
|
+
}
|
|
4038
|
+
|
|
3832
4039
|
void ggml_soft_max_add_sinks(
|
|
3833
4040
|
struct ggml_tensor * a,
|
|
3834
4041
|
struct ggml_tensor * sinks) {
|
|
@@ -4672,6 +4879,8 @@ struct ggml_tensor * ggml_pool_1d(
|
|
|
4672
4879
|
a->ne[2],
|
|
4673
4880
|
a->ne[3],
|
|
4674
4881
|
};
|
|
4882
|
+
GGML_ASSERT(ne[0] > 0);
|
|
4883
|
+
|
|
4675
4884
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
|
4676
4885
|
|
|
4677
4886
|
int32_t params[] = { op, k0, s0, p0 };
|
|
@@ -4702,6 +4911,9 @@ struct ggml_tensor * ggml_pool_2d(
|
|
|
4702
4911
|
a->ne[2],
|
|
4703
4912
|
a->ne[3],
|
|
4704
4913
|
};
|
|
4914
|
+
GGML_ASSERT(ne[0] > 0);
|
|
4915
|
+
GGML_ASSERT(ne[1] > 0);
|
|
4916
|
+
|
|
4705
4917
|
result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
|
4706
4918
|
|
|
4707
4919
|
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
|
|
@@ -4748,6 +4960,8 @@ static struct ggml_tensor * ggml_interpolate_impl(
|
|
|
4748
4960
|
int64_t ne3,
|
|
4749
4961
|
uint32_t mode) {
|
|
4750
4962
|
GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
|
|
4963
|
+
// TODO: implement antialias for modes other than bilinear
|
|
4964
|
+
GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);
|
|
4751
4965
|
|
|
4752
4966
|
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
|
|
4753
4967
|
|
|
@@ -4802,6 +5016,18 @@ struct ggml_tensor * ggml_pad(
|
|
|
4802
5016
|
return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
|
|
4803
5017
|
}
|
|
4804
5018
|
|
|
5019
|
+
// ggml_pad_circular
|
|
5020
|
+
|
|
5021
|
+
struct ggml_tensor * ggml_pad_circular(
|
|
5022
|
+
struct ggml_context * ctx,
|
|
5023
|
+
struct ggml_tensor * a,
|
|
5024
|
+
int p0,
|
|
5025
|
+
int p1,
|
|
5026
|
+
int p2,
|
|
5027
|
+
int p3) {
|
|
5028
|
+
return ggml_pad_ext_circular(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
|
|
5029
|
+
}
|
|
5030
|
+
|
|
4805
5031
|
struct ggml_tensor * ggml_pad_ext(
|
|
4806
5032
|
struct ggml_context * ctx,
|
|
4807
5033
|
struct ggml_tensor * a,
|
|
@@ -4828,6 +5054,7 @@ struct ggml_tensor * ggml_pad_ext(
|
|
|
4828
5054
|
ggml_set_op_params_i32(result, 5, rp2);
|
|
4829
5055
|
ggml_set_op_params_i32(result, 6, lp3);
|
|
4830
5056
|
ggml_set_op_params_i32(result, 7, rp3);
|
|
5057
|
+
ggml_set_op_params_i32(result, 8, 0); // not circular by default
|
|
4831
5058
|
|
|
4832
5059
|
|
|
4833
5060
|
result->op = GGML_OP_PAD;
|
|
@@ -4836,6 +5063,25 @@ struct ggml_tensor * ggml_pad_ext(
|
|
|
4836
5063
|
return result;
|
|
4837
5064
|
}
|
|
4838
5065
|
|
|
5066
|
+
// ggml_pad_ext_circular
|
|
5067
|
+
|
|
5068
|
+
struct ggml_tensor * ggml_pad_ext_circular(
|
|
5069
|
+
struct ggml_context * ctx,
|
|
5070
|
+
struct ggml_tensor * a,
|
|
5071
|
+
int lp0,
|
|
5072
|
+
int rp0,
|
|
5073
|
+
int lp1,
|
|
5074
|
+
int rp1,
|
|
5075
|
+
int lp2,
|
|
5076
|
+
int rp2,
|
|
5077
|
+
int lp3,
|
|
5078
|
+
int rp3
|
|
5079
|
+
) {
|
|
5080
|
+
struct ggml_tensor * result = ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
|
|
5081
|
+
ggml_set_op_params_i32(result, 8, 1); // circular
|
|
5082
|
+
return result;
|
|
5083
|
+
}
|
|
5084
|
+
|
|
4839
5085
|
// ggml_pad_reflect_1d
|
|
4840
5086
|
|
|
4841
5087
|
struct ggml_tensor * ggml_pad_reflect_1d(
|
|
@@ -4895,28 +5141,6 @@ struct ggml_tensor * ggml_roll(
|
|
|
4895
5141
|
return result;
|
|
4896
5142
|
}
|
|
4897
5143
|
|
|
4898
|
-
// ggml_arange
|
|
4899
|
-
|
|
4900
|
-
struct ggml_tensor * ggml_arange(
|
|
4901
|
-
struct ggml_context * ctx,
|
|
4902
|
-
float start,
|
|
4903
|
-
float stop,
|
|
4904
|
-
float step) {
|
|
4905
|
-
GGML_ASSERT(stop > start);
|
|
4906
|
-
|
|
4907
|
-
const int64_t steps = (int64_t) ceilf((stop - start) / step);
|
|
4908
|
-
|
|
4909
|
-
struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
|
|
4910
|
-
|
|
4911
|
-
ggml_set_op_params_f32(result, 0, start);
|
|
4912
|
-
ggml_set_op_params_f32(result, 1, stop);
|
|
4913
|
-
ggml_set_op_params_f32(result, 2, step);
|
|
4914
|
-
|
|
4915
|
-
result->op = GGML_OP_ARANGE;
|
|
4916
|
-
|
|
4917
|
-
return result;
|
|
4918
|
-
}
|
|
4919
|
-
|
|
4920
5144
|
// ggml_timestep_embedding
|
|
4921
5145
|
|
|
4922
5146
|
struct ggml_tensor * ggml_timestep_embedding(
|
|
@@ -4936,6 +5160,61 @@ struct ggml_tensor * ggml_timestep_embedding(
|
|
|
4936
5160
|
return result;
|
|
4937
5161
|
}
|
|
4938
5162
|
|
|
5163
|
+
// ggml_tri
|
|
5164
|
+
|
|
5165
|
+
struct ggml_tensor * ggml_tri(
|
|
5166
|
+
struct ggml_context * ctx,
|
|
5167
|
+
struct ggml_tensor * a,
|
|
5168
|
+
enum ggml_tri_type type) {
|
|
5169
|
+
GGML_ASSERT(a->type == GGML_TYPE_F32);
|
|
5170
|
+
|
|
5171
|
+
GGML_ASSERT(ggml_is_contiguous(a));
|
|
5172
|
+
GGML_ASSERT(a->ne[0] == a->ne[1]);
|
|
5173
|
+
|
|
5174
|
+
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
|
5175
|
+
|
|
5176
|
+
ggml_set_op_params_i32(result, 0, type);
|
|
5177
|
+
|
|
5178
|
+
result->op = GGML_OP_TRI;
|
|
5179
|
+
result->src[0] = a;
|
|
5180
|
+
|
|
5181
|
+
return result;
|
|
5182
|
+
}
|
|
5183
|
+
|
|
5184
|
+
// ggml_fill
|
|
5185
|
+
|
|
5186
|
+
static struct ggml_tensor * ggml_fill_impl(
|
|
5187
|
+
struct ggml_context * ctx,
|
|
5188
|
+
struct ggml_tensor * a,
|
|
5189
|
+
float c,
|
|
5190
|
+
bool inplace) {
|
|
5191
|
+
GGML_ASSERT(a->type == GGML_TYPE_F32);
|
|
5192
|
+
GGML_ASSERT(ggml_is_contiguous(a));
|
|
5193
|
+
|
|
5194
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
|
5195
|
+
|
|
5196
|
+
ggml_set_op_params_f32(result, 0, c);
|
|
5197
|
+
|
|
5198
|
+
result->op = GGML_OP_FILL;
|
|
5199
|
+
result->src[0] = a;
|
|
5200
|
+
|
|
5201
|
+
return result;
|
|
5202
|
+
}
|
|
5203
|
+
|
|
5204
|
+
struct ggml_tensor * ggml_fill(
|
|
5205
|
+
struct ggml_context * ctx,
|
|
5206
|
+
struct ggml_tensor * a,
|
|
5207
|
+
float c) {
|
|
5208
|
+
return ggml_fill_impl(ctx, a, c, false);
|
|
5209
|
+
}
|
|
5210
|
+
|
|
5211
|
+
struct ggml_tensor * ggml_fill_inplace(
|
|
5212
|
+
struct ggml_context * ctx,
|
|
5213
|
+
struct ggml_tensor * a,
|
|
5214
|
+
float c) {
|
|
5215
|
+
return ggml_fill_impl(ctx, a, c, true);
|
|
5216
|
+
}
|
|
5217
|
+
|
|
4939
5218
|
// ggml_argsort
|
|
4940
5219
|
|
|
4941
5220
|
struct ggml_tensor * ggml_argsort(
|
|
@@ -4943,6 +5222,7 @@ struct ggml_tensor * ggml_argsort(
|
|
|
4943
5222
|
struct ggml_tensor * a,
|
|
4944
5223
|
enum ggml_sort_order order) {
|
|
4945
5224
|
GGML_ASSERT(a->ne[0] <= INT32_MAX);
|
|
5225
|
+
|
|
4946
5226
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
|
|
4947
5227
|
|
|
4948
5228
|
ggml_set_op_params_i32(result, 0, (int32_t) order);
|
|
@@ -4953,9 +5233,9 @@ struct ggml_tensor * ggml_argsort(
|
|
|
4953
5233
|
return result;
|
|
4954
5234
|
}
|
|
4955
5235
|
|
|
4956
|
-
//
|
|
5236
|
+
// ggml_argsort_top_k
|
|
4957
5237
|
|
|
4958
|
-
struct ggml_tensor *
|
|
5238
|
+
struct ggml_tensor * ggml_argsort_top_k(
|
|
4959
5239
|
struct ggml_context * ctx,
|
|
4960
5240
|
struct ggml_tensor * a,
|
|
4961
5241
|
int k) {
|
|
@@ -4971,6 +5251,44 @@ struct ggml_tensor * ggml_top_k(
|
|
|
4971
5251
|
return result;
|
|
4972
5252
|
}
|
|
4973
5253
|
|
|
5254
|
+
// ggml_top_k
|
|
5255
|
+
|
|
5256
|
+
struct ggml_tensor * ggml_top_k(
|
|
5257
|
+
struct ggml_context * ctx,
|
|
5258
|
+
struct ggml_tensor * a,
|
|
5259
|
+
int k) {
|
|
5260
|
+
GGML_ASSERT(a->ne[0] >= k);
|
|
5261
|
+
|
|
5262
|
+
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_I32, k, a->ne[1], a->ne[2], a->ne[3]);
|
|
5263
|
+
|
|
5264
|
+
result->op = GGML_OP_TOP_K;
|
|
5265
|
+
result->src[0] = a;
|
|
5266
|
+
|
|
5267
|
+
return result;
|
|
5268
|
+
}
|
|
5269
|
+
|
|
5270
|
+
// ggml_arange
|
|
5271
|
+
|
|
5272
|
+
struct ggml_tensor * ggml_arange(
|
|
5273
|
+
struct ggml_context * ctx,
|
|
5274
|
+
float start,
|
|
5275
|
+
float stop,
|
|
5276
|
+
float step) {
|
|
5277
|
+
GGML_ASSERT(stop > start);
|
|
5278
|
+
|
|
5279
|
+
const int64_t steps = (int64_t) ceilf((stop - start) / step);
|
|
5280
|
+
|
|
5281
|
+
struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
|
|
5282
|
+
|
|
5283
|
+
ggml_set_op_params_f32(result, 0, start);
|
|
5284
|
+
ggml_set_op_params_f32(result, 1, stop);
|
|
5285
|
+
ggml_set_op_params_f32(result, 2, step);
|
|
5286
|
+
|
|
5287
|
+
result->op = GGML_OP_ARANGE;
|
|
5288
|
+
|
|
5289
|
+
return result;
|
|
5290
|
+
}
|
|
5291
|
+
|
|
4974
5292
|
// ggml_flash_attn_ext
|
|
4975
5293
|
|
|
4976
5294
|
struct ggml_tensor * ggml_flash_attn_ext(
|
|
@@ -4990,8 +5308,6 @@ struct ggml_tensor * ggml_flash_attn_ext(
|
|
|
4990
5308
|
|
|
4991
5309
|
if (mask) {
|
|
4992
5310
|
GGML_ASSERT(ggml_is_contiguous(mask));
|
|
4993
|
-
GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) &&
|
|
4994
|
-
"the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big");
|
|
4995
5311
|
//GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
|
|
4996
5312
|
|
|
4997
5313
|
GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
|
|
@@ -5473,7 +5789,7 @@ static struct ggml_tensor * ggml_unary_impl(
|
|
|
5473
5789
|
struct ggml_tensor * a,
|
|
5474
5790
|
enum ggml_unary_op op,
|
|
5475
5791
|
bool inplace) {
|
|
5476
|
-
GGML_ASSERT(
|
|
5792
|
+
GGML_ASSERT(ggml_is_contiguous_rows(a));
|
|
5477
5793
|
|
|
5478
5794
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
|
5479
5795
|
|
|
@@ -5790,6 +6106,92 @@ struct ggml_tensor * ggml_opt_step_sgd(
|
|
|
5790
6106
|
return result;
|
|
5791
6107
|
}
|
|
5792
6108
|
|
|
6109
|
+
// solve_tri
|
|
6110
|
+
|
|
6111
|
+
struct ggml_tensor * ggml_solve_tri(
|
|
6112
|
+
struct ggml_context * ctx,
|
|
6113
|
+
struct ggml_tensor * a,
|
|
6114
|
+
struct ggml_tensor * b,
|
|
6115
|
+
bool left,
|
|
6116
|
+
bool lower,
|
|
6117
|
+
bool uni) {
|
|
6118
|
+
GGML_ASSERT(a->type == GGML_TYPE_F32);
|
|
6119
|
+
GGML_ASSERT(b->type == GGML_TYPE_F32);
|
|
6120
|
+
|
|
6121
|
+
// A must be square and lower diagonal
|
|
6122
|
+
GGML_ASSERT(a->ne[0] == a->ne[1]);
|
|
6123
|
+
// B must have same outer dimension as A
|
|
6124
|
+
GGML_ASSERT(a->ne[1] == b->ne[1]);
|
|
6125
|
+
|
|
6126
|
+
// batch dimensions must be equal
|
|
6127
|
+
GGML_ASSERT(a->ne[2] == b->ne[2]);
|
|
6128
|
+
GGML_ASSERT(a->ne[3] == b->ne[3]);
|
|
6129
|
+
|
|
6130
|
+
GGML_ASSERT(ggml_is_contiguous(a));
|
|
6131
|
+
GGML_ASSERT(ggml_is_contiguous(b));
|
|
6132
|
+
|
|
6133
|
+
GGML_ASSERT(lower && left && !uni); // TODO: support other variants
|
|
6134
|
+
|
|
6135
|
+
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
|
|
6136
|
+
|
|
6137
|
+
result->op = GGML_OP_SOLVE_TRI;
|
|
6138
|
+
result->src[0] = a;
|
|
6139
|
+
result->src[1] = b;
|
|
6140
|
+
|
|
6141
|
+
return result;
|
|
6142
|
+
}
|
|
6143
|
+
|
|
6144
|
+
// ggml_gated_delta_net
|
|
6145
|
+
|
|
6146
|
+
struct ggml_tensor * ggml_gated_delta_net(
|
|
6147
|
+
struct ggml_context * ctx,
|
|
6148
|
+
struct ggml_tensor * q,
|
|
6149
|
+
struct ggml_tensor * k,
|
|
6150
|
+
struct ggml_tensor * v,
|
|
6151
|
+
struct ggml_tensor * g,
|
|
6152
|
+
struct ggml_tensor * beta,
|
|
6153
|
+
struct ggml_tensor * state) {
|
|
6154
|
+
GGML_ASSERT(ggml_is_contiguous_rows(q));
|
|
6155
|
+
GGML_ASSERT(ggml_is_contiguous_rows(k));
|
|
6156
|
+
GGML_ASSERT(ggml_is_contiguous_rows(v));
|
|
6157
|
+
GGML_ASSERT(ggml_is_contiguous(g));
|
|
6158
|
+
GGML_ASSERT(ggml_is_contiguous(beta));
|
|
6159
|
+
GGML_ASSERT(ggml_is_contiguous(state));
|
|
6160
|
+
|
|
6161
|
+
GGML_ASSERT(q->type == GGML_TYPE_F32);
|
|
6162
|
+
GGML_ASSERT(k->type == GGML_TYPE_F32);
|
|
6163
|
+
GGML_ASSERT(v->type == GGML_TYPE_F32);
|
|
6164
|
+
GGML_ASSERT(g->type == GGML_TYPE_F32);
|
|
6165
|
+
GGML_ASSERT(beta->type == GGML_TYPE_F32);
|
|
6166
|
+
GGML_ASSERT(state->type == GGML_TYPE_F32);
|
|
6167
|
+
|
|
6168
|
+
const int64_t S_v = v->ne[0];
|
|
6169
|
+
const int64_t H = v->ne[1];
|
|
6170
|
+
const int64_t n_tokens = v->ne[2];
|
|
6171
|
+
const int64_t n_seqs = v->ne[3];
|
|
6172
|
+
|
|
6173
|
+
// gate: scalar [1, H, T, B] or vector [S_v, H, T, B] (KDA)
|
|
6174
|
+
GGML_ASSERT(g->ne[0] == 1 || g->ne[0] == S_v);
|
|
6175
|
+
GGML_ASSERT(beta->ne[0] == 1);
|
|
6176
|
+
|
|
6177
|
+
GGML_ASSERT(ggml_nelements(state) == S_v * S_v * H * n_seqs);
|
|
6178
|
+
|
|
6179
|
+
// concat output and new_state into a single tensor
|
|
6180
|
+
// output: S_v * H * n_tokens * n_seqs, state: S_v * S_v * H * n_seqs
|
|
6181
|
+
const int64_t ne[4] = { S_v * H, n_tokens * n_seqs + S_v * n_seqs, 1, 1 };
|
|
6182
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
|
6183
|
+
|
|
6184
|
+
result->op = GGML_OP_GATED_DELTA_NET;
|
|
6185
|
+
result->src[0] = q;
|
|
6186
|
+
result->src[1] = k;
|
|
6187
|
+
result->src[2] = v;
|
|
6188
|
+
result->src[3] = g;
|
|
6189
|
+
result->src[4] = beta;
|
|
6190
|
+
result->src[5] = state;
|
|
6191
|
+
|
|
6192
|
+
return result;
|
|
6193
|
+
}
|
|
6194
|
+
|
|
5793
6195
|
////////////////////////////////////////////////////////////////////////////////
|
|
5794
6196
|
|
|
5795
6197
|
struct ggml_hash_set ggml_hash_set_new(size_t size) {
|
|
@@ -6251,7 +6653,7 @@ static void ggml_compute_backward(
|
|
|
6251
6653
|
case GGML_OP_DIAG_MASK_INF: {
|
|
6252
6654
|
if (src0_needs_grads) {
|
|
6253
6655
|
/* ggml_diag_mask_inf_impl() shouldn't be here */
|
|
6254
|
-
/* ref: https://github.com/
|
|
6656
|
+
/* ref: https://github.com/ggml-org/llama.cpp/pull/4203#discussion_r1412377992 */
|
|
6255
6657
|
const int n_past = ((const int32_t *) tensor->op_params)[0];
|
|
6256
6658
|
ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
|
|
6257
6659
|
}
|
|
@@ -6362,6 +6764,16 @@ static void ggml_compute_backward(
|
|
|
6362
6764
|
ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
|
|
6363
6765
|
}
|
|
6364
6766
|
} break;
|
|
6767
|
+
case GGML_UNARY_OP_EXPM1: {
|
|
6768
|
+
if (src0_needs_grads) {
|
|
6769
|
+
ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
|
|
6770
|
+
}
|
|
6771
|
+
} break;
|
|
6772
|
+
case GGML_UNARY_OP_SOFTPLUS: {
|
|
6773
|
+
if (src0_needs_grads) {
|
|
6774
|
+
ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
|
|
6775
|
+
}
|
|
6776
|
+
} break;
|
|
6365
6777
|
default: {
|
|
6366
6778
|
fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
|
|
6367
6779
|
__func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
|
|
@@ -6405,20 +6817,35 @@ static void ggml_compute_backward(
|
|
|
6405
6817
|
GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2]));
|
|
6406
6818
|
}
|
|
6407
6819
|
|
|
6408
|
-
static size_t
|
|
6409
|
-
|
|
6410
|
-
|
|
6820
|
+
static size_t ggml_visit_parents_graph(struct ggml_cgraph * cgraph, struct ggml_tensor * node, bool compute) {
|
|
6821
|
+
if (node->op != GGML_OP_NONE && compute) {
|
|
6822
|
+
node->flags |= GGML_TENSOR_FLAG_COMPUTE;
|
|
6823
|
+
}
|
|
6824
|
+
|
|
6825
|
+
const size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node);
|
|
6411
6826
|
GGML_ASSERT(node_hash_pos != GGML_HASHSET_FULL);
|
|
6412
|
-
|
|
6413
|
-
|
|
6414
|
-
cgraph->visited_hash_set.keys[node_hash_pos] = node;
|
|
6415
|
-
ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos);
|
|
6416
|
-
cgraph->use_counts[node_hash_pos] = 0;
|
|
6417
|
-
} else {
|
|
6827
|
+
|
|
6828
|
+
if (ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) {
|
|
6418
6829
|
// already visited
|
|
6830
|
+
|
|
6831
|
+
if (compute) {
|
|
6832
|
+
// update the compute flag regardless
|
|
6833
|
+
for (int i = 0; i < GGML_MAX_SRC; ++i) {
|
|
6834
|
+
struct ggml_tensor * src = node->src[i];
|
|
6835
|
+
if (src && ((src->flags & GGML_TENSOR_FLAG_COMPUTE) == 0)) {
|
|
6836
|
+
ggml_visit_parents_graph(cgraph, src, true);
|
|
6837
|
+
}
|
|
6838
|
+
}
|
|
6839
|
+
}
|
|
6840
|
+
|
|
6419
6841
|
return node_hash_pos;
|
|
6420
6842
|
}
|
|
6421
6843
|
|
|
6844
|
+
// This is the first time we see this node in the current graph.
|
|
6845
|
+
cgraph->visited_hash_set.keys[node_hash_pos] = node;
|
|
6846
|
+
ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos);
|
|
6847
|
+
cgraph->use_counts[node_hash_pos] = 0;
|
|
6848
|
+
|
|
6422
6849
|
for (int i = 0; i < GGML_MAX_SRC; ++i) {
|
|
6423
6850
|
const int k =
|
|
6424
6851
|
(cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
|
|
@@ -6427,7 +6854,7 @@ static size_t ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor
|
|
|
6427
6854
|
|
|
6428
6855
|
struct ggml_tensor * src = node->src[k];
|
|
6429
6856
|
if (src) {
|
|
6430
|
-
size_t src_hash_pos =
|
|
6857
|
+
const size_t src_hash_pos = ggml_visit_parents_graph(cgraph, src, compute);
|
|
6431
6858
|
|
|
6432
6859
|
// Update the use count for this operand.
|
|
6433
6860
|
cgraph->use_counts[src_hash_pos]++;
|
|
@@ -6458,17 +6885,17 @@ static size_t ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor
|
|
|
6458
6885
|
return node_hash_pos;
|
|
6459
6886
|
}
|
|
6460
6887
|
|
|
6461
|
-
static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
|
|
6888
|
+
static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand, bool compute) {
|
|
6462
6889
|
if (!expand) {
|
|
6463
6890
|
// TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
|
|
6464
6891
|
ggml_graph_clear(cgraph);
|
|
6465
6892
|
}
|
|
6466
6893
|
|
|
6467
|
-
const int
|
|
6894
|
+
const int n_old = cgraph->n_nodes;
|
|
6468
6895
|
|
|
6469
|
-
|
|
6896
|
+
ggml_visit_parents_graph(cgraph, tensor, compute);
|
|
6470
6897
|
|
|
6471
|
-
const int n_new = cgraph->n_nodes -
|
|
6898
|
+
const int n_new = cgraph->n_nodes - n_old;
|
|
6472
6899
|
GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new);
|
|
6473
6900
|
|
|
6474
6901
|
if (n_new > 0) {
|
|
@@ -6477,8 +6904,22 @@ static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_ten
|
|
|
6477
6904
|
}
|
|
6478
6905
|
}
|
|
6479
6906
|
|
|
6907
|
+
struct ggml_tensor * ggml_build_forward_select(
|
|
6908
|
+
struct ggml_cgraph * cgraph,
|
|
6909
|
+
struct ggml_tensor ** tensors,
|
|
6910
|
+
int n_tensors,
|
|
6911
|
+
int idx) {
|
|
6912
|
+
GGML_ASSERT(idx >= 0 && idx < n_tensors);
|
|
6913
|
+
|
|
6914
|
+
for (int i = 0; i < n_tensors; i++) {
|
|
6915
|
+
ggml_build_forward_impl(cgraph, tensors[i], true, i == idx ? true : false);
|
|
6916
|
+
}
|
|
6917
|
+
|
|
6918
|
+
return tensors[idx];
|
|
6919
|
+
}
|
|
6920
|
+
|
|
6480
6921
|
void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
|
|
6481
|
-
ggml_build_forward_impl(cgraph, tensor, true);
|
|
6922
|
+
ggml_build_forward_impl(cgraph, tensor, true, true);
|
|
6482
6923
|
}
|
|
6483
6924
|
|
|
6484
6925
|
void ggml_build_backward_expand(
|
|
@@ -6872,6 +7313,82 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
|
6872
7313
|
GGML_LOG_INFO("========================================\n");
|
|
6873
7314
|
}
|
|
6874
7315
|
|
|
7316
|
+
static int ggml_node_list_find_tensor(const struct ggml_cgraph * cgraph,
|
|
7317
|
+
const int * idxs,
|
|
7318
|
+
int count,
|
|
7319
|
+
const struct ggml_tensor * tensor) {
|
|
7320
|
+
GGML_ASSERT(cgraph && idxs);
|
|
7321
|
+
for (int i = 0; i < count; ++i) {
|
|
7322
|
+
const int node_idx = idxs[i];
|
|
7323
|
+
|
|
7324
|
+
if (node_idx >= cgraph->n_nodes) {
|
|
7325
|
+
return -1;
|
|
7326
|
+
}
|
|
7327
|
+
if (cgraph->nodes[node_idx] == tensor) {
|
|
7328
|
+
return i;
|
|
7329
|
+
}
|
|
7330
|
+
}
|
|
7331
|
+
return -1;
|
|
7332
|
+
}
|
|
7333
|
+
|
|
7334
|
+
bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
|
|
7335
|
+
const int * node_idxs,
|
|
7336
|
+
int count,
|
|
7337
|
+
const enum ggml_op * ops,
|
|
7338
|
+
const int * outputs,
|
|
7339
|
+
int num_outputs) {
|
|
7340
|
+
GGML_ASSERT(outputs && num_outputs > 0);
|
|
7341
|
+
|
|
7342
|
+
for (int i = 0; i < count; ++i) {
|
|
7343
|
+
if (node_idxs[i] >= cgraph->n_nodes) {
|
|
7344
|
+
return false;
|
|
7345
|
+
}
|
|
7346
|
+
|
|
7347
|
+
const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
|
|
7348
|
+
|
|
7349
|
+
if (node->op != ops[i]) {
|
|
7350
|
+
return false;
|
|
7351
|
+
}
|
|
7352
|
+
|
|
7353
|
+
if ((node->flags & GGML_TENSOR_FLAG_COMPUTE) == 0) {
|
|
7354
|
+
return false;
|
|
7355
|
+
}
|
|
7356
|
+
|
|
7357
|
+
if (ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
|
|
7358
|
+
continue;
|
|
7359
|
+
}
|
|
7360
|
+
|
|
7361
|
+
if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
|
|
7362
|
+
return false;
|
|
7363
|
+
}
|
|
7364
|
+
|
|
7365
|
+
int subgraph_uses = 0;
|
|
7366
|
+
for (int j = i + 1; j < count; ++j) {
|
|
7367
|
+
const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
|
|
7368
|
+
for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
|
|
7369
|
+
if (other_node->src[src_idx] == node) {
|
|
7370
|
+
subgraph_uses++;
|
|
7371
|
+
}
|
|
7372
|
+
}
|
|
7373
|
+
}
|
|
7374
|
+
|
|
7375
|
+
if (subgraph_uses != ggml_node_get_use_count(cgraph, node_idxs[i])) {
|
|
7376
|
+
return false;
|
|
7377
|
+
}
|
|
7378
|
+
|
|
7379
|
+
// if node is a view, check if the view_src and all it's parent view_srcs are within the subgraph
|
|
7380
|
+
struct ggml_tensor * view_src = node->view_src;
|
|
7381
|
+
while (view_src) {
|
|
7382
|
+
if (ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
|
|
7383
|
+
return false;
|
|
7384
|
+
}
|
|
7385
|
+
view_src = view_src->view_src;
|
|
7386
|
+
}
|
|
7387
|
+
}
|
|
7388
|
+
|
|
7389
|
+
return true;
|
|
7390
|
+
}
|
|
7391
|
+
|
|
6875
7392
|
// check if node is part of the graph
|
|
6876
7393
|
static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
|
|
6877
7394
|
if (cgraph == NULL) {
|
|
@@ -6918,7 +7435,7 @@ static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node,
|
|
|
6918
7435
|
label);
|
|
6919
7436
|
}
|
|
6920
7437
|
|
|
6921
|
-
void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph *
|
|
7438
|
+
void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * cgraph, const char * filename) {
|
|
6922
7439
|
char color[16];
|
|
6923
7440
|
|
|
6924
7441
|
FILE * fp = ggml_fopen(filename, "w");
|
|
@@ -6939,7 +7456,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
|
|
|
6939
7456
|
if (node->flags & GGML_TENSOR_FLAG_PARAM) {
|
|
6940
7457
|
snprintf(color, sizeof(color), "yellow");
|
|
6941
7458
|
} else if (grad) {
|
|
6942
|
-
if (ggml_graph_find(
|
|
7459
|
+
if (ggml_graph_find(cgraph, node)) {
|
|
6943
7460
|
snprintf(color, sizeof(color), "green");
|
|
6944
7461
|
} else {
|
|
6945
7462
|
snprintf(color, sizeof(color), "lightblue");
|
|
@@ -7091,8 +7608,11 @@ void ggml_quantize_free(void) {
|
|
|
7091
7608
|
|
|
7092
7609
|
iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
|
|
7093
7610
|
iq2xs_free_impl(GGML_TYPE_IQ2_XS);
|
|
7611
|
+
iq2xs_free_impl(GGML_TYPE_IQ2_S);
|
|
7094
7612
|
iq2xs_free_impl(GGML_TYPE_IQ1_S);
|
|
7613
|
+
iq2xs_free_impl(GGML_TYPE_IQ1_M);
|
|
7095
7614
|
iq3xs_free_impl(256);
|
|
7615
|
+
iq3xs_free_impl(512);
|
|
7096
7616
|
|
|
7097
7617
|
ggml_critical_section_end();
|
|
7098
7618
|
}
|
|
@@ -7136,6 +7656,7 @@ size_t ggml_quantize_chunk(
|
|
|
7136
7656
|
case GGML_TYPE_Q5_1: result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
|
7137
7657
|
case GGML_TYPE_Q8_0: result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
|
7138
7658
|
case GGML_TYPE_MXFP4: result = quantize_mxfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
|
7659
|
+
case GGML_TYPE_NVFP4: result = quantize_nvfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
|
7139
7660
|
case GGML_TYPE_Q2_K: result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
|
7140
7661
|
case GGML_TYPE_Q3_K: result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
|
7141
7662
|
case GGML_TYPE_Q4_K: result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
|
@@ -7181,6 +7702,11 @@ size_t ggml_quantize_chunk(
|
|
|
7181
7702
|
|
|
7182
7703
|
////////////////////////////////////////////////////////////////////////////////
|
|
7183
7704
|
|
|
7705
|
+
void ggml_log_get(ggml_log_callback * log_callback, void ** user_data) {
|
|
7706
|
+
*log_callback = g_logger_state.log_callback;
|
|
7707
|
+
*user_data = g_logger_state.log_callback_user_data;
|
|
7708
|
+
}
|
|
7709
|
+
|
|
7184
7710
|
void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
|
|
7185
7711
|
g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
|
|
7186
7712
|
g_logger_state.log_callback_user_data = user_data;
|